From 9a3bbe128403a6bd6f8dd53f11dec991cfc7d526 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 8 Apr 2009 17:42:44 +0100
Subject: Seem to be able to reliably read and write...

---
 src/rabbit_disk_queue.erl | 75 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 src/rabbit_disk_queue.erl

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
new file mode 100644
index 00000000..24c9db7d
--- /dev/null
+++ b/src/rabbit_disk_queue.erl
@@ -0,0 +1,75 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%% Ltd. Portions created by Cohesive Financial Technologies LLC are
+%% Copyright (C) 2007-2009 Cohesive Financial Technologies
+%% LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%% (C) 2007-2009 Rabbit Technologies Ltd.
+%%
+%% All Rights Reserved.
+%%
+%% Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_disk_queue).
+
+-compile(export_all). %% CHANGE ME
+
+-define(WRITE_OK_SIZE_BITS, 8).
+-define(WRITE_OK, 255).
+-define(INTEGER_SIZE_BYTES, 8).
+-define(INTEGER_SIZE_BITS, 8 * ?INTEGER_SIZE_BYTES).
+
+base_filename() ->
+    mnesia:system_info(directory) ++ "/rabbit_disk_queue/".
+
+append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) ->
+    BodySize = size(MsgBody),
+    MsgIdBin = term_to_binary(MsgId),
+    MsgIdBinSize = size(MsgIdBin),
+    TotalSize = BodySize + MsgIdBinSize,
+    case file:write(FileHdl, <<TotalSize:(?INTEGER_SIZE_BITS),
+                               MsgIdBinSize:(?INTEGER_SIZE_BITS),
+                               MsgIdBin/binary, MsgBody/binary>>) of
+        ok -> file:write(FileHdl, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>);
+        KO -> KO
+    end.
+
+read_message_at_offset(FileHdl, Offset) ->
+    case file:position(FileHdl, {bof, Offset}) of
+        {ok, Offset} ->
+            case file:read(FileHdl, 2 * (?INTEGER_SIZE_BYTES)) of
+                {ok, <<TotalSize:(?INTEGER_SIZE_BITS),
+                       MsgIdBinSize:(?INTEGER_SIZE_BITS)>>} ->
+                    ExpectedAbsPos = Offset + (2 * (?INTEGER_SIZE_BYTES)) + MsgIdBinSize,
+                    case file:position(FileHdl, {cur, MsgIdBinSize}) of
+                        {ok, ExpectedAbsPos} ->
+                            BodySize = TotalSize - MsgIdBinSize,
+                            case file:read(FileHdl, 1 + BodySize) of
+                                {ok, <<MsgBody:BodySize/binary,
+                                       (?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>} ->
+                                    {ok, MsgBody, BodySize};
+                                KO -> KO
+                            end;
+                        KO -> KO
+                    end;
+                KO -> KO
+            end;
+        KO -> KO
+    end.
-- cgit v1.2.1
From 8802c7ef62629b6a758c85dae1944c918d2f6251 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 9 Apr 2009 14:17:20 +0100
Subject: Well, I think I can recover crashes that happened midway through a compaction. Note this hasn't really been tested yet...
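For reference, the record format from the previous commit can be exercised
round-trip like this (a minimal sketch: the path and message id are made up,
and it leans on the export_all above):

    {ok, WHdl} = file:open("/tmp/0.rdq", [append, raw, binary]),
    ok = rabbit_disk_queue:append_message(WHdl, msg1, <<"hello">>),
    ok = file:close(WHdl),
    {ok, RHdl} = file:open("/tmp/0.rdq", [read, raw, binary]),
    {ok, <<"hello">>, 5} = rabbit_disk_queue:read_message_at_offset(RHdl, 0),
    ok = file:close(RHdl).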
--- include/rabbit.hrl | 2 + src/rabbit_disk_queue.erl | 196 +++++++++++++++++++++++++++++++++++++++++++++- src/rabbit_mnesia.erl | 7 +- 3 files changed, 201 insertions(+), 4 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index c707112f..88596a43 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -64,6 +64,8 @@ -record(basic_message, {exchange_name, routing_key, content, persistent_key}). +-record(dq_msg_loc, {queue_and_msg_id, is_delivered}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 24c9db7d..2dbcabb5 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -33,13 +33,149 @@ -compile(export_all). %% CHANGE ME +-include_lib("stdlib/include/qlc.hrl"). +-include("rabbit.hrl"). + -define(WRITE_OK_SIZE_BITS, 8). -define(WRITE_OK, 255). -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, 8 * ?INTEGER_SIZE_BYTES). +-define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). +-define(FILE_DETAIL_ETS_NAME, rabbit_disk_queue_file_detail). +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-record(dqstate, {msg_location, file_summary, file_detail, next_file_name}). + +init(_Args) -> + process_flag(trap_exit, true), + Dir = base_directory(), + ok = filelib:ensure_dir(Dir), + State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), + file_summary = dict:new(), + file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [bag, private]), + next_file_name = 0 + }, + {ok, State1} = load_from_disk(State), + {ok, State1}. + + +base_directory() -> + filename:join(mnesia:system_info(directory), "/rabbit_disk_queue/"). + +%% ---- DISK RECOVERY ---- + +load_from_disk(State = #dqstate{ msg_location = MsgLoc, + file_summary = FileSum, + file_detail = FileDetail + } + ) -> + {Files, TmpFiles} = get_disk_queue_files(), + ok = recover_crashed_compactions(Files, TmpFiles), + {ok, State}. -base_filename() -> - mnesia:system_info(directory) ++ "/rabbit_disk_queue/". +recover_crashed_compactions(Files, []) -> + ok; +recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> + NonTmpRelatedFile = filename:rootname(TmpFile) ++ (?FILE_EXTENSION), + true = lists:member(NonTmpRelatedFile, Files), + % [{MsgId, TotalSize, FileOffset}] + {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(filename:join(base_directory(), TmpFile)), + % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out + lists:foreach(fun ({MsgId, _TotalSize, _FileOffset}) -> + 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + end, UncorruptedMessagesTmp), + {ok, UncorruptedMessages} = scan_file_for_valid_messages(filename:join(base_directory(), NonTmpRelatedFile)), + %% 1) It's possible that everything in the tmp file is also in the main file + %% such that the main file is (prefix ++ tmpfile). This means that compaction + %% failed immediately prior to the final step of deleting the tmp file. + %% Plan: just delete the tmp file + %% 2) It's possible that everything in the tmp file is also in the main file + %% but with holes throughout (or just somthing like main = (prefix ++ hole ++ tmpfile)). + %% This means that compaction wrote out the tmp file successfully and then failed. 
+ %% Plan: just delete the tmp file and allow the compaction to eventually be triggered later + %% 3) It's possible that everything in the tmp file is also in the main file + %% but such that the main file does not end with tmp file (and there are valid messages + %% in the suffix; main = (prefix ++ tmpfile[with extra holes?] ++ suffix)). + %% This means that compaction failed as we were writing out the tmp file. + %% Plan: just delete the tmp file and allow the compaction to eventually be triggered later + %% 4) It's possible that there are messages in the tmp file which are not in the main file. + %% This means that writing out the tmp file succeeded, but then we failed as we + %% were copying them back over to the main file, after truncating the main file. + %% As the main file has already been truncated, it should consist only of valid messages + %% Plan: Truncate the main file back to before any of the files in the tmp file and copy + %% them over again + GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, + MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), + MsgIds = lists:map(GrabMsgId, UncorruptedMessages), + case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of + true -> % we're in case 1, 2 or 3 above. Just delete the tmp file + % note this also catches the case when the tmp file is empty + ok = file:delete(TmpFile); + _False -> + % we're in case 4 above. + % check that everything in the main file is a valid message in mnesia + lists:foreach(fun (MsgId) -> + 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + end, MsgIds), + % The main file should be contiguous + {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), + % we should have that none of the messages in the prefix are in the tmp file + true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, MsgIds), + + {ok, MainHdl} = file:open(filename:join(base_directory(), NonTmpRelatedFile), [write, raw, binary]), + {ok, Top} = file:position(MainHdl, Top), + ok = file:truncate(MainHdl), % wipe out any rubbish at the end of the file + % there really could be rubbish at the end of the file - we could have failed after the + % extending truncate. + % Remember the head of the list will be the highest entry in the file + [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, + TmpSize = TmpTopOffset + TmpTopTotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), + ExpectedAbsPos = Top + TmpSize, + {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), + ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move + % if we run out of disk space, this truncate could fail, but we still + % aren't risking losing data + {ok, TmpHdl} = file:open(filename:join(base_directory(), TmpFile), [read, raw, binary]), + {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), + ok = file:close(MainHdl), + ok = file:close(TmpHdl), + ok = file:delete(TmpFile) + end, + recover_crashed_compactions(Files, TmpFiles). + +% this assumes that the messages are ordered such that the highest address is at +% the head of the list. +% this matches what scan_file_for_valid_messages produces +find_contiguous_block_prefix([]) -> {0, []}; +find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> + case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of + {ok, Acc} -> {Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), lists:reverse(Acc)}; + Res -> Res + end. 
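+%% Worked example (ids and sizes assumed for illustration; with 8-byte
+%% integers a message of TotalSize T occupies T + 17 bytes on disk):
+%%   find_contiguous_block_prefix([{c, 10, 54}, {b, 10, 27}, {a, 10, 0}])
+%%     =:= {81, [a, b, c]}   %% hole-free: 54 + 10 + 17 = 81
+%%   find_contiguous_block_prefix([{c, 10, 60}, {a, 10, 0}])
+%%     =:= {27, [a]}         %% hole below c, so the prefix restarts at a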
+find_contiguous_block_prefix([], 0, Acc) -> + {ok, Acc}; +find_contiguous_block_prefix([], _N, _Acc) -> + {0, []}; +find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) + when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> + find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); +find_contiguous_block_prefix(List, _ExpectedOffset, Acc) -> + find_contiguous_block_prefix(List). + +file_name_sort(A, B) -> + ANum = list_to_integer(filename:rootname(A)), + BNum = list_to_integer(filename:rootname(B)), + ANum < BNum. + +get_disk_queue_files() -> + DQFiles = filelib:wildcard("*" ++ (?FILE_EXTENSION), base_directory()), + DQFilesSorted = lists:sort(fun file_name_sort/2, DQFiles), + DQTFiles = filelib:wildcard("*" ++ (?FILE_EXTENSION_TMP), base_directory()), + DQTFilesSorted = lists:sort(fun file_name_sort/2, DQTFiles), + {DQFilesSorted, DQTFilesSorted}. + +%% ---- RAW READING AND WRITING OF FILES ---- append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> BodySize = size(MsgBody), @@ -63,7 +199,7 @@ read_message_at_offset(FileHdl, Offset) -> {ok, ExpectedAbsPos} -> BodySize = TotalSize - MsgIdBinSize, case file:read(FileHdl, 1 + BodySize) of - {ok, <>} -> + {ok, <>} -> {ok, MsgBody, BodySize}; KO -> KO end; @@ -73,3 +209,57 @@ read_message_at_offset(FileHdl, Offset) -> end; KO -> KO end. + +scan_file_for_valid_messages(File) -> + {ok, Hdl} = file:open(File, [raw, binary, read]), + Valid = scan_file_for_valid_messages(Hdl, 0, []), + file:close(Hdl), + Valid. + +scan_file_for_valid_messages(FileHdl, Offset, Acc) -> + case read_next_file_entry(FileHdl, Offset) of + {ok, eof} -> {ok, Acc}; + {ok, {corrupted, NextOffset}} -> + scan_file_for_valid_messages(FileHdl, NextOffset, Acc); + {ok, {ok, MsgId, TotalSize, NextOffset}} -> + scan_file_for_valid_messages(FileHdl, NextOffset, [{MsgId, TotalSize, Offset}|Acc]); + KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages + end. + + +read_next_file_entry(FileHdl, Offset) -> + case file:read(FileHdl, 2 * (?INTEGER_SIZE_BYTES)) of + {ok, <>} -> + case {TotalSize =:= 0, MsgIdBinSize =:= 0} of + {true, _} -> {ok, eof}; %% Nothing we can do other than stop + {false, true} -> %% current message corrupted, try skipping past it + ExpectedAbsPos = Offset + (2* (?INTEGER_SIZE_BYTES)) + TotalSize + 1, + case file:position(FileHdl, {cur, TotalSize + 1}) of + {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}}; + {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up + KO -> KO + end; + {false, false} -> %% all good, let's continue + case file:read(FileHdl, MsgIdBinSize) of + {ok, <>} -> + ExpectedAbsPos = Offset + (2 * (?INTEGER_SIZE_BYTES)) + TotalSize, + case file:position(FileHdl, {cur, TotalSize - MsgIdBinSize}) of + {ok, ExpectedAbsPos} -> + case file:read(FileHdl, 1) of + {ok, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>} -> + {ok, {ok, binary_to_term(MsgId), TotalSize, + Offset + (2* (?INTEGER_SIZE_BYTES)) + TotalSize + 1}}; + {ok, _SomeOtherData} -> + {ok, {corrupted, Offset + (2* (?INTEGER_SIZE_BYTES)) + TotalSize + 1}}; + KO -> KO + end; + {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up + KO -> KO + end; + eof -> {ok, eof}; + KO -> KO + end + end; + eof -> {ok, eof}; + KO -> KO + end. 
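+%% Usage sketch (file name and contents assumed for illustration): the scan
+%% tolerates torn writes. For a file holding msg1 (TotalSize 10, offset 0),
+%% a msg2 at offset 27 whose length fields survived but whose WRITE_OK byte
+%% was never written, and msg3 (TotalSize 20, offset 54):
+%%   {ok, Valid} = scan_file_for_valid_messages("0.rdq"),
+%%   Valid =:= [{msg3, 20, 54}, {msg1, 10, 0}]   %% highest offset first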
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 15213861..7179b637 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -143,7 +143,12 @@ table_definitions() -> {disc_copies, [node()]}]}, {rabbit_queue, [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}]}]. + {attributes, record_info(fields, amqqueue)}]}, + {rabbit_disk_queue, + [{record_name, dq_msg_loc}, + {attributes, record_info(fields, dq_msg_loc)}, + {disc_copies, [node()]}]} + ]. table_names() -> [Tab || {Tab, _} <- table_definitions()]. -- cgit v1.2.1 From 85f59a61c10432b3a38066edc90bfa4076f1e6e9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Apr 2009 18:01:36 +0100 Subject: internal_publish, internal_deliver, internal_ack done, but UNTESTED. publish will try and roll over to a new file if necessary. Ack has not yet been taught how to do compaction / GC. --- src/rabbit_disk_queue.erl | 233 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 210 insertions(+), 23 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2dbcabb5..b1aaa8db 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -45,47 +45,226 @@ -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). --record(dqstate, {msg_location, file_summary, file_detail, next_file_name}). +-record(dqstate, {msg_location, + file_summary, + file_detail, + current_file_num, + current_file_name, + current_file_handle, + file_size_limit, + read_file_handles, + read_file_handles_limit + }). -init(_Args) -> +init(FileSizeLimit, ReadFileHandlesLimit) -> process_flag(trap_exit, true), Dir = base_directory(), ok = filelib:ensure_dir(Dir), State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), file_summary = dict:new(), file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [bag, private]), - next_file_name = 0 + current_file_num = 0, + current_file_name = "0" ++ (?FILE_EXTENSION), + current_file_handle = undefined, + file_size_limit = FileSizeLimit, + read_file_handles = {dict:new(), gb_trees:empty()}, + read_file_handles_limit = ReadFileHandlesLimit }, - {ok, State1} = load_from_disk(State), - {ok, State1}. - + {ok, State1 = #dqstate { current_file_name = CurrentName } } = load_from_disk(State), + {ok, FileHdl} = file:open(form_filename(CurrentName), [append, raw, binary]), + {ok, State1 # dqstate { current_file_handle = FileHdl }}. + +form_filename(Name) -> + filename:join(base_directory(), Name). base_directory() -> filename:join(mnesia:system_info(directory), "/rabbit_disk_queue/"). +%% ---- INTERNAL RAW FUNCTIONS ---- + +internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, + current_file_handle = CurHdl, + current_file_name = CurName, + read_file_handles_limit = ReadFileHandlesLimit, + read_file_handles = {ReadHdls, ReadHdlsAge} + }) -> + [{MsgId, _RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), + if CurName =:= File -> + ok = file:sync(CurHdl) + end, + % so this next bit implements an LRU for file handles. But it's a bit insane, and smells + % of premature optimisation. 
So I might remove it and dump it overboard + {FileHdl, ReadHdls1, ReadHdlsAge1} + = case dict:find(File, ReadHdls) of + error -> + {ok, Hdl} = file:open(form_filename(File), [read, raw, binary]), + Now = now(), + case dict:size(ReadHdls) < ReadFileHandlesLimit of + true -> + {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, ReadHdlsAge)}; + _False -> + {_Then, OldFile, ReadHdlsAge2} = gb_trees:take_smallest(ReadHdlsAge), + OldHdl = dict:find(OldFile, ReadHdls), + ok = file:close(OldHdl), + ReadHdls2 = dict:erase(OldFile, ReadHdls), + {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), gb_trees:enter(Now, File, ReadHdlsAge2)} + end; + {ok, {Hdl, Then}} -> + Now = now(), + {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))} + end, + % read the message + {ok, {MsgBody, BodySize, TotalSize}} = read_message_at_offset(FileHdl, Offset), + ok = mnesia:write(rabbit_disk_queue, {Q, MsgId}, write), + {ok, {MsgBody, BodySize, TotalSize}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. + +internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + file_detail = FileDetail + }) -> + [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), + % is this the last time we need the message, in which case tidy up + FileSummary1 = + if 1 =:= RefCount -> + true = ets:delete(MsgLocation, MsgId), + true = ets:delete_object(FileDetail, {File, Offset, TotalSize}), + {ok, {ValidTotalSize, ContiguousTop, Left, Right}} = dict:find(File, FileSummary), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + FileSummary2 = dict:store(File, {ValidTotalSize - TotalSize - 1 - (2* (?INTEGER_SIZE_BYTES)), + ContiguousTop1, Left, Right}, FileSummary), + ok = mnesia:delete({rabbit_disk_queue, {Q, MsgId}}), + FileSummary2; + true -> + ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + FileSummary + end, + State1 = compact(File, State # dqstate { file_summary = FileSummary1 } ), + {ok, State1}. + +internal_publish(Q, MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, + current_file_handle = CurHdl, + current_file_name = CurName, + file_summary = FileSummary, + file_detail = FileDetail + } + ) when is_binary(MsgBody) -> + {ok, State1} = + case ets:lookup(MsgLocation, MsgId) of + [] -> + % New message, lots to do + {ok, Offset} = file:position(CurHdl, cur), + {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), + true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), + true = ets:insert_new(FileDetail, {CurName, Offset, TotalSize}), + {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), + ValidTotalSize1 = ValidTotalSize + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), + ContiguousTop1 = if Offset =:= ContiguousTop -> + ValidTotalSize; % can't be any holes in this file + true -> ContiguousTop + end, + FileSummary2 = dict:store(CurName, {ValidTotalSize1, ContiguousTop1, Left, undefined}, FileSummary), + maybe_roll_to_new_file(Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), + State # dqstate { file_summary = FileSummary2 }); + [{MsgId, RefCount, File, Offset, TotalSize}] -> + % We already know about it, just update counter + ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), + {ok, State} + end, + ok = mnesia:write(rabbit_disk_queue, {Q, MsgId}, write), + {ok, State1}. 
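+%% file_summary sketch (all values illustrative): each file name maps to
+%% {ValidTotalSize, ContiguousTop, Left, Right}, where Left/Right chain the
+%% files on disk into a doubly-linked list, e.g.
+%%   "0.rdq" -> {1034, 1034, undefined, "1.rdq"}
+%%   "1.rdq" -> {517, 0, "0.rdq", undefined}
+%% ContiguousTop counts the hole-free bytes at the start of the file, so it
+%% should never exceed ValidTotalSize.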
+ +%% ---- ROLLING OVER THE APPEND FILE ---- + +maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimit, + current_file_name = CurName, + current_file_handle = CurHdl, + current_file_num = CurNum, + file_summary = FileSummary, + file_detail = FileDetail + } + ) when Offset >= FileSizeLimit -> + ok = file:sync(CurHdl), + ok = file:close(CurHdl), + NextNum = CurNum + 1, + NextName = integer_to_list(NextNum) ++ (?FILE_EXTENSION), + [] = ets:lookup(FileDetail, NextName), + {ok, NextHdl} = file:open(form_filename(NextNum), [write, raw, binary]), + {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), + FileSummary1 = dict:store(CurName, {ValidTotalSize, ContiguousTop, Left, NextName}, FileSummary), + {ok, State # dqstate { current_file_name = NextName, + current_file_handle = NextHdl, + current_file_num = NextNum, + file_summary = dict:store(NextName, {0, 0, CurName, undefined}, FileSummary1) + } + }; +maybe_roll_to_new_file(_, State) -> + {ok, State}. + +%% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- + +compact(File, State) -> + State. + %% ---- DISK RECOVERY ---- -load_from_disk(State = #dqstate{ msg_location = MsgLoc, - file_summary = FileSum, - file_detail = FileDetail - } - ) -> +load_from_disk(State) -> + % sorted so that smallest number is first. which also means eldest file (left-most) first {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), - {ok, State}. + % There should be no more tmp files now, so go ahead and load the whole lot + {ok, State1 = #dqstate{ msg_location = MsgLocation }} = load_messages(undefined, Files, State), + % Finally, check there is nothing in mnesia which we haven't loaded + true = lists:all(fun ({_Q, MsgId}) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, + mnesia:all_keys(rabbit_disk_queue)), + {ok, State1}. -recover_crashed_compactions(Files, []) -> +load_messages(undefined, [], State) -> + State; +load_messages(Left, [], State) -> + Num = list_to_integer(filename:rootname(Left)), + State # dqstate { current_file_num = Num, current_file_name = Left }; +load_messages(Left, [File|Files], + State = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + file_detail = FileDetail + }) -> + % [{MsgId, TotalSize, FileOffset}] + {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), + {ValidMessagesRev, ValidTotalSize} = lists:foldl( + fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) of + 0 -> {VMAcc, VTSAcc}; + RefCount -> + true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), + true = ets:insert_new(FileDetail, {File, Offset, TotalSize}), + {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES))} + end + end, {[], 0}, Messages), + % foldl reverses lists and find_contiguous_block_prefix needs elems in the same order + % as from scan_file_for_valid_messages + {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), + Right = case Files of + [] -> undefined; + [F|_] -> F + end, + State1 = State # dqstate { file_summary = + dict:store(File, {ValidTotalSize, ContiguousTop, Left, Right}, FileSummary) }, + load_messages(File, Files, State1). 
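+%% Recovery sketch: a message's RefCount is re-derived at startup by
+%% counting its {Queue, MsgId} rows in mnesia, so a message published to
+%% three queues and so far acked by one comes back as RefCount = 2, i.e.
+%%   2 =:= length(mnesia:match_object(rabbit_disk_queue,
+%%                                    {dq_msg_loc, {'_', MsgId}, '_'}))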
+ +%% ---- DISK RECOVERY OF FAILED COMPACTION ---- + +recover_crashed_compactions(_Files, []) -> ok; recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> NonTmpRelatedFile = filename:rootname(TmpFile) ++ (?FILE_EXTENSION), true = lists:member(NonTmpRelatedFile, Files), % [{MsgId, TotalSize, FileOffset}] - {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(filename:join(base_directory(), TmpFile)), + {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun ({MsgId, _TotalSize, _FileOffset}) -> 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) end, UncorruptedMessagesTmp), - {ok, UncorruptedMessages} = scan_file_for_valid_messages(filename:join(base_directory(), NonTmpRelatedFile)), + {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), %% 1) It's possible that everything in the tmp file is also in the main file %% such that the main file is (prefix ++ tmpfile). This means that compaction %% failed immediately prior to the final step of deleting the tmp file. @@ -123,7 +302,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % we should have that none of the messages in the prefix are in the tmp file true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, MsgIds), - {ok, MainHdl} = file:open(filename:join(base_directory(), NonTmpRelatedFile), [write, raw, binary]), + {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), [write, raw, binary]), {ok, Top} = file:position(MainHdl, Top), ok = file:truncate(MainHdl), % wipe out any rubbish at the end of the file % there really could be rubbish at the end of the file - we could have failed after the @@ -136,11 +315,18 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move % if we run out of disk space, this truncate could fail, but we still % aren't risking losing data - {ok, TmpHdl} = file:open(filename:join(base_directory(), TmpFile), [read, raw, binary]), + {ok, TmpHdl} = file:open(form_filename(TmpFile), [read, raw, binary]), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:close(MainHdl), ok = file:close(TmpHdl), - ok = file:delete(TmpFile) + ok = file:delete(TmpFile), + + {ok, MainMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + MsgIdsMain = lists:map(GrabMsgId, MainMessages), + % check that everything in MsgIds is in MsgIdsMain + true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIds), + % check that everything in MsgIdsTmp is in MsgIdsMain + true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIdsTmp) end, recover_crashed_compactions(Files, TmpFiles). @@ -160,7 +346,7 @@ find_contiguous_block_prefix([], _N, _Acc) -> find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); -find_contiguous_block_prefix(List, _ExpectedOffset, Acc) -> +find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> find_contiguous_block_prefix(List). 
file_name_sort(A, B) -> @@ -185,7 +371,8 @@ append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> case file:write(FileHdl, <>) of - ok -> file:write(FileHdl, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>); + ok -> ok = file:write(FileHdl, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>), + {ok, TotalSize}; KO -> KO end. @@ -200,7 +387,7 @@ read_message_at_offset(FileHdl, Offset) -> BodySize = TotalSize - MsgIdBinSize, case file:read(FileHdl, 1 + BodySize) of {ok, <>} -> - {ok, MsgBody, BodySize}; + {ok, {MsgBody, BodySize, TotalSize}}; KO -> KO end; KO -> KO @@ -213,7 +400,7 @@ read_message_at_offset(FileHdl, Offset) -> scan_file_for_valid_messages(File) -> {ok, Hdl} = file:open(File, [raw, binary, read]), Valid = scan_file_for_valid_messages(Hdl, 0, []), - file:close(Hdl), + file:close(Hdl), % if something really bad's happened, the close could fail, but ignore Valid. scan_file_for_valid_messages(FileHdl, Offset, Acc) -> @@ -223,7 +410,7 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> scan_file_for_valid_messages(FileHdl, NextOffset, Acc); {ok, {ok, MsgId, TotalSize, NextOffset}} -> scan_file_for_valid_messages(FileHdl, NextOffset, [{MsgId, TotalSize, Offset}|Acc]); - KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages + _KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages end. -- cgit v1.2.1 From d5e2b812842f34d8fea9590f2c1a3ad3ed0af90b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Apr 2009 15:42:40 +0100 Subject: All but the compaction/GC done now. Not tested at all. --- src/rabbit_disk_queue.erl | 224 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 171 insertions(+), 53 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b1aaa8db..5dc20953 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -31,7 +31,14 @@ -module(rabbit_disk_queue). --compile(export_all). %% CHANGE ME +-behaviour(gen_server). + +-export([start_link/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([publish/3, deliver/2, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]). -include_lib("stdlib/include/qlc.hrl"). -include("rabbit.hrl"). @@ -45,6 +52,8 @@ -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). +-define(SERVER, ?MODULE). + -record(dqstate, {msg_location, file_summary, file_detail, @@ -56,7 +65,32 @@ read_file_handles_limit }). -init(FileSizeLimit, ReadFileHandlesLimit) -> +%% ---- PUBLIC API ---- + +start_link(FileSizeLimit, ReadFileHandlesLimit) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [FileSizeLimit, ReadFileHandlesLimit], []). + +publish(Q, MsgId, Msg) when is_binary(Msg) -> + gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). + +deliver(Q, MsgId) -> + gen_server:call(?SERVER, {deliver, Q, MsgId}). + +ack(Q, MsgIds) when is_list(MsgIds) -> + gen_server:cast(?SERVER, {ack, Q, MsgIds}). + +tx_publish(MsgId, Msg) when is_binary(Msg) -> + gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}). + +tx_commit(Q, MsgIds) when is_list(MsgIds) -> + gen_server:call(?SERVER, {tx_commit, Q, MsgIds}). + +tx_cancel(MsgIds) when is_list(MsgIds) -> + gen_server:cast(?SERVER, {tx_cancel, MsgIds}). 
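+%% Usage sketch (queue name and message id are illustrative; message bodies
+%% must be binaries):
+%%   {ok, _Pid} = rabbit_disk_queue:start_link(1024 * 1024, 100),
+%%   ok = rabbit_disk_queue:tx_publish(msg1, <<"payload">>),
+%%   ok = rabbit_disk_queue:tx_commit(q, [msg1]),
+%%   {Body, BodySize, Delivered} = rabbit_disk_queue:deliver(q, msg1),
+%%   ok = rabbit_disk_queue:ack(q, [msg1]).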
+ +%% ---- GEN-SERVER INTERNAL API ---- + +init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), Dir = base_directory(), ok = filelib:ensure_dir(Dir), @@ -74,12 +108,54 @@ init(FileSizeLimit, ReadFileHandlesLimit) -> {ok, FileHdl} = file:open(form_filename(CurrentName), [append, raw, binary]), {ok, State1 # dqstate { current_file_handle = FileHdl }}. +handle_call({deliver, Q, MsgId}, _From, State) -> + {ok, {MsgBody, BodySize, Delivered}, State1} = internal_deliver(Q, MsgId, State), + {reply, {MsgBody, BodySize, Delivered}, State1}; +handle_call({tx_commit, Q, MsgIds}, _From, State) -> + {ok, State1} = internal_tx_commit(Q, MsgIds, State), + {reply, ok, State1}. + +handle_cast({publish, Q, MsgId, MsgBody}, State) -> + {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), + {noreply, State1}; +handle_cast({ack, Q, MsgIds}, State) -> + {ok, State1} = lists:foldl(fun (MsgId, {ok, State2}) -> + internal_ack(Q, MsgId, State2) + end, State, MsgIds), + {noreply, State1}; +handle_cast({tx_publish, MsgId, MsgBody}, State) -> + {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), + {noreply, State1}; +handle_cast({tx_cancel, MsgIds}, State) -> + {ok, State1} = internal_tx_cancel(MsgIds, State), + {noreply, State1}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, #dqstate { current_file_handle = FileHdl, + read_file_handles = {ReadHdls, _ReadHdlsAge} + }) -> + ok = file:sync(FileHdl), + ok = file:close(FileHdl), + dict:map(fun (_File, Hdl) -> + ok = file:close(Hdl) + end, ReadHdls). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% ---- UTILITY FUNCTIONS ---- + form_filename(Name) -> filename:join(base_directory(), Name). base_directory() -> filename:join(mnesia:system_info(directory), "/rabbit_disk_queue/"). +file_packing_adjustment_bytes() -> + 1 + (2* (?INTEGER_SIZE_BYTES)). + %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, @@ -88,9 +164,9 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, read_file_handles_limit = ReadFileHandlesLimit, read_file_handles = {ReadHdls, ReadHdlsAge} }) -> - [{MsgId, _RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), - if CurName =:= File -> - ok = file:sync(CurHdl) + [{MsgId, _RefCount, File, Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), + if CurName =:= File -> ok = file:sync(CurHdl); + true -> ok end, % so this next bit implements an LRU for file handles. But it's a bit insane, and smells % of premature optimisation. 
So I might remove it and dump it overboard @@ -104,7 +180,7 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, ReadHdlsAge)}; _False -> {_Then, OldFile, ReadHdlsAge2} = gb_trees:take_smallest(ReadHdlsAge), - OldHdl = dict:find(OldFile, ReadHdls), + {ok, OldHdl} = dict:find(OldFile, ReadHdls), ok = file:close(OldHdl), ReadHdls2 = dict:erase(OldFile, ReadHdls), {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), gb_trees:enter(Now, File, ReadHdlsAge2)} @@ -114,9 +190,11 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))} end, % read the message - {ok, {MsgBody, BodySize, TotalSize}} = read_message_at_offset(FileHdl, Offset), - ok = mnesia:write(rabbit_disk_queue, {Q, MsgId}, write), - {ok, {MsgBody, BodySize, TotalSize}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. + {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), + [{{Q, MsgId}, Delivered}] = mnesia:read(rabbit_disk_queue, {Q, MsgId}, read), + ok = mnesia:write(rabbit_disk_queue, {{Q, MsgId}, true}, write), + {ok, {MsgBody, BodySize, Delivered}, + State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary, @@ -130,48 +208,87 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, true = ets:delete_object(FileDetail, {File, Offset, TotalSize}), {ok, {ValidTotalSize, ContiguousTop, Left, Right}} = dict:find(File, FileSummary), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary2 = dict:store(File, {ValidTotalSize - TotalSize - 1 - (2* (?INTEGER_SIZE_BYTES)), + FileSummary2 = dict:store(File, {ValidTotalSize - TotalSize - file_packing_adjustment_bytes(), ContiguousTop1, Left, Right}, FileSummary), ok = mnesia:delete({rabbit_disk_queue, {Q, MsgId}}), FileSummary2; - true -> + 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), FileSummary end, State1 = compact(File, State # dqstate { file_summary = FileSummary1 } ), {ok, State1}. 
-internal_publish(Q, MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, +internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, current_file_handle = CurHdl, current_file_name = CurName, file_summary = FileSummary, file_detail = FileDetail - } - ) when is_binary(MsgBody) -> - {ok, State1} = - case ets:lookup(MsgLocation, MsgId) of - [] -> - % New message, lots to do - {ok, Offset} = file:position(CurHdl, cur), - {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), - true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), - true = ets:insert_new(FileDetail, {CurName, Offset, TotalSize}), - {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), - ValidTotalSize1 = ValidTotalSize + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), - ContiguousTop1 = if Offset =:= ContiguousTop -> - ValidTotalSize; % can't be any holes in this file - true -> ContiguousTop - end, - FileSummary2 = dict:store(CurName, {ValidTotalSize1, ContiguousTop1, Left, undefined}, FileSummary), - maybe_roll_to_new_file(Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), - State # dqstate { file_summary = FileSummary2 }); - [{MsgId, RefCount, File, Offset, TotalSize}] -> - % We already know about it, just update counter - ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), - {ok, State} - end, - ok = mnesia:write(rabbit_disk_queue, {Q, MsgId}, write), - {ok, State1}. + }) -> + case ets:lookup(MsgLocation, MsgId) of + [] -> + % New message, lots to do + {ok, Offset} = file:position(CurHdl, cur), + {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), + true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), + true = ets:insert_new(FileDetail, {CurName, Offset, TotalSize}), + {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), + ValidTotalSize1 = ValidTotalSize + TotalSize + file_packing_adjustment_bytes(), + ContiguousTop1 = if Offset =:= ContiguousTop -> + ValidTotalSize; % can't be any holes in this file + true -> ContiguousTop + end, + FileSummary2 = dict:store(CurName, {ValidTotalSize1, ContiguousTop1, Left, undefined}, FileSummary), + maybe_roll_to_new_file(Offset + TotalSize + file_packing_adjustment_bytes(), + State # dqstate { file_summary = FileSummary2 }); + [{MsgId, RefCount, File, Offset, TotalSize}] -> + % We already know about it, just update counter + ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), + {ok, State} + end. + +internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, + current_file_handle = CurHdl, + current_file_name = CurName + }) -> + {atomic, Sync} + = mnesia:transaction( + fun() -> lists:foldl(fun (MsgId, Acc) -> + [{MsgId, _RefCount, File, _Offset, _TotalSize}] = + ets:lookup(MsgLocation, MsgId), + ok = mnesia:write(rabbit_disk_queue, {{Q, MsgId}, false}, write), + Acc or (CurName =:= File) + end, false, MsgIds) + end), + if Sync -> ok = file:sync(CurHdl); + true -> ok + end, + {ok, State}. + +internal_publish(Q, MsgId, MsgBody, State) -> + {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), + internal_tx_commit(Q, [MsgId], State1). 
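+%% Row lifecycle in the rabbit_disk_queue mnesia table, keyed on {Q, MsgId}:
+%%   tx_publish -> bytes appended to the current .rdq file, no row yet;
+%%   tx_commit  -> row written with the delivered flag false (the current
+%%                 file is fsynced iff one of the messages lives in it);
+%%   deliver    -> delivered flag flipped to true;
+%%   ack        -> row deleted and the file bookkeeping shrunk for GC.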
+ +internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + file_detail = FileDetail }) -> + FileSummary1 = + lists:foldl(fun (MsgId, FileSummary2) -> + [{MsgId, RefCount, File, Offset, TotalSize}] + = ets:lookup(MsgLocation, MsgId), + if 1 =:= RefCount -> + true = ets:delete(MsgLocation, MsgId), + true = ets:delete_object(FileDetail, {File, Offset, TotalSize}), + {ok, {ValidTotalSize, ContiguousTop, Left, Right}} = dict:find(File, FileSummary2), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + dict:store(File, {ValidTotalSize - TotalSize - file_packing_adjustment_bytes(), + ContiguousTop1, Left, Right}, FileSummary2); + 1 < RefCount -> + ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + FileSummary2 + end + end, FileSummary, MsgIds), + {ok, State #dqstate { file_summary = FileSummary1 }}. %% ---- ROLLING OVER THE APPEND FILE ---- @@ -188,7 +305,7 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ (?FILE_EXTENSION), [] = ets:lookup(FileDetail, NextName), - {ok, NextHdl} = file:open(form_filename(NextNum), [write, raw, binary]), + {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary]), {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), FileSummary1 = dict:store(CurName, {ValidTotalSize, ContiguousTop, Left, NextName}, FileSummary), {ok, State # dqstate { current_file_name = NextName, @@ -212,7 +329,7 @@ load_from_disk(State) -> {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), % There should be no more tmp files now, so go ahead and load the whole lot - {ok, State1 = #dqstate{ msg_location = MsgLocation }} = load_messages(undefined, Files, State), + (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), % Finally, check there is nothing in mnesia which we haven't loaded true = lists:all(fun ({_Q, MsgId}) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, mnesia:all_keys(rabbit_disk_queue)), @@ -232,12 +349,12 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) of + case length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'}, read)) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), true = ets:insert_new(FileDetail, {File, Offset, TotalSize}), - {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES))} + {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + file_packing_adjustment_bytes()} end end, {[], 0}, Messages), % foldl reverses lists and find_contiguous_block_prefix needs elems in the same order @@ -262,7 +379,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun ({MsgId, _TotalSize, _FileOffset}) -> - 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'}, read)) end, 
UncorruptedMessagesTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), %% 1) It's possible that everything in the tmp file is also in the main file @@ -295,7 +412,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % we're in case 4 above. % check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'}, read)) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), @@ -309,7 +426,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % extending truncate. % Remember the head of the list will be the highest entry in the file [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, - TmpSize = TmpTopOffset + TmpTopTotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), + TmpSize = TmpTopOffset + TmpTopTotalSize + file_packing_adjustment_bytes(), ExpectedAbsPos = Top + TmpSize, {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move @@ -336,7 +453,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of - {ok, Acc} -> {Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)), lists:reverse(Acc)}; + {ok, Acc} -> {Offset + TotalSize + file_packing_adjustment_bytes(), lists:reverse(Acc)}; Res -> Res end. find_contiguous_block_prefix([], 0, Acc) -> @@ -344,7 +461,7 @@ find_contiguous_block_prefix([], 0, Acc) -> find_contiguous_block_prefix([], _N, _Acc) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) - when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> + when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> %% Can't use file_packing_adjustment_bytes() find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> find_contiguous_block_prefix(List). @@ -400,7 +517,7 @@ read_message_at_offset(FileHdl, Offset) -> scan_file_for_valid_messages(File) -> {ok, Hdl} = file:open(File, [raw, binary, read]), Valid = scan_file_for_valid_messages(Hdl, 0, []), - file:close(Hdl), % if something really bad's happened, the close could fail, but ignore + _ = file:close(Hdl), % if something really bad's happened, the close could fail, but ignore Valid. 
scan_file_for_valid_messages(FileHdl, Offset, Acc) -> @@ -415,12 +532,13 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> read_next_file_entry(FileHdl, Offset) -> - case file:read(FileHdl, 2 * (?INTEGER_SIZE_BYTES)) of + TwoIntegers = 2 * (?INTEGER_SIZE_BYTES), + case file:read(FileHdl, TwoIntegers) of {ok, <>} -> case {TotalSize =:= 0, MsgIdBinSize =:= 0} of {true, _} -> {ok, eof}; %% Nothing we can do other than stop {false, true} -> %% current message corrupted, try skipping past it - ExpectedAbsPos = Offset + (2* (?INTEGER_SIZE_BYTES)) + TotalSize + 1, + ExpectedAbsPos = Offset + file_packing_adjustment_bytes() + TotalSize, case file:position(FileHdl, {cur, TotalSize + 1}) of {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}}; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up @@ -429,15 +547,15 @@ read_next_file_entry(FileHdl, Offset) -> {false, false} -> %% all good, let's continue case file:read(FileHdl, MsgIdBinSize) of {ok, <>} -> - ExpectedAbsPos = Offset + (2 * (?INTEGER_SIZE_BYTES)) + TotalSize, + ExpectedAbsPos = Offset + TwoIntegers + TotalSize, case file:position(FileHdl, {cur, TotalSize - MsgIdBinSize}) of {ok, ExpectedAbsPos} -> case file:read(FileHdl, 1) of {ok, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>} -> {ok, {ok, binary_to_term(MsgId), TotalSize, - Offset + (2* (?INTEGER_SIZE_BYTES)) + TotalSize + 1}}; + Offset + file_packing_adjustment_bytes() + TotalSize}}; {ok, _SomeOtherData} -> - {ok, {corrupted, Offset + (2* (?INTEGER_SIZE_BYTES)) + TotalSize + 1}}; + {ok, {corrupted, Offset + file_packing_adjustment_bytes() + TotalSize}}; KO -> KO end; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up -- cgit v1.2.1 From 68c1abcf8b2d2e7cacab0b8a046d66e246259308 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Apr 2009 16:07:21 +0100 Subject: initial fixes from testing --- src/rabbit_disk_queue.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 5dc20953..dbd81215 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -92,8 +92,6 @@ tx_cancel(MsgIds) when is_list(MsgIds) -> init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), - Dir = base_directory(), - ok = filelib:ensure_dir(Dir), State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), file_summary = dict:new(), file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [bag, private]), @@ -105,7 +103,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> read_file_handles_limit = ReadFileHandlesLimit }, {ok, State1 = #dqstate { current_file_name = CurrentName } } = load_from_disk(State), - {ok, FileHdl} = file:open(form_filename(CurrentName), [append, raw, binary]), + Path = form_filename(CurrentName), + ok = filelib:ensure_dir(Path), + {ok, FileHdl} = file:open(Path, [append, raw, binary]), {ok, State1 # dqstate { current_file_handle = FileHdl }}. handle_call({deliver, Q, MsgId}, _From, State) -> @@ -151,7 +151,7 @@ form_filename(Name) -> filename:join(base_directory(), Name). base_directory() -> - filename:join(mnesia:system_info(directory), "/rabbit_disk_queue/"). + filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). file_packing_adjustment_bytes() -> 1 + (2* (?INTEGER_SIZE_BYTES)). 
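%% e.g. with 8-byte length integers the packing overhead per message is
%% 1 + 2 * 8 = 17 bytes, so an entry whose id + body total T bytes occupies
%% T + 17 bytes on disk.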
@@ -332,7 +332,7 @@ load_from_disk(State) -> (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), % Finally, check there is nothing in mnesia which we haven't loaded true = lists:all(fun ({_Q, MsgId}) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, - mnesia:all_keys(rabbit_disk_queue)), + mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), {ok, State1}. load_messages(undefined, [], State) -> @@ -349,7 +349,7 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'}, read)) of + case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), @@ -379,7 +379,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun ({MsgId, _TotalSize, _FileOffset}) -> - 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'}, read)) + 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) end, UncorruptedMessagesTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), %% 1) It's possible that everything in the tmp file is also in the main file @@ -412,7 +412,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % we're in case 4 above. % check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - 0 < length(mnesia:match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'}, read)) + 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), -- cgit v1.2.1 From eeaf8c0e2532c43c0289705b2ec91fe93c98a282 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Apr 2009 17:21:23 +0100 Subject: many bugs fixed. Still no compaction. However, performance is horrible, because the test which established ets:insert_new did what we wanted was actually flawed, and we can't do that, and ets:insert is not fast enough in a bag with many similar keys. What I want is to be able to say "yes, just insert this, I guarantee that whilst there are other equal keys in this bag, you don't need to go and try to find matching values". Hmph --- src/rabbit_disk_queue.erl | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index dbd81215..d5000683 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -74,7 +74,7 @@ publish(Q, MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). deliver(Q, MsgId) -> - gen_server:call(?SERVER, {deliver, Q, MsgId}). + gen_server:call(?SERVER, {deliver, Q, MsgId}, infinity). ack(Q, MsgIds) when is_list(MsgIds) -> gen_server:cast(?SERVER, {ack, Q, MsgIds}). @@ -83,7 +83,7 @@ tx_publish(MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}). 
tx_commit(Q, MsgIds) when is_list(MsgIds) -> - gen_server:call(?SERVER, {tx_commit, Q, MsgIds}). + gen_server:call(?SERVER, {tx_commit, Q, MsgIds}, infinity). tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server:cast(?SERVER, {tx_cancel, MsgIds}). @@ -92,11 +92,12 @@ tx_cancel(MsgIds) when is_list(MsgIds) -> init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), + InitName = "0" ++ (?FILE_EXTENSION), State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), - file_summary = dict:new(), + file_summary = dict:store(InitName, {0, 0, undefined, undefined}, dict:new()), file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [bag, private]), current_file_num = 0, - current_file_name = "0" ++ (?FILE_EXTENSION), + current_file_name = InitName, current_file_handle = undefined, file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, @@ -121,7 +122,7 @@ handle_cast({publish, Q, MsgId, MsgBody}, State) -> handle_cast({ack, Q, MsgIds}, State) -> {ok, State1} = lists:foldl(fun (MsgId, {ok, State2}) -> internal_ack(Q, MsgId, State2) - end, State, MsgIds), + end, {ok, State}, MsgIds), {noreply, State1}; handle_cast({tx_publish, MsgId, MsgBody}, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), @@ -136,11 +137,12 @@ handle_info(_Info, State) -> terminate(_Reason, #dqstate { current_file_handle = FileHdl, read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> + io:format("DYING~n", []), ok = file:sync(FileHdl), ok = file:close(FileHdl), - dict:map(fun (_File, Hdl) -> - ok = file:close(Hdl) - end, ReadHdls). + dict:fold(fun (_File, Hdl, _Acc) -> + file:close(Hdl) + end, ok, ReadHdls). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -191,8 +193,8 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, end, % read the message {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), - [{{Q, MsgId}, Delivered}] = mnesia:read(rabbit_disk_queue, {Q, MsgId}, read), - ok = mnesia:write(rabbit_disk_queue, {{Q, MsgId}, true}, write), + [#dq_msg_loc {queue_and_msg_id = {Q, MsgId}, is_delivered = Delivered}] = mnesia:dirty_read(rabbit_disk_queue, {Q, MsgId}), + ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc {queue_and_msg_id = {Q, MsgId}, is_delivered = true}), {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. 
@@ -210,7 +212,7 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, ContiguousTop1 = lists:min([ContiguousTop, Offset]), FileSummary2 = dict:store(File, {ValidTotalSize - TotalSize - file_packing_adjustment_bytes(), ContiguousTop1, Left, Right}, FileSummary), - ok = mnesia:delete({rabbit_disk_queue, {Q, MsgId}}), + ok = mnesia:dirty_delete({rabbit_disk_queue, {Q, MsgId}}), FileSummary2; 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), @@ -229,9 +231,10 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio [] -> % New message, lots to do {ok, Offset} = file:position(CurHdl, cur), + io:format("Reported file position: ~p~n", [Offset]), {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), - true = ets:insert_new(FileDetail, {CurName, Offset, TotalSize}), + true = ets:insert(FileDetail, {CurName, Offset, TotalSize}), {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), ValidTotalSize1 = ValidTotalSize + TotalSize + file_packing_adjustment_bytes(), ContiguousTop1 = if Offset =:= ContiguousTop -> @@ -243,7 +246,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio State # dqstate { file_summary = FileSummary2 }); [{MsgId, RefCount, File, Offset, TotalSize}] -> % We already know about it, just update counter - ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), + true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), {ok, State} end. @@ -256,7 +259,7 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, fun() -> lists:foldl(fun (MsgId, Acc) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), - ok = mnesia:write(rabbit_disk_queue, {{Q, MsgId}, false}, write), + ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {Q, MsgId}, is_delivered = false}, write), Acc or (CurName =:= File) end, false, MsgIds) end), @@ -353,7 +356,7 @@ load_messages(Left, [File|Files], 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), - true = ets:insert_new(FileDetail, {File, Offset, TotalSize}), + true = ets:insert(FileDetail, {File, Offset, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + file_packing_adjustment_bytes()} end end, {[], 0}, Messages), @@ -373,15 +376,18 @@ load_messages(Left, [File|Files], recover_crashed_compactions(_Files, []) -> ok; recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> + GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, NonTmpRelatedFile = filename:rootname(TmpFile) ++ (?FILE_EXTENSION), true = lists:member(NonTmpRelatedFile, Files), % [{MsgId, TotalSize, FileOffset}] {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), + MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out - lists:foreach(fun ({MsgId, _TotalSize, _FileOffset}) -> - 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) - end, UncorruptedMessagesTmp), + lists:foreach(fun (MsgId) -> + true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + end, MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), 
+ MsgIds = lists:map(GrabMsgId, UncorruptedMessages), %% 1) It's possible that everything in the tmp file is also in the main file %% such that the main file is (prefix ++ tmpfile). This means that compaction %% failed immediately prior to the final step of deleting the tmp file. @@ -401,9 +407,6 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> %% As the main file has already been truncated, it should consist only of valid messages %% Plan: Truncate the main file back to before any of the files in the tmp file and copy %% them over again - GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, - MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), - MsgIds = lists:map(GrabMsgId, UncorruptedMessages), case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of true -> % we're in case 1, 2 or 3 above. Just delete the tmp file % note this also catches the case when the tmp file is empty @@ -412,7 +415,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % we're in case 4 above. % check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), -- cgit v1.2.1 From 7be550055b3de47744a1515992eb438cd8887984 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Apr 2009 18:01:31 +0100 Subject: refactored away from using the ets bag incorrectly. However, we're still cpu bound, need to do some profiling. rabbit_disk_queue:start_link(1024*1024, 100). [rabbit_disk_queue:publish(q, N, <>) || N <- lists:seq(1,1024*1024)]. --- src/rabbit_disk_queue.erl | 106 +++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d5000683..66b9b33b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -48,15 +48,15 @@ -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, 8 * ?INTEGER_SIZE_BYTES). -define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). --define(FILE_DETAIL_ETS_NAME, rabbit_disk_queue_file_detail). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(SERVER, ?MODULE). +-record(dqfile, {valid_data, contiguous_prefix, left, right, detail}). + -record(dqstate, {msg_location, file_summary, - file_detail, current_file_num, current_file_name, current_file_handle, @@ -94,8 +94,12 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), InitName = "0" ++ (?FILE_EXTENSION), State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), - file_summary = dict:store(InitName, {0, 0, undefined, undefined}, dict:new()), - file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [bag, private]), + file_summary = dict:store(InitName, (#dqfile { valid_data = 0, + contiguous_prefix = 0, + left = undefined, + right = undefined, + detail = dict:new()}), + dict:new()), current_file_num = 0, current_file_name = InitName, current_file_handle = undefined, @@ -199,19 +203,23 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. 
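For anyone puzzling over what "using the ets bag incorrectly" in the subject means: in a bag table the key is only the first element of the tuple, so ets:insert_new/2 refuses a second {File, Offset, TotalSize} entry for a file that already has one, even though the object itself is distinct. A standalone illustration, not part of this patch:

bag_demo() ->
    T = ets:new(bag_demo, [bag]),
    true  = ets:insert_new(T, {"0.rdq", 0, 10}),
    false = ets:insert_new(T, {"0.rdq", 10, 20}), %% refused: key "0.rdq" already present
    true  = ets:insert(T, {"0.rdq", 10, 20}),     %% fine: distinct object, same key
    2 = length(ets:lookup(T, "0.rdq")),
    ok.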
internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail - }) -> + file_summary = FileSummary + }) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), % is this the last time we need the message, in which case tidy up FileSummary1 = if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), - true = ets:delete_object(FileDetail, {File, Offset, TotalSize}), - {ok, {ValidTotalSize, ContiguousTop, Left, Right}} = dict:find(File, FileSummary), + {ok, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + detail = FileDetail }} + = dict:find(File, FileSummary), + FileDetail1 = dict:erase(Offset, FileDetail), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary2 = dict:store(File, {ValidTotalSize - TotalSize - file_packing_adjustment_bytes(), - ContiguousTop1, Left, Right}, FileSummary), + FileSummary2 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - file_packing_adjustment_bytes()), + contiguous_prefix = ContiguousTop1, + detail = FileDetail1 + }, FileSummary), ok = mnesia:dirty_delete({rabbit_disk_queue, {Q, MsgId}}), FileSummary2; 1 < RefCount -> @@ -224,8 +232,7 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, current_file_handle = CurHdl, current_file_name = CurName, - file_summary = FileSummary, - file_detail = FileDetail + file_summary = FileSummary }) -> case ets:lookup(MsgLocation, MsgId) of [] -> @@ -234,16 +241,23 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio io:format("Reported file position: ~p~n", [Offset]), {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), - true = ets:insert(FileDetail, {CurName, Offset, TotalSize}), - {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), + {ok, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + right = undefined, + detail = FileDetail }} + = dict:find(CurName, FileSummary), + FileDetail1 = dict:store(Offset, TotalSize, FileDetail), ValidTotalSize1 = ValidTotalSize + TotalSize + file_packing_adjustment_bytes(), ContiguousTop1 = if Offset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file true -> ContiguousTop end, - FileSummary2 = dict:store(CurName, {ValidTotalSize1, ContiguousTop1, Left, undefined}, FileSummary), + FileSummary1 = dict:store(CurName, FileSum #dqfile { valid_data = ValidTotalSize1, + contiguous_prefix = ContiguousTop1, + detail = FileDetail1}, + FileSummary), maybe_roll_to_new_file(Offset + TotalSize + file_packing_adjustment_bytes(), - State # dqstate { file_summary = FileSummary2 }); + State # dqstate { file_summary = FileSummary1 }); [{MsgId, RefCount, File, Offset, TotalSize}] -> % We already know about it, just update counter true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), @@ -273,19 +287,24 @@ internal_publish(Q, MsgId, MsgBody, State) -> internal_tx_commit(Q, [MsgId], State1). 
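The same fetch-update-store dance against the file_summary dict recurs in each of these functions. Pulled out in isolation it looks like this (bump_valid_data/3 is a hypothetical helper for illustration, not patch code):

bump_valid_data(File, Delta, FileSummary) ->
    {ok, FileSum = #dqfile { valid_data = ValidData }} =
        dict:find(File, FileSummary),
    %% record-update syntax leaves the other fields untouched
    dict:store(File, FileSum #dqfile { valid_data = ValidData + Delta },
               FileSummary).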
internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail }) -> + file_summary = FileSummary + }) -> FileSummary1 = lists:foldl(fun (MsgId, FileSummary2) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), - true = ets:delete_object(FileDetail, {File, Offset, TotalSize}), - {ok, {ValidTotalSize, ContiguousTop, Left, Right}} = dict:find(File, FileSummary2), + {ok, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + detail = FileDetail }} + = dict:find(File, FileSummary2), + FileDetail1 = dict:erase(Offset, FileDetail), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - dict:store(File, {ValidTotalSize - TotalSize - file_packing_adjustment_bytes(), - ContiguousTop1, Left, Right}, FileSummary2); + dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - file_packing_adjustment_bytes()), + contiguous_prefix = ContiguousTop1, + detail = FileDetail1 + }, FileSummary2); 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), FileSummary2 @@ -299,22 +318,25 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi current_file_name = CurName, current_file_handle = CurHdl, current_file_num = CurNum, - file_summary = FileSummary, - file_detail = FileDetail + file_summary = FileSummary } ) when Offset >= FileSizeLimit -> ok = file:sync(CurHdl), ok = file:close(CurHdl), NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ (?FILE_EXTENSION), - [] = ets:lookup(FileDetail, NextName), {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary]), - {ok, {ValidTotalSize, ContiguousTop, Left, undefined}} = dict:find(CurName, FileSummary), - FileSummary1 = dict:store(CurName, {ValidTotalSize, ContiguousTop, Left, NextName}, FileSummary), + {ok, FileSum = #dqfile {right = undefined}} = dict:find(CurName, FileSummary), + FileSummary1 = dict:store(CurName, FileSum #dqfile {right = NextName}, FileSummary), {ok, State # dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, - file_summary = dict:store(NextName, {0, 0, CurName, undefined}, FileSummary1) + file_summary = dict:store(NextName, #dqfile { valid_data = 0, + contiguous_prefix = 0, + left = CurName, + right = undefined, + detail = dict:new()}, + FileSummary1) } }; maybe_roll_to_new_file(_, State) -> @@ -345,21 +367,22 @@ load_messages(Left, [], State) -> State # dqstate { current_file_num = Num, current_file_name = Left }; load_messages(Left, [File|Files], State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail + file_summary = FileSummary }) -> % [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), - {ValidMessagesRev, ValidTotalSize} = lists:foldl( - fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + {ValidMessagesRev, ValidTotalSize, FileDetail} = lists:foldl( + fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc, FileDetail1}) -> case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) of - 0 -> {VMAcc, VTSAcc}; + 0 -> {VMAcc, VTSAcc, FileDetail1}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), - true = ets:insert(FileDetail, {File, Offset, TotalSize}), - {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + 
file_packing_adjustment_bytes()} + {[{MsgId, TotalSize, Offset}|VMAcc], + VTSAcc + TotalSize + file_packing_adjustment_bytes(), + dict:store(Offset, TotalSize, FileDetail1) + } end - end, {[], 0}, Messages), + end, {[], 0, dict:new()}, Messages), % foldl reverses lists and find_contiguous_block_prefix needs elems in the same order % as from scan_file_for_valid_messages {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), @@ -368,7 +391,14 @@ load_messages(Left, [File|Files], [F|_] -> F end, State1 = State # dqstate { file_summary = - dict:store(File, {ValidTotalSize, ContiguousTop, Left, Right}, FileSummary) }, + dict:store(File, #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + left = Left, + right = Right, + detail = FileDetail + }, + FileSummary) + }, load_messages(File, Files, State1). %% ---- DISK RECOVERY OF FAILED COMPACTION ---- -- cgit v1.2.1 From 18bfaddc3315c2286014e64ad17e0abb4c575bb1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Apr 2009 18:55:58 +0100 Subject: A couple more fixes. rabbit_disk_queue:start_link(1024*10, 100). List = lists:seq(1,1024*32), [rabbit_disk_queue:tx_publish(N, <>) || N <- List], rabbit_disk_queue:tx_commit(q, List), io:format("committed~n", []), [rabbit_disk_queue:deliver(q, N) || N <- List], io:format("delivered~n", []), rabbit_disk_queue:ack(q, List), rabbit_disk_queue:tx_commit(q, []), io:format("acked~n", []). --- src/rabbit_disk_queue.erl | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 66b9b33b..e7100061 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -165,15 +165,15 @@ file_packing_adjustment_bytes() -> %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, - current_file_handle = CurHdl, - current_file_name = CurName, + %current_file_handle = CurHdl, + %current_file_name = CurName, read_file_handles_limit = ReadFileHandlesLimit, read_file_handles = {ReadHdls, ReadHdlsAge} }) -> [{MsgId, _RefCount, File, Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), - if CurName =:= File -> ok = file:sync(CurHdl); - true -> ok - end, + %if CurName =:= File -> ok = file:sync(CurHdl); % don't think this is necessary. Within a process you should always have a consistent view of a file + % true -> ok + %end, % so this next bit implements an LRU for file handles. But it's a bit insane, and smells % of premature optimisation. 
So I might remove it and dump it overboard {FileHdl, ReadHdls1, ReadHdlsAge1} @@ -186,7 +186,7 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, ReadHdlsAge)}; _False -> {_Then, OldFile, ReadHdlsAge2} = gb_trees:take_smallest(ReadHdlsAge), - {ok, OldHdl} = dict:find(OldFile, ReadHdls), + {ok, {OldHdl, _Then}} = dict:find(OldFile, ReadHdls), ok = file:close(OldHdl), ReadHdls2 = dict:erase(OldFile, ReadHdls), {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), gb_trees:enter(Now, File, ReadHdlsAge2)} @@ -197,8 +197,8 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, end, % read the message {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), - [#dq_msg_loc {queue_and_msg_id = {Q, MsgId}, is_delivered = Delivered}] = mnesia:dirty_read(rabbit_disk_queue, {Q, MsgId}), - ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc {queue_and_msg_id = {Q, MsgId}, is_delivered = true}), + [#dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = Delivered}] = mnesia:dirty_read(rabbit_disk_queue, {MsgId, Q}), + ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = true}), {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. @@ -220,7 +220,7 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, contiguous_prefix = ContiguousTop1, detail = FileDetail1 }, FileSummary), - ok = mnesia:dirty_delete({rabbit_disk_queue, {Q, MsgId}}), + ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), FileSummary2; 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), @@ -238,7 +238,6 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio [] -> % New message, lots to do {ok, Offset} = file:position(CurHdl, cur), - io:format("Reported file position: ~p~n", [Offset]), {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), {ok, FileSum = #dqfile { valid_data = ValidTotalSize, @@ -273,7 +272,7 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, fun() -> lists:foldl(fun (MsgId, Acc) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), - ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {Q, MsgId}, is_delivered = false}, write), + ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {MsgId, Q}, is_delivered = false}, write), Acc or (CurName =:= File) end, false, MsgIds) end), @@ -284,7 +283,9 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, internal_publish(Q, MsgId, MsgBody, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - internal_tx_commit(Q, [MsgId], State1). + ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {MsgId, Q}, is_delivered = false}), + {ok, State1}. 
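The "bit insane" LRU above, boiled down to its essentials (a sketch under the same representation - ReadHdls maps File -> {Hdl, LastUsed}, ReadHdlsAge maps LastUsed -> File, so the least recently used handle is always gb_trees:take_smallest/1 - not patch code):

lru_add(File, Hdl, Limit, {ReadHdls, ReadHdlsAge}) ->
    Now = now(),
    case dict:size(ReadHdls) < Limit of
        true ->
            {dict:store(File, {Hdl, Now}, ReadHdls),
             gb_trees:enter(Now, File, ReadHdlsAge)};
        false ->
            %% at the limit: evict the least recently used handle first
            {_Then, OldFile, ReadHdlsAge1} = gb_trees:take_smallest(ReadHdlsAge),
            {ok, {OldHdl, _}} = dict:find(OldFile, ReadHdls),
            ok = file:close(OldHdl),
            {dict:store(File, {Hdl, Now}, dict:erase(OldFile, ReadHdls)),
             gb_trees:enter(Now, File, ReadHdlsAge1)}
    end.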
+ internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary @@ -356,7 +357,7 @@ load_from_disk(State) -> % There should be no more tmp files now, so go ahead and load the whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), % Finally, check there is nothing in mnesia which we haven't loaded - true = lists:all(fun ({_Q, MsgId}) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, + true = lists:all(fun ({MsgId, _Q}) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), {ok, State1}. @@ -373,7 +374,7 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize, FileDetail} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc, FileDetail1}) -> - case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) of + case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) of 0 -> {VMAcc, VTSAcc, FileDetail1}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), @@ -414,7 +415,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) end, MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), @@ -445,7 +446,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % we're in case 4 above. % check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {'_', MsgId}, '_'})) + true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), -- cgit v1.2.1 From c7931482ed889a81aa88b8ab22058631ccc090d4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Apr 2009 23:21:54 +0100 Subject: did some profiling and brought out an mnesia lock much earlier. This means that for large commits, the resulting mnesia transaction is _much_ faster. also fixed makefile so that cleandb wipes out disk_queue too --- Makefile | 2 +- src/rabbit_disk_queue.erl | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 47aa586c..b7464244 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ clean: cleandb rm -f docs/*.[0-9].gz cleandb: stop-node - erl -mnesia dir '"$(RABBITMQ_MNESIA_DIR)"' -noshell -eval 'lists:foreach(fun file:delete/1, filelib:wildcard(mnesia:system_info(directory) ++ "/*")), halt().' + erl -mnesia dir '"$(RABBITMQ_MNESIA_DIR)"' -noshell -eval 'lists:foreach(fun file:delete/1, filelib:wildcard(mnesia:system_info(directory) ++ "/*") ++ filelib:wildcard(filename:join(mnesia:system_info(directory), "rabbit_disk_queue/*"))), halt().' 
############ various tasks to interact with RabbitMQ ################### diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e7100061..391c9b71 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -269,7 +269,8 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, }) -> {atomic, Sync} = mnesia:transaction( - fun() -> lists:foldl(fun (MsgId, Acc) -> + fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foldl(fun (MsgId, Acc) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {MsgId, Q}, is_delivered = false}, write), -- cgit v1.2.1 From 19224fb7340d07615ef441272b5a6eebc8a97eaa Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 01:05:09 +0100 Subject: Mainly removing the unnecessary file:position in append. This wasn't needed as I can track the position myself, and profiling showed that it is expensive (which I'd kinda been aware of before). Having removed it, things are quite a bit faster - though still CPU bound with smaller messages. Next step is to convert dicts in state into ets tables. --- src/rabbit_disk_queue.erl | 69 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 391c9b71..8affd7aa 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -50,6 +50,7 @@ -define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). +-define(FILE_PACKING_ADJUSTMENT, 1 + (2* (?INTEGER_SIZE_BYTES))). -define(SERVER, ?MODULE). @@ -60,6 +61,7 @@ current_file_num, current_file_name, current_file_handle, + current_offset, file_size_limit, read_file_handles, read_file_handles_limit @@ -103,14 +105,16 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_file_num = 0, current_file_name = InitName, current_file_handle = undefined, + current_offset = 0, file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, read_file_handles_limit = ReadFileHandlesLimit }, - {ok, State1 = #dqstate { current_file_name = CurrentName } } = load_from_disk(State), + {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = load_from_disk(State), Path = form_filename(CurrentName), ok = filelib:ensure_dir(Path), - {ok, FileHdl} = file:open(Path, [append, raw, binary]), + {ok, FileHdl} = file:open(Path, [read, write, raw, binary]), %% read only needed so that we can seek + {ok, Offset} = file:position(FileHdl, {bof, Offset}), {ok, State1 # dqstate { current_file_handle = FileHdl }}. handle_call({deliver, Q, MsgId}, _From, State) -> @@ -159,21 +163,13 @@ form_filename(Name) -> base_directory() -> filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). -file_packing_adjustment_bytes() -> - 1 + (2* (?INTEGER_SIZE_BYTES)). - %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, - %current_file_handle = CurHdl, - %current_file_name = CurName, read_file_handles_limit = ReadFileHandlesLimit, read_file_handles = {ReadHdls, ReadHdlsAge} }) -> [{MsgId, _RefCount, File, Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), - %if CurName =:= File -> ok = file:sync(CurHdl); % don't think this is necessary. 
Within a process you should always have a consistent view of a file - % true -> ok - %end, % so this next bit implements an LRU for file handles. But it's a bit insane, and smells % of premature optimisation. So I might remove it and dump it overboard {FileHdl, ReadHdls1, ReadHdlsAge1} @@ -198,7 +194,9 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, % read the message {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), [#dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = Delivered}] = mnesia:dirty_read(rabbit_disk_queue, {MsgId, Q}), - ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = true}), + if Delivered -> ok; + true -> ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = true}) + end, {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. @@ -216,7 +214,7 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, = dict:find(File, FileSummary), FileDetail1 = dict:erase(Offset, FileDetail), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary2 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - file_packing_adjustment_bytes()), + FileSummary2 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), contiguous_prefix = ContiguousTop1, detail = FileDetail1 }, FileSummary), @@ -232,12 +230,12 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, current_file_handle = CurHdl, current_file_name = CurName, + current_offset = Offset, file_summary = FileSummary }) -> case ets:lookup(MsgLocation, MsgId) of [] -> % New message, lots to do - {ok, Offset} = file:position(CurHdl, cur), {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), {ok, FileSum = #dqfile { valid_data = ValidTotalSize, @@ -246,7 +244,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio detail = FileDetail }} = dict:find(CurName, FileSummary), FileDetail1 = dict:store(Offset, TotalSize, FileDetail), - ValidTotalSize1 = ValidTotalSize + TotalSize + file_packing_adjustment_bytes(), + ValidTotalSize1 = ValidTotalSize + TotalSize + (?FILE_PACKING_ADJUSTMENT), ContiguousTop1 = if Offset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file true -> ContiguousTop @@ -255,8 +253,10 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio contiguous_prefix = ContiguousTop1, detail = FileDetail1}, FileSummary), - maybe_roll_to_new_file(Offset + TotalSize + file_packing_adjustment_bytes(), - State # dqstate { file_summary = FileSummary1 }); + maybe_roll_to_new_file(Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT), + State # dqstate { file_summary = FileSummary1, + current_offset = Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT) + }); [{MsgId, RefCount, File, Offset, TotalSize}] -> % We already know about it, just update counter true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), @@ -303,7 +303,7 @@ internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, = dict:find(File, FileSummary2), FileDetail1 = dict:erase(Offset, FileDetail), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - dict:store(File, FileSum #dqfile { 
valid_data = (ValidTotalSize - TotalSize - file_packing_adjustment_bytes()), + dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), contiguous_prefix = ContiguousTop1, detail = FileDetail1 }, FileSummary2); @@ -333,6 +333,7 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi {ok, State # dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, + current_offset = 0, file_summary = dict:store(NextName, #dqfile { valid_data = 0, contiguous_prefix = 0, left = CurName, @@ -364,9 +365,14 @@ load_from_disk(State) -> load_messages(undefined, [], State) -> State; -load_messages(Left, [], State) -> +load_messages(Left, [], State = #dqstate { file_summary = Summary }) -> Num = list_to_integer(filename:rootname(Left)), - State # dqstate { current_file_num = Num, current_file_name = Left }; + {ok, #dqfile { detail = FileDetail }} = dict:find(Left, Summary), + Offset = dict:fold(fun (Offset1, TotalSize, Acc) -> + Acc1 = Offset1 + TotalSize + (?FILE_PACKING_ADJUSTMENT), + lists:max([Acc, Acc1]) + end, 0, FileDetail), + State # dqstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; load_messages(Left, [File|Files], State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary @@ -380,7 +386,7 @@ load_messages(Left, [File|Files], RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], - VTSAcc + TotalSize + file_packing_adjustment_bytes(), + VTSAcc + TotalSize + (?FILE_PACKING_ADJUSTMENT), dict:store(Offset, TotalSize, FileDetail1) } end @@ -405,9 +411,12 @@ load_messages(Left, [File|Files], %% ---- DISK RECOVERY OF FAILED COMPACTION ---- -recover_crashed_compactions(_Files, []) -> - ok; -recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> +recover_crashed_compactions(Files, TmpFiles) -> + lists:foreach(fun (TmpFile) -> ok = recover_crashed_compactions1(Files, TmpFile) end, + TmpFiles), + ok. + +recover_crashed_compactions1(Files, TmpFile) -> GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, NonTmpRelatedFile = filename:rootname(TmpFile) ++ (?FILE_EXTENSION), true = lists:member(NonTmpRelatedFile, Files), @@ -461,7 +470,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % extending truncate. % Remember the head of the list will be the highest entry in the file [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, - TmpSize = TmpTopOffset + TmpTopTotalSize + file_packing_adjustment_bytes(), + TmpSize = TmpTopOffset + TmpTopTotalSize + (?FILE_PACKING_ADJUSTMENT), ExpectedAbsPos = Top + TmpSize, {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move @@ -480,7 +489,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> % check that everything in MsgIdsTmp is in MsgIdsMain true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIdsTmp) end, - recover_crashed_compactions(Files, TmpFiles). + ok. % this assumes that the messages are ordered such that the highest address is at % the head of the list. 
@@ -488,7 +497,7 @@ recover_crashed_compactions(Files, [TmpFile|TmpFiles]) -> find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of - {ok, Acc} -> {Offset + TotalSize + file_packing_adjustment_bytes(), lists:reverse(Acc)}; + {ok, Acc} -> {Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT), lists:reverse(Acc)}; Res -> Res end. find_contiguous_block_prefix([], 0, Acc) -> @@ -496,7 +505,7 @@ find_contiguous_block_prefix([], 0, Acc) -> find_contiguous_block_prefix([], _N, _Acc) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) - when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> %% Can't use file_packing_adjustment_bytes() + when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> %% Can't use (?FILE_PACKING_ADJUSTMENT) find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> find_contiguous_block_prefix(List). @@ -573,7 +582,7 @@ read_next_file_entry(FileHdl, Offset) -> case {TotalSize =:= 0, MsgIdBinSize =:= 0} of {true, _} -> {ok, eof}; %% Nothing we can do other than stop {false, true} -> %% current message corrupted, try skipping past it - ExpectedAbsPos = Offset + file_packing_adjustment_bytes() + TotalSize, + ExpectedAbsPos = Offset + (?FILE_PACKING_ADJUSTMENT) + TotalSize, case file:position(FileHdl, {cur, TotalSize + 1}) of {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}}; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up @@ -588,9 +597,9 @@ read_next_file_entry(FileHdl, Offset) -> case file:read(FileHdl, 1) of {ok, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>} -> {ok, {ok, binary_to_term(MsgId), TotalSize, - Offset + file_packing_adjustment_bytes() + TotalSize}}; + Offset + (?FILE_PACKING_ADJUSTMENT) + TotalSize}}; {ok, _SomeOtherData} -> - {ok, {corrupted, Offset + file_packing_adjustment_bytes() + TotalSize}}; + {ok, {corrupted, Offset + (?FILE_PACKING_ADJUSTMENT) + TotalSize}}; KO -> KO end; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up -- cgit v1.2.1 From 39675a2671eecf7a0a5bf61409bb299f5042a8df Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 01:20:44 +0100 Subject: err, we only need to attempt compaction if we removed something! 
--- src/rabbit_disk_queue.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 8affd7aa..2f27fc6b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -205,7 +205,7 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, }) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), % is this the last time we need the message, in which case tidy up - FileSummary1 = + State1 = if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), {ok, FileSum = #dqfile { valid_data = ValidTotalSize, @@ -214,17 +214,16 @@ internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, = dict:find(File, FileSummary), FileDetail1 = dict:erase(Offset, FileDetail), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary2 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + FileSummary1 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), contiguous_prefix = ContiguousTop1, detail = FileDetail1 }, FileSummary), ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), - FileSummary2; + compact(File, State # dqstate { file_summary = FileSummary1 } ); 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - FileSummary + State end, - State1 = compact(File, State # dqstate { file_summary = FileSummary1 } ), {ok, State1}. internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, -- cgit v1.2.1 From 8b964d894c8976123f36698fbdb1e9615161aba4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 01:24:10 +0100 Subject: oops, forgot to remove that io:format in terminate --- src/rabbit_disk_queue.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2f27fc6b..dc23c9ca 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -145,7 +145,6 @@ handle_info(_Info, State) -> terminate(_Reason, #dqstate { current_file_handle = FileHdl, read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> - io:format("DYING~n", []), ok = file:sync(FileHdl), ok = file:close(FileHdl), dict:fold(fun (_File, Hdl, _Acc) -> -- cgit v1.2.1 From cc37e02bb6c2c0b6b64aac5a4202d175dd3557dd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 11:26:13 +0100 Subject: only do compact after having ack'd all messages. This should prevent unnecessary compactions (i.e. we now have the ability to scan and delete all now-empty files before attempting any compaction). 
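The shape of the change, in miniature (simplified sketch: remove_message/3 is a stand-in for the per-message bookkeeping in the diff, and State threading is elided since the tables are ets):

ack_all(Q, MsgIds, State) ->
    Files = lists:foldl(fun (MsgId, Acc) ->
                            File = remove_message(Q, MsgId, State), %% hypothetical helper
                            sets:add_element(File, Acc)
                        end, sets:new(), MsgIds),
    compact(Files, State).  %% one compaction pass per batch, not per message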
--- src/rabbit_disk_queue.erl | 70 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index dc23c9ca..e605828e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -128,9 +128,7 @@ handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), {noreply, State1}; handle_cast({ack, Q, MsgIds}, State) -> - {ok, State1} = lists:foldl(fun (MsgId, {ok, State2}) -> - internal_ack(Q, MsgId, State2) - end, {ok, State}, MsgIds), + {ok, State1} = internal_ack(Q, MsgIds, State), {noreply, State1}; handle_cast({tx_publish, MsgId, MsgBody}, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), @@ -199,31 +197,35 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. -internal_ack(Q, MsgId, State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary - }) -> - [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), - % is this the last time we need the message, in which case tidy up - State1 = - if 1 =:= RefCount -> - true = ets:delete(MsgLocation, MsgId), - {ok, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - detail = FileDetail }} - = dict:find(File, FileSummary), - FileDetail1 = dict:erase(Offset, FileDetail), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary1 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1, - detail = FileDetail1 - }, FileSummary), - ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), - compact(File, State # dqstate { file_summary = FileSummary1 } ); - 1 < RefCount -> - ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - State - end, - {ok, State1}. +internal_ack(Q, MsgIds, State) -> + {Files, State1} + = lists:foldl(fun (MsgId, {Files1, State2 = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary + }}) -> + [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), + % is this the last time we need the message, in which case tidy up + if 1 =:= RefCount -> + true = ets:delete(MsgLocation, MsgId), + {ok, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + detail = FileDetail }} + = dict:find(File, FileSummary), + FileDetail1 = dict:erase(Offset, FileDetail), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + FileSummary1 + = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + contiguous_prefix = ContiguousTop1, + detail = FileDetail1 + }, FileSummary), + ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), + {sets:add_element(File, Files1), State2 # dqstate { file_summary = FileSummary1 }}; + 1 < RefCount -> + ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + {Files1, State2} + end + end, {sets:new(), State}, MsgIds), + State2 = compact(Files, State1), + {ok, State2}. internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, current_file_handle = CurHdl, @@ -345,7 +347,7 @@ maybe_roll_to_new_file(_, State) -> %% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- -compact(File, State) -> +compact(Files, State) -> State. 
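compact/2 is still a no-op at this point. Going by the commit message, a first cut might simply reap files whose valid data has dropped to zero (hypothetical sketch only; neighbour relinking in the summary is elided, and the append file is skipped via its undefined right pointer):

compact_sketch(FilesSet, State = #dqstate { file_summary = FileSummary }) ->
    sets:fold(
      fun (File, State1) ->
              case dict:find(File, FileSummary) of
                  {ok, #dqfile { valid_data = 0, right = Right }}
                    when Right =/= undefined ->        %% never the current append file
                      ok = file:delete(form_filename(File)),
                      State1;                          %% summary relinking elided
                  _ ->
                      State1
              end
      end, State, FilesSet).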
%% ---- DISK RECOVERY ---- @@ -353,12 +355,16 @@ compact(File, State) -> load_from_disk(State) -> % sorted so that smallest number is first. which also means eldest file (left-most) first {Files, TmpFiles} = get_disk_queue_files(), + io:format("got files~n", []), ok = recover_crashed_compactions(Files, TmpFiles), + io:format("crash recovery done~n", []), % There should be no more tmp files now, so go ahead and load the whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), + io:format("loaded messages~n", []), % Finally, check there is nothing in mnesia which we haven't loaded - true = lists:all(fun ({MsgId, _Q}) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, - mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), + true = lists:foldl(fun ({MsgId, _Q}, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, + true, mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), + io:format("checked in mnesia~n", []), {ok, State1}. load_messages(undefined, [], State) -> @@ -376,7 +382,9 @@ load_messages(Left, [File|Files], file_summary = FileSummary }) -> % [{MsgId, TotalSize, FileOffset}] + io:format("scan start~n", []), {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), + io:format("scan end~n", []), {ValidMessagesRev, ValidTotalSize, FileDetail} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc, FileDetail1}) -> case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) of -- cgit v1.2.1 From f7cb1befebdf8f01bb1453126f2c18d84b68bc76 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 12:15:47 +0100 Subject: switched back to ets for file detail. Also remember to call compact from tx_cancel. --- src/rabbit_disk_queue.erl | 124 ++++++++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 64 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e605828e..0313ebd7 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -48,16 +48,18 @@ -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, 8 * ?INTEGER_SIZE_BYTES). -define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). +-define(FILE_DETAIL_ETS_NAME, rabbit_disk_queue_file_detail). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(FILE_PACKING_ADJUSTMENT, 1 + (2* (?INTEGER_SIZE_BYTES))). -define(SERVER, ?MODULE). --record(dqfile, {valid_data, contiguous_prefix, left, right, detail}). +-record(dqfile, {valid_data, contiguous_prefix, left, right}). -record(dqstate, {msg_location, file_summary, + file_detail, current_file_num, current_file_name, current_file_handle, @@ -99,9 +101,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> file_summary = dict:store(InitName, (#dqfile { valid_data = 0, contiguous_prefix = 0, left = undefined, - right = undefined, - detail = dict:new()}), + right = undefined}), dict:new()), + file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [ordered_set, private]), current_file_num = 0, current_file_name = InitName, current_file_handle = undefined, @@ -197,41 +199,41 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. 
-internal_ack(Q, MsgIds, State) -> - {Files, State1} - = lists:foldl(fun (MsgId, {Files1, State2 = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary - }}) -> +internal_ack(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + file_detail = FileDetail + }) -> + {Files, FileSummary1} + = lists:foldl(fun (MsgId, {Files2, FileSummary2}) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), % is this the last time we need the message, in which case tidy up if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), {ok, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - detail = FileDetail }} - = dict:find(File, FileSummary), - FileDetail1 = dict:erase(Offset, FileDetail), + contiguous_prefix = ContiguousTop }} + = dict:find(File, FileSummary2), + true = ets:delete(FileDetail, {File, Offset}), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary1 + FileSummary3 = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1, - detail = FileDetail1 - }, FileSummary), + contiguous_prefix = ContiguousTop1 + }, FileSummary2), ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), - {sets:add_element(File, Files1), State2 # dqstate { file_summary = FileSummary1 }}; + {sets:add_element(File, Files2), FileSummary3}; 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - {Files1, State2} + {Files2, FileSummary2} end - end, {sets:new(), State}, MsgIds), - State2 = compact(Files, State1), + end, {sets:new(), FileSummary}, MsgIds), + State2 = compact(Files, State # dqstate { file_summary = FileSummary1 }), {ok, State2}. 
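The refcount discipline that ack and cancel both lean on, pulled out for clarity (illustrative helper, not in the patch): a MsgLocation row is {MsgId, RefCount, File, Offset, TotalSize}; republishing a known MsgId bumps the count, and only the final decrement touches the file bookkeeping.

decref(MsgLocation, MsgId) ->
    [{MsgId, RefCount, File, Offset, TotalSize}] =
        ets:lookup(MsgLocation, MsgId),
    if 1 =:= RefCount ->
           true = ets:delete(MsgLocation, MsgId),
           {last_ref, File, Offset, TotalSize};  %% caller updates the file summary
       1 < RefCount ->
           true = ets:insert(MsgLocation,
                             {MsgId, RefCount - 1, File, Offset, TotalSize}),
           live
    end.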
internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, current_file_handle = CurHdl, current_file_name = CurName, current_offset = Offset, - file_summary = FileSummary + file_summary = FileSummary, + file_detail = FileDetail }) -> case ets:lookup(MsgLocation, MsgId) of [] -> @@ -240,18 +242,16 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), {ok, FileSum = #dqfile { valid_data = ValidTotalSize, contiguous_prefix = ContiguousTop, - right = undefined, - detail = FileDetail }} + right = undefined }} = dict:find(CurName, FileSummary), - FileDetail1 = dict:store(Offset, TotalSize, FileDetail), + true = ets:insert_new(FileDetail, {{CurName, Offset}, TotalSize}), ValidTotalSize1 = ValidTotalSize + TotalSize + (?FILE_PACKING_ADJUSTMENT), ContiguousTop1 = if Offset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file true -> ContiguousTop end, FileSummary1 = dict:store(CurName, FileSum #dqfile { valid_data = ValidTotalSize1, - contiguous_prefix = ContiguousTop1, - detail = FileDetail1}, + contiguous_prefix = ContiguousTop1 }, FileSummary), maybe_roll_to_new_file(Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT), State # dqstate { file_summary = FileSummary1, @@ -289,30 +289,32 @@ internal_publish(Q, MsgId, MsgBody, State) -> internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary + file_summary = FileSummary, + file_detail = FileDetail }) -> - FileSummary1 = - lists:foldl(fun (MsgId, FileSummary2) -> + {Files, FileSummary1} = + lists:foldl(fun (MsgId, {Files2, FileSummary2}) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), {ok, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - detail = FileDetail }} + contiguous_prefix = ContiguousTop }} = dict:find(File, FileSummary2), - FileDetail1 = dict:erase(Offset, FileDetail), + true = ets:delete(FileDetail, {File, Offset}), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1, - detail = FileDetail1 - }, FileSummary2); + FileSummary3 + = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + contiguous_prefix = ContiguousTop1 + }, FileSummary2), + {sets:add_element(File, Files2), FileSummary3}; 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - FileSummary2 + {Files2, FileSummary2} end - end, FileSummary, MsgIds), - {ok, State #dqstate { file_summary = FileSummary1 }}. + end, {sets:new(), FileSummary}, MsgIds), + State2 = compact(Files, State # dqstate { file_summary = FileSummary1 }), + {ok, State2}. %% ---- ROLLING OVER THE APPEND FILE ---- @@ -337,8 +339,7 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi file_summary = dict:store(NextName, #dqfile { valid_data = 0, contiguous_prefix = 0, left = CurName, - right = undefined, - detail = dict:new()}, + right = undefined }, FileSummary1) } }; @@ -347,7 +348,7 @@ maybe_roll_to_new_file(_, State) -> %% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- -compact(Files, State) -> +compact(_FilesSet, State) -> State. 
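Why file_detail is now an ordered_set keyed on {File, Offset}: all entries for one file sort together by offset, so the end of any file falls out of the last matching entry - which is exactly how load_messages below recomputes current_offset on restart. In isolation (hypothetical helper):

eof_offset(FileDetail, File) ->
    case ets:match_object(FileDetail, {{File, '_'}, '_'}) of
        [] -> 0;
        L  -> %% ordered_set: matches come back in key order, last = highest offset
              {{File, Offset}, TotalSize} = lists:last(L),
              Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT)
    end.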
%% ---- DISK RECOVERY ---- @@ -355,48 +356,44 @@ compact(Files, State) -> load_from_disk(State) -> % sorted so that smallest number is first. which also means eldest file (left-most) first {Files, TmpFiles} = get_disk_queue_files(), - io:format("got files~n", []), ok = recover_crashed_compactions(Files, TmpFiles), - io:format("crash recovery done~n", []), % There should be no more tmp files now, so go ahead and load the whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), - io:format("loaded messages~n", []), % Finally, check there is nothing in mnesia which we haven't loaded true = lists:foldl(fun ({MsgId, _Q}, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, true, mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), - io:format("checked in mnesia~n", []), {ok, State1}. load_messages(undefined, [], State) -> State; -load_messages(Left, [], State = #dqstate { file_summary = Summary }) -> +load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> Num = list_to_integer(filename:rootname(Left)), - {ok, #dqfile { detail = FileDetail }} = dict:find(Left, Summary), - Offset = dict:fold(fun (Offset1, TotalSize, Acc) -> - Acc1 = Offset1 + TotalSize + (?FILE_PACKING_ADJUSTMENT), - lists:max([Acc, Acc1]) - end, 0, FileDetail), - State # dqstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; + Offset = case ets:match_object(FileDetail, {{Left, '_'}, '_'}) of + [] -> 0; + L -> {{Left, Offset1}, TotalSize} = lists:last(L), + Offset1 + TotalSize + (?FILE_PACKING_ADJUSTMENT) + end, + State # dqstate { current_file_num = Num, current_file_name = Left, + current_offset = Offset }; load_messages(Left, [File|Files], State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary + file_summary = FileSummary, + file_detail = FileDetail }) -> % [{MsgId, TotalSize, FileOffset}] - io:format("scan start~n", []), {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), - io:format("scan end~n", []), - {ValidMessagesRev, ValidTotalSize, FileDetail} = lists:foldl( - fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc, FileDetail1}) -> + {ValidMessagesRev, ValidTotalSize} = lists:foldl( + fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) of - 0 -> {VMAcc, VTSAcc, FileDetail1}; + 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), + true = ets:insert_new(FileDetail, {{File, Offset}, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], - VTSAcc + TotalSize + (?FILE_PACKING_ADJUSTMENT), - dict:store(Offset, TotalSize, FileDetail1) + VTSAcc + TotalSize + (?FILE_PACKING_ADJUSTMENT) } end - end, {[], 0, dict:new()}, Messages), + end, {[], 0}, Messages), % foldl reverses lists and find_contiguous_block_prefix needs elems in the same order % as from scan_file_for_valid_messages {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), @@ -408,8 +405,7 @@ load_messages(Left, [File|Files], dict:store(File, #dqfile { valid_data = ValidTotalSize, contiguous_prefix = ContiguousTop, left = Left, - right = Right, - detail = FileDetail + right = Right }, FileSummary) }, -- cgit v1.2.1 From d42b97467000c02491881577a80a5abe44b74137 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 12:57:15 +0100 Subject: switched summary to an ets table too --- src/rabbit_disk_queue.erl | 118 
+++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 64 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 0313ebd7..5ab684e6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -49,6 +49,7 @@ -define(INTEGER_SIZE_BITS, 8 * ?INTEGER_SIZE_BYTES). -define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). -define(FILE_DETAIL_ETS_NAME, rabbit_disk_queue_file_detail). +-define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(FILE_PACKING_ADJUSTMENT, 1 + (2* (?INTEGER_SIZE_BYTES))). @@ -97,12 +98,13 @@ tx_cancel(MsgIds) when is_list(MsgIds) -> init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), InitName = "0" ++ (?FILE_EXTENSION), + FileSummary = ets:new((?FILE_SUMMARY_ETS_NAME), [set, private]), + true = ets:insert(FileSummary, {InitName, #dqfile { valid_data = 0, + contiguous_prefix = 0, + left = undefined, + right = undefined}}), State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), - file_summary = dict:store(InitName, (#dqfile { valid_data = 0, - contiguous_prefix = 0, - left = undefined, - right = undefined}), - dict:new()), + file_summary = FileSummary, file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [ordered_set, private]), current_file_num = 0, current_file_name = InitName, @@ -145,8 +147,8 @@ handle_info(_Info, State) -> terminate(_Reason, #dqstate { current_file_handle = FileHdl, read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> - ok = file:sync(FileHdl), - ok = file:close(FileHdl), + file:sync(FileHdl), + file:close(FileHdl), dict:fold(fun (_File, Hdl, _Acc) -> file:close(Hdl) end, ok, ReadHdls). @@ -203,29 +205,28 @@ internal_ack(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary, file_detail = FileDetail }) -> - {Files, FileSummary1} - = lists:foldl(fun (MsgId, {Files2, FileSummary2}) -> + Files + = lists:foldl(fun (MsgId, Files2) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), % is this the last time we need the message, in which case tidy up if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), - {ok, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop }} - = dict:find(File, FileSummary2), + [{File, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop }}] + = ets:lookup(FileSummary, File), true = ets:delete(FileDetail, {File, Offset}), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary3 - = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1 - }, FileSummary2), + true = ets:insert(FileSummary, + {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + contiguous_prefix = ContiguousTop1}}), ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), - {sets:add_element(File, Files2), FileSummary3}; + sets:add_element(File, Files2); 1 < RefCount -> - ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - {Files2, FileSummary2} + true = ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + Files2 end - end, {sets:new(), FileSummary}, MsgIds), - State2 = compact(Files, State # dqstate { file_summary = FileSummary1 }), + end, sets:new(), MsgIds), + State2 = compact(Files, State), {ok, State2}. 
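Worth spelling out what the dict -> ets switch does to these folds: a dict is immutable, so every update has to be threaded back out of the fold as an accumulator, whereas an ets table updates in place and the fold only needs to accumulate which files were touched. A toy demonstration (not patch code):

ets_in_place() ->
    T = ets:new(demo, [set]),
    true = ets:insert(T, {k, 1}),
    Bump = fun () ->
                   [{k, V}] = ets:lookup(T, k),
                   true = ets:insert(T, {k, V + 1})
           end,
    Bump(), Bump(),
    [{k, 3}] = ets:lookup(T, k),  %% mutated in place; nothing threaded through
    ok.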
internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, @@ -240,23 +241,20 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio % New message, lots to do {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), - {ok, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - right = undefined }} - = dict:find(CurName, FileSummary), + [{CurName, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + right = undefined }}] + = ets:lookup(FileSummary, CurName), true = ets:insert_new(FileDetail, {{CurName, Offset}, TotalSize}), ValidTotalSize1 = ValidTotalSize + TotalSize + (?FILE_PACKING_ADJUSTMENT), ContiguousTop1 = if Offset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file true -> ContiguousTop end, - FileSummary1 = dict:store(CurName, FileSum #dqfile { valid_data = ValidTotalSize1, - contiguous_prefix = ContiguousTop1 }, - FileSummary), + true = ets:insert(FileSummary, {CurName, FileSum #dqfile { valid_data = ValidTotalSize1, + contiguous_prefix = ContiguousTop1 }}), maybe_roll_to_new_file(Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT), - State # dqstate { file_summary = FileSummary1, - current_offset = Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT) - }); + State # dqstate {current_offset = Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT)}); [{MsgId, RefCount, File, Offset, TotalSize}] -> % We already know about it, just update counter true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), @@ -287,33 +285,30 @@ internal_publish(Q, MsgId, MsgBody, State) -> ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {MsgId, Q}, is_delivered = false}), {ok, State1}. - internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary, file_detail = FileDetail }) -> - {Files, FileSummary1} = - lists:foldl(fun (MsgId, {Files2, FileSummary2}) -> + Files = + lists:foldl(fun (MsgId, Files2) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), - {ok, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop }} - = dict:find(File, FileSummary2), + [{File, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop }}] + = ets:lookup(FileSummary, File), true = ets:delete(FileDetail, {File, Offset}), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - FileSummary3 - = dict:store(File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1 - }, FileSummary2), - {sets:add_element(File, Files2), FileSummary3}; + true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + contiguous_prefix = ContiguousTop1}}), + sets:add_element(File, Files2); 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - {Files2, FileSummary2} + Files2 end - end, {sets:new(), FileSummary}, MsgIds), - State2 = compact(Files, State # dqstate { file_summary = FileSummary1 }), + end, sets:new(), MsgIds), + State2 = compact(Files, State), {ok, State2}. 
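A worked example of the contiguous_prefix bookkeeping above (made-up numbers): three messages of packed size 100 sit at offsets 0, 100 and 200, so the prefix is 300; acking the middle one punches a hole, and the lists:min/1 clamp pulls the prefix back to 100.

contiguous_prefix_example() ->
    Top0 = 300,                    %% msgs at 0, 100, 200; all still valid
    Top1 = lists:min([Top0, 100]), %% ack the message at offset 100
    100 = Top1,                    %% only the first 100 bytes stay known-contiguous
    ok.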
%% ---- ROLLING OVER THE APPEND FILE ---- @@ -330,19 +325,17 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ (?FILE_EXTENSION), {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary]), - {ok, FileSum = #dqfile {right = undefined}} = dict:find(CurName, FileSummary), - FileSummary1 = dict:store(CurName, FileSum #dqfile {right = NextName}, FileSummary), + [{CurName, FileSum = #dqfile {right = undefined}}] = ets:lookup(FileSummary, CurName), + true = ets:insert(FileSummary, {CurName, FileSum #dqfile {right = NextName}}), + true = ets:insert_new(FileSummary, {NextName, #dqfile { valid_data = 0, + contiguous_prefix = 0, + left = CurName, + right = undefined }}), {ok, State # dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, - current_offset = 0, - file_summary = dict:store(NextName, #dqfile { valid_data = 0, - contiguous_prefix = 0, - left = CurName, - right = undefined }, - FileSummary1) - } - }; + current_offset = 0 + }} maybe_roll_to_new_file(_, State) -> {ok, State}. @@ -401,15 +394,12 @@ load_messages(Left, [File|Files], [] -> undefined; [F|_] -> F end, - State1 = State # dqstate { file_summary = - dict:store(File, #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - left = Left, - right = Right - }, - FileSummary) - }, - load_messages(File, Files, State1). + true = ets:insert_new(FileSummary, {File, #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop, + left = Left, + right = Right + }}), + load_messages(File, Files, State). %% ---- DISK RECOVERY OF FAILED COMPACTION ---- -- cgit v1.2.1 From 232812645851ee268d8df4fb6b3f738f8a5ca6ae Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 12:58:22 +0100 Subject: typeo --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 5ab684e6..b9faf767 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -335,7 +335,7 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi current_file_handle = NextHdl, current_file_num = NextNum, current_offset = 0 - }} + }}; maybe_roll_to_new_file(_, State) -> {ok, State}. -- cgit v1.2.1 From c187492f8f39c8b3cedb1bc15a847996d6a2448b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 13:14:36 +0100 Subject: corrected some bugs. Performance still isn't quite where I would like it to be. Need to do more profiling. Seem to be unnecessarily CPU bound despite use of ets throughout. 
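[Editor's note, not part of the patch series: the CPU-bound behaviour described above can be chased with OTP's fprof. The sketch below is illustrative only -- the queue name 'q', the message counts and the 512-byte payload are invented, and it assumes the disk queue server is already running under its registered name.]

    rdq_profile() ->
        Msg = <<0:(8*512)>>,
        MsgIds = lists:seq(1, 1024),
        %% the real work happens inside the gen_server process, not the
        %% caller, so trace that process rather than using fprof:apply/2
        ok = fprof:trace([start, {procs, whereis(rabbit_disk_queue)}]),
        [rabbit_disk_queue:tx_publish(N, Msg) || N <- MsgIds],
        ok = rabbit_disk_queue:tx_commit(q, MsgIds),
        ok = fprof:trace(stop),
        ok = fprof:profile(),                           %% digest fprof.trace
        ok = fprof:analyse([{dest, "fprof.analysis"}]). %% own/acc time per function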
--- src/rabbit_disk_queue.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b9faf767..58247799 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -99,10 +99,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), InitName = "0" ++ (?FILE_EXTENSION), FileSummary = ets:new((?FILE_SUMMARY_ETS_NAME), [set, private]), - true = ets:insert(FileSummary, {InitName, #dqfile { valid_data = 0, - contiguous_prefix = 0, - left = undefined, - right = undefined}}), State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), file_summary = FileSummary, file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [ordered_set, private]), @@ -357,7 +353,11 @@ load_from_disk(State) -> true, mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), {ok, State1}. -load_messages(undefined, [], State) -> +load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> + true = ets:insert_new(FileSummary, {CurName, #dqfile { valid_data = 0, + contiguous_prefix = 0, + left = undefined, + right = undefined}}), State; load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> Num = list_to_integer(filename:rootname(Left)), -- cgit v1.2.1 From a9f13e344023b9183447c0ae61e4718d16ceaf6e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 18:31:09 +0100 Subject: combine two writes into one and alter some file:open options. Profiling shows that file:write is by far the most expensive operation now. (roughly 140,000 to mnesia coming in at 27,000). So it looks like the next idea is to follow the advice in man 3 file and implement our own write buffer. Ensure we hit 4KB or more before calling file:write - though obviously chuck out early for suitable commits. This will also complicate deliver, tx_cancel and ack. --- src/rabbit_disk_queue.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 58247799..765380b6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -113,7 +113,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = load_from_disk(State), Path = form_filename(CurrentName), ok = filelib:ensure_dir(Path), - {ok, FileHdl} = file:open(Path, [read, write, raw, binary]), %% read only needed so that we can seek + {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), %% read only needed so that we can seek {ok, Offset} = file:position(FileHdl, {bof, Offset}), {ok, State1 # dqstate { current_file_handle = FileHdl }}. 
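%% [Editor's aside between hunks; not part of the patch.] The commit message
%% above proposes a hand-rolled write buffer: queue pending writes and call
%% file:write/2 only once 4KB or more has accumulated, flushing early when a
%% commit demands it. A minimal sketch of that shape, under invented names
%% (the patch itself settles for OTP's delayed_write open option for now):

-record(wbuf, {hdl, pending = [], size = 0}).

buffer_write(Bin, WB = #wbuf { pending = P, size = N }) ->
    maybe_flush(WB #wbuf { pending = [Bin | P], size = N + size(Bin) }).

maybe_flush(WB = #wbuf { size = N }) when N >= 4096 -> flush(WB);
maybe_flush(WB) -> WB.

flush(WB = #wbuf { pending = [] }) -> WB;
flush(WB = #wbuf { hdl = Hdl, pending = P }) ->
    %% file:write/2 accepts iodata, so the whole batch goes out in one call
    ok = file:write(Hdl, lists:reverse(P)),
    WB #wbuf { pending = [], size = 0 }.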
@@ -172,7 +172,7 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {FileHdl, ReadHdls1, ReadHdlsAge1} = case dict:find(File, ReadHdls) of error -> - {ok, Hdl} = file:open(form_filename(File), [read, raw, binary]), + {ok, Hdl} = file:open(form_filename(File), [read, raw, binary, read_ahead]), Now = now(), case dict:size(ReadHdls) < ReadFileHandlesLimit of true -> @@ -320,7 +320,7 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi ok = file:close(CurHdl), NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ (?FILE_EXTENSION), - {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary]), + {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]), [{CurName, FileSum = #dqfile {right = undefined}}] = ets:lookup(FileSummary, CurName), true = ets:insert(FileSummary, {CurName, FileSum #dqfile {right = NextName}}), true = ets:insert_new(FileSummary, {NextName, #dqfile { valid_data = 0, @@ -455,7 +455,7 @@ recover_crashed_compactions1(Files, TmpFile) -> % we should have that none of the messages in the prefix are in the tmp file true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, MsgIds), - {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), [write, raw, binary]), + {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), [write, raw, binary, delayed_write]), {ok, Top} = file:position(MainHdl, Top), ok = file:truncate(MainHdl), % wipe out any rubbish at the end of the file % there really could be rubbish at the end of the file - we could have failed after the @@ -468,7 +468,7 @@ recover_crashed_compactions1(Files, TmpFile) -> ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move % if we run out of disk space, this truncate could fail, but we still % aren't risking losing data - {ok, TmpHdl} = file:open(form_filename(TmpFile), [read, raw, binary]), + {ok, TmpHdl} = file:open(form_filename(TmpFile), [read, raw, binary, read_ahead]), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:close(MainHdl), ok = file:close(TmpHdl), @@ -523,9 +523,9 @@ append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> TotalSize = BodySize + MsgIdBinSize, case file:write(FileHdl, <>) of - ok -> ok = file:write(FileHdl, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>), - {ok, TotalSize}; + MsgIdBin:MsgIdBinSize/binary, MsgBody:BodySize/binary, + (?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>) of + ok -> {ok, TotalSize}; KO -> KO end. -- cgit v1.2.1 From 9080f0cead93fe1ab14fec34e432a5181c7e0f72 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Apr 2009 23:14:59 +0100 Subject: changed the mnesia table to be a bag of MsgId -> {Q, Delivered} This makes startup MUCH faster, but delivery could be fractionally slower. --- include/rabbit.hrl | 2 +- src/rabbit_disk_queue.erl | 25 +++++++++++++++---------- src/rabbit_mnesia.erl | 1 + 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 88596a43..27891fd3 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -64,7 +64,7 @@ -record(basic_message, {exchange_name, routing_key, content, persistent_key}). --record(dq_msg_loc, {queue_and_msg_id, is_delivered}). +-record(dq_msg_loc, {msg_id, queue, is_delivered}). 
%%---------------------------------------------------------------------------- diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 765380b6..ebf0561b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -190,9 +190,11 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, end, % read the message {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), - [#dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = Delivered}] = mnesia:dirty_read(rabbit_disk_queue, {MsgId, Q}), + [Obj = #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = Delivered}] + = mnesia:dirty_index_match_object(rabbit_disk_queue, #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = '_'}, 1), if Delivered -> ok; - true -> ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc {queue_and_msg_id = {MsgId, Q}, is_delivered = true}) + true -> ok = mnesia:dirty_delete_object(rabbit_disk_queue, Obj), + ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) end, {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. @@ -215,7 +217,9 @@ internal_ack(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), contiguous_prefix = ContiguousTop1}}), - ok = mnesia:dirty_delete({rabbit_disk_queue, {MsgId, Q}}), + [Obj] = mnesia:dirty_match_object(rabbit_disk_queue, + #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = '_'}), + ok = mnesia:dirty_delete_object(rabbit_disk_queue, Obj), sets:add_element(File, Files2); 1 < RefCount -> true = ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), @@ -267,7 +271,8 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, lists:foldl(fun (MsgId, Acc) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), - ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {MsgId, Q}, is_delivered = false}, write), + ok = mnesia:write(rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, queue = Q, is_delivered = false}, write), Acc or (CurName =:= File) end, false, MsgIds) end), @@ -278,7 +283,7 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, internal_publish(Q, MsgId, MsgBody, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_msg_id = {MsgId, Q}, is_delivered = false}), + ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue = Q, is_delivered = false}), {ok, State1}. internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, @@ -349,8 +354,8 @@ load_from_disk(State) -> % There should be no more tmp files now, so go ahead and load the whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), % Finally, check there is nothing in mnesia which we haven't loaded - true = lists:foldl(fun ({MsgId, _Q}, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, - true, mnesia:async_dirty(fun() -> mnesia:all_keys(rabbit_disk_queue) end)), + true = lists:foldl(fun (MsgId, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, + true, mnesia:dirty_all_keys(rabbit_disk_queue)), {ok, State1}. 
load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> @@ -377,7 +382,7 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) of + case length(mnesia:dirty_read(rabbit_disk_queue, MsgId)) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), @@ -417,7 +422,7 @@ recover_crashed_compactions1(Files, TmpFile) -> MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) + true = 0 < length(mnesia:dirty_read(rabbit_disk_queue, MsgId)) end, MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), @@ -448,7 +453,7 @@ recover_crashed_compactions1(Files, TmpFile) -> % we're in case 4 above. % check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, {dq_msg_loc, {MsgId, '_'}, '_'})) + true = 0 < length(mnesia:dirty_read(rabbit_disk_queue, MsgId)) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 7179b637..be58581a 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -146,6 +146,7 @@ table_definitions() -> {attributes, record_info(fields, amqqueue)}]}, {rabbit_disk_queue, [{record_name, dq_msg_loc}, + {type, bag}, {attributes, record_info(fields, dq_msg_loc)}, {disc_copies, [node()]}]} ]. -- cgit v1.2.1 From 82b8a63da21bf9b400650cb495a14e0eb4a82dbe Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 12:13:46 +0100 Subject: refactored out common functionality between ack and cancel --- Makefile | 4 +-- src/rabbit_disk_queue.erl | 87 +++++++++++++++++++---------------------------- 2 files changed, 37 insertions(+), 54 deletions(-) diff --git a/Makefile b/Makefile index b7464244..8744f637 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/tmp +RABBITMQ_MNESIA_DIR=/data/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=/data/tmp SOURCE_DIR=src EBIN_DIR=ebin diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ebf0561b..90959b40 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -199,33 +199,39 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. 
-internal_ack(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail - }) -> - Files - = lists:foldl(fun (MsgId, Files2) -> - [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), - % is this the last time we need the message, in which case tidy up - if 1 =:= RefCount -> - true = ets:delete(MsgLocation, MsgId), - [{File, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop }}] - = ets:lookup(FileSummary, File), - true = ets:delete(FileDetail, {File, Offset}), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, - {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1}}), - [Obj] = mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = '_'}), - ok = mnesia:dirty_delete_object(rabbit_disk_queue, Obj), - sets:add_element(File, Files2); - 1 < RefCount -> - true = ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - Files2 - end - end, sets:new(), MsgIds), +internal_ack(Q, MsgIds, State) -> + remove_messages(Q, MsgIds, true, State). + +%% Q is only needed if MnesiaDelete = true +remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + file_detail = FileDetail + }) -> + Files = lists:foldl(fun (MsgId, Files2) -> + [{MsgId, RefCount, File, Offset, TotalSize}] + = ets:lookup(MsgLocation, MsgId), + if 1 =:= RefCount -> + true = ets:delete(MsgLocation, MsgId), + [{File, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop }}] + = ets:lookup(FileSummary, File), + true = ets:delete(FileDetail, {File, Offset}), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + contiguous_prefix = ContiguousTop1}}), + if MnesiaDelete -> + [Obj] = mnesia:dirty_match_object(rabbit_disk_queue, + #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = '_'}), + ok = mnesia:dirty_delete_object(rabbit_disk_queue, Obj); + true -> + ok + end, + sets:add_element(File, Files2); + 1 < RefCount -> + ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + Files2 + end + end, sets:new(), MsgIds), State2 = compact(Files, State), {ok, State2}. @@ -286,31 +292,8 @@ internal_publish(Q, MsgId, MsgBody, State) -> ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue = Q, is_delivered = false}), {ok, State1}. 
-internal_tx_cancel(MsgIds, State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail - }) -> - Files = - lists:foldl(fun (MsgId, Files2) -> - [{MsgId, RefCount, File, Offset, TotalSize}] - = ets:lookup(MsgLocation, MsgId), - if 1 =:= RefCount -> - true = ets:delete(MsgLocation, MsgId), - [{File, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop }}] - = ets:lookup(FileSummary, File), - true = ets:delete(FileDetail, {File, Offset}), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1}}), - sets:add_element(File, Files2); - 1 < RefCount -> - ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - Files2 - end - end, sets:new(), MsgIds), - State2 = compact(Files, State), - {ok, State2}. +internal_tx_cancel(MsgIds, State) -> + remove_messages(undefined, MsgIds, false, State). %% ---- ROLLING OVER THE APPEND FILE ---- -- cgit v1.2.1 From fee14c74afe1ef306158afd3837a98c521a31ca3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 12:30:38 +0100 Subject: switched mnesia to an ordered_set. Seems to match performance with bag for startup and without the issues on fanout. --- include/rabbit.hrl | 2 +- src/rabbit_disk_queue.erl | 26 +++++++++++++------------- src/rabbit_mnesia.erl | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 27891fd3..4f06b833 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -64,7 +64,7 @@ -record(basic_message, {exchange_name, routing_key, content, persistent_key}). --record(dq_msg_loc, {msg_id, queue, is_delivered}). +-record(dq_msg_loc, {msg_id_and_queue, is_delivered}). %%---------------------------------------------------------------------------- diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 90959b40..db719ef3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -190,11 +190,10 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, end, % read the message {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), - [Obj = #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = Delivered}] - = mnesia:dirty_index_match_object(rabbit_disk_queue, #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = '_'}, 1), + [Obj = #dq_msg_loc {is_delivered = Delivered}] + = mnesia:dirty_read(rabbit_disk_queue, {MsgId, Q}), if Delivered -> ok; - true -> ok = mnesia:dirty_delete_object(rabbit_disk_queue, Obj), - ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) + true -> ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) end, {ok, {MsgBody, BodySize, Delivered}, State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}. 
@@ -220,9 +219,7 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), contiguous_prefix = ContiguousTop1}}), if MnesiaDelete -> - [Obj] = mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc {msg_id = MsgId, queue = Q, is_delivered = '_'}), - ok = mnesia:dirty_delete_object(rabbit_disk_queue, Obj); + ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); true -> ok end, @@ -278,7 +275,7 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, [{MsgId, _RefCount, File, _Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), ok = mnesia:write(rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, queue = Q, is_delivered = false}, write), + #dq_msg_loc { msg_id_and_queue = {MsgId, Q}, is_delivered = false}, write), Acc or (CurName =:= File) end, false, MsgIds) end), @@ -289,7 +286,7 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, internal_publish(Q, MsgId, MsgBody, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue = Q, is_delivered = false}), + ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id_and_queue = {MsgId, Q}, is_delivered = false}), {ok, State1}. internal_tx_cancel(MsgIds, State) -> @@ -337,7 +334,7 @@ load_from_disk(State) -> % There should be no more tmp files now, so go ahead and load the whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), % Finally, check there is nothing in mnesia which we haven't loaded - true = lists:foldl(fun (MsgId, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, + true = lists:foldl(fun ({MsgId, _Q}, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, true, mnesia:dirty_all_keys(rabbit_disk_queue)), {ok, State1}. @@ -365,7 +362,8 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_read(rabbit_disk_queue, MsgId)) of + case length(mnesia:dirty_match_object(rabbit_disk_queue, + #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, is_delivered = '_'})) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), @@ -405,7 +403,8 @@ recover_crashed_compactions1(Files, TmpFile) -> MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_read(rabbit_disk_queue, MsgId)) + true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, + #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, is_delivered = '_'})) end, MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), @@ -436,7 +435,8 @@ recover_crashed_compactions1(Files, TmpFile) -> % we're in case 4 above. 
% check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_read(rabbit_disk_queue, MsgId)) + true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, + #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, is_delivered = '_'})) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index be58581a..858b024a 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -146,7 +146,7 @@ table_definitions() -> {attributes, record_info(fields, amqqueue)}]}, {rabbit_disk_queue, [{record_name, dq_msg_loc}, - {type, bag}, + {type, ordered_set}, {attributes, record_info(fields, dq_msg_loc)}, {disc_copies, [node()]}]} ]. -- cgit v1.2.1 From 8b482395dd8a19ec4a33fb6db57f6703d5417507 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 16:56:49 +0100 Subject: added a stop and clean_stop api to help with tests/benchmarks --- src/rabbit_disk_queue.erl | 122 +++++++++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 40 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index db719ef3..b67896ce 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,6 +40,8 @@ -export([publish/3, deliver/2, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]). +-export([stop/0, clean_stop/0]). + -include_lib("stdlib/include/qlc.hrl"). -include("rabbit.hrl"). @@ -93,6 +95,12 @@ tx_commit(Q, MsgIds) when is_list(MsgIds) -> tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server:cast(?SERVER, {tx_cancel, MsgIds}). +stop() -> + gen_server:call(?SERVER, stop). + +clean_stop() -> + gen_server:call(?SERVER, clean_stop). + %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -110,7 +118,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> read_file_handles = {dict:new(), gb_trees:empty()}, read_file_handles_limit = ReadFileHandlesLimit }, - {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = load_from_disk(State), + {ok, State1 = #dqstate { current_file_name = CurrentName, + current_offset = Offset } } = load_from_disk(State), Path = form_filename(CurrentName), ok = filelib:ensure_dir(Path), {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), %% read only needed so that we can seek @@ -122,7 +131,22 @@ handle_call({deliver, Q, MsgId}, _From, State) -> {reply, {MsgBody, BodySize, Delivered}, State1}; handle_call({tx_commit, Q, MsgIds}, _From, State) -> {ok, State1} = internal_tx_commit(Q, MsgIds, State), - {reply, ok, State1}. + {reply, ok, State1}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; %% gen_server now calls terminate +handle_call(clean_stop, _From, State) -> + State1 = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + file_detail = FileDetail } + = shutdown(State), %% tidy up file handles early + {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), + true = ets:delete(MsgLocation), + true = ets:delete(FileSummary), + true = ets:delete(FileDetail), + lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), + {stop, normal, ok, State # dqstate { current_file_handle = undefined, + read_file_handles = {dict:new(), gb_trees:empty()}}}. 
+ %% gen_server now calls terminate, which then calls shutdown handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), @@ -140,14 +164,21 @@ handle_cast({tx_cancel, MsgIds}, State) -> handle_info(_Info, State) -> {noreply, State}. -terminate(_Reason, #dqstate { current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge} - }) -> - file:sync(FileHdl), - file:close(FileHdl), +terminate(_Reason, State) -> + shutdown(State). + +shutdown(State = #dqstate { current_file_handle = FileHdl, + read_file_handles = {ReadHdls, _ReadHdlsAge} + }) -> + if FileHdl =:= undefined -> ok; + true -> file:sync(FileHdl), + file:close(FileHdl) + end, dict:fold(fun (_File, Hdl, _Acc) -> file:close(Hdl) - end, ok, ReadHdls). + end, ok, ReadHdls), + State # dqstate { current_file_handle = undefined, + read_file_handles = {dict:new(), gb_trees:empty()}}. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -186,7 +217,8 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, end; {ok, {Hdl, Then}} -> Now = now(), - {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))} + {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), + gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))} end, % read the message {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), @@ -206,29 +238,32 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL file_summary = FileSummary, file_detail = FileDetail }) -> - Files = lists:foldl(fun (MsgId, Files2) -> - [{MsgId, RefCount, File, Offset, TotalSize}] - = ets:lookup(MsgLocation, MsgId), - if 1 =:= RefCount -> - true = ets:delete(MsgLocation, MsgId), - [{File, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop }}] - = ets:lookup(FileSummary, File), - true = ets:delete(FileDetail, {File, Offset}), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - TotalSize - (?FILE_PACKING_ADJUSTMENT)), - contiguous_prefix = ContiguousTop1}}), - if MnesiaDelete -> - ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); - true -> - ok - end, - sets:add_element(File, Files2); - 1 < RefCount -> - ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - Files2 - end - end, sets:new(), MsgIds), + Files + = lists:foldl(fun (MsgId, Files2) -> + [{MsgId, RefCount, File, Offset, TotalSize}] + = ets:lookup(MsgLocation, MsgId), + if 1 =:= RefCount -> + true = ets:delete(MsgLocation, MsgId), + [{File, FileSum = #dqfile { valid_data = ValidTotalSize, + contiguous_prefix = ContiguousTop }}] + = ets:lookup(FileSummary, File), + true = ets:delete(FileDetail, {File, Offset}), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + true = ets:insert(FileSummary, + {File, FileSum #dqfile { valid_data = (ValidTotalSize - + TotalSize - (?FILE_PACKING_ADJUSTMENT)), + contiguous_prefix = ContiguousTop1}}), + if MnesiaDelete -> + ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); + true -> + ok + end, + sets:add_element(File, Files2); + 1 < RefCount -> + ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + Files2 + end + end, sets:new(), MsgIds), State2 = compact(Files, State), {ok, State2}. 
@@ -275,7 +310,8 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, [{MsgId, _RefCount, File, _Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), ok = mnesia:write(rabbit_disk_queue, - #dq_msg_loc { msg_id_and_queue = {MsgId, Q}, is_delivered = false}, write), + #dq_msg_loc { msg_id_and_queue = {MsgId, Q}, + is_delivered = false}, write), Acc or (CurName =:= File) end, false, MsgIds) end), @@ -286,7 +322,8 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation, internal_publish(Q, MsgId, MsgBody, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id_and_queue = {MsgId, Q}, is_delivered = false}), + ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id_and_queue = {MsgId, Q}, + is_delivered = false}), {ok, State1}. internal_tx_cancel(MsgIds, State) -> @@ -338,7 +375,8 @@ load_from_disk(State) -> true, mnesia:dirty_all_keys(rabbit_disk_queue)), {ok, State1}. -load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> +load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, + current_file_name = CurName }) -> true = ets:insert_new(FileSummary, {CurName, #dqfile { valid_data = 0, contiguous_prefix = 0, left = undefined, @@ -363,7 +401,8 @@ load_messages(Left, [File|Files], {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case length(mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, is_delivered = '_'})) of + #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, + is_delivered = '_'})) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), @@ -404,7 +443,8 @@ recover_crashed_compactions1(Files, TmpFile) -> % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, is_delivered = '_'})) + #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, + is_delivered = '_'})) end, MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), @@ -435,8 +475,10 @@ recover_crashed_compactions1(Files, TmpFile) -> % we're in case 4 above. % check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, is_delivered = '_'})) + true = 0 < + length(mnesia:dirty_match_object(rabbit_disk_queue, + #dq_msg_loc { msg_id_and_queue = {MsgId, '_'}, + is_delivered = '_'})) end, MsgIds), % The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), @@ -485,7 +527,7 @@ find_contiguous_block_prefix([], 0, Acc) -> find_contiguous_block_prefix([], _N, _Acc) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) - when ExpectedOffset =:= Offset + TotalSize + 1 + (2* (?INTEGER_SIZE_BYTES)) -> %% Can't use (?FILE_PACKING_ADJUSTMENT) + when ExpectedOffset =:= Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT) -> find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> find_contiguous_block_prefix(List). 
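%% [Editor's note, not part of the patch: a sketch of how the new pair is
%% meant to be driven. stop/0 shuts the server down but leaves all state on
%% disk for the next start; clean_stop/0 additionally clears the mnesia
%% table, deletes the ets tables and removes every file in the queue
%% directory. The 10MB/1000 limits mirror the arguments the tests later in
%% this series pass to start_link/2; demo_q and msg1 are invented.]

demo_stop_api() ->
    {ok, _Pid} = rabbit_disk_queue:start_link(1024*1024*10, 1000),
    ok = rabbit_disk_queue:tx_publish(msg1, <<"payload">>),
    ok = rabbit_disk_queue:tx_commit(demo_q, [msg1]),
    ok = rabbit_disk_queue:stop(),       %% state survives on disk
    {ok, _} = rabbit_disk_queue:start_link(1024*1024*10, 1000),
    ok = rabbit_disk_queue:clean_stop(). %% wipes files and tables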
-- cgit v1.2.1 From 067ff36bdb3fcac476c67aef120a67ac688548b9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 19:54:19 +0100 Subject: initial stab at some tests (though still only 1 queue!). Also accidentally seem to have committed a change to the Makefile a while back so am undoing that. --- Makefile | 4 ++-- src/rabbit_tests.erl | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 8744f637..b7464244 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/data/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/data/tmp +RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=/tmp SOURCE_DIR=src EBIN_DIR=ebin diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 6312e8e3..0114eb25 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -31,7 +31,9 @@ -module(rabbit_tests). --export([all_tests/0, test_parsing/0]). +-compile(export_all). + +-export([all_tests/0, test_parsing/0, test_disk_queue/0]). -import(lists). @@ -621,3 +623,35 @@ delete_log_handlers(Handlers) -> [[] = error_logger:delete_report_handler(Handler) || Handler <- Handlers], ok. + +test_disk_queue() -> + [begin rdq_time_tx_publish_commit(q, MsgCount, MsgSize), timer:sleep(1000) end || % 1000 milliseconds + MsgSize <- [128, 512, 2048, 8192, 32768, 131072], + MsgCount <- [1024, 2048, 4096, 8192, 16384] + ], + rdq_virgin(). + +rdq_time_tx_publish_commit(Q, MsgCount, MsgSizeBytes) -> + rdq_virgin(), + rdq_start(), + Msg = <<0:(8*MsgSizeBytes)>>, + List = lists:seq(1, MsgCount), + {Micros, ok} = timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List] end, + fun() -> rabbit_disk_queue:tx_commit(Q, List) end]]), + io:format("Published ~p ~p-byte messages in ~p microseconds (~p microseconds/msg) (~p microseconds/byte)~n", [MsgCount, MsgSizeBytes, Micros, (Micros / MsgCount), (Micros / MsgCount / MsgSizeBytes)]), + rdq_stop(). + +rdq_time_commands(Funcs) -> + lists:foreach(fun (F) -> F() end, Funcs). + +rdq_virgin() -> + {Micros, {ok, _}} = timer:tc(rabbit_disk_queue, start_link, [1024*1024*10, 1000]), + io:format("Disk queue start up took ~p microseconds~n", [Micros]), + ok = rabbit_disk_queue:clean_stop(). + +rdq_start() -> + {ok, _} = rabbit_disk_queue:start_link(1024*1024*10, 1000). + +rdq_stop() -> + rabbit_disk_queue:stop(). -- cgit v1.2.1 From 209b8e814e2a5ff1b4b03f989209c9656b8336c1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 21:13:02 +0100 Subject: and now we have some substantial tests. This has already led to a good optimisation on reading, and found and fixed a bug in messages going to multiple queues. --- src/rabbit_disk_queue.erl | 41 +++++++++++++++++------------------------ src/rabbit_tests.erl | 25 +++++++++++++++++++------ 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b67896ce..8de9d22c 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -144,8 +144,8 @@ handle_call(clean_stop, _From, State) -> true = ets:delete(FileSummary), true = ets:delete(FileDetail), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), - {stop, normal, ok, State # dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}}. 
+ {stop, normal, ok, State1 # dqstate { current_file_handle = undefined, + read_file_handles = {dict:new(), gb_trees:empty()}}}. %% gen_server now calls terminate, which then calls shutdown handle_cast({publish, Q, MsgId, MsgBody}, State) -> @@ -197,7 +197,7 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, read_file_handles_limit = ReadFileHandlesLimit, read_file_handles = {ReadHdls, ReadHdlsAge} }) -> - [{MsgId, _RefCount, File, Offset, _TotalSize}] = ets:lookup(MsgLocation, MsgId), + [{MsgId, _RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), % so this next bit implements an LRU for file handles. But it's a bit insane, and smells % of premature optimisation. So I might remove it and dump it overboard {FileHdl, ReadHdls1, ReadHdlsAge1} @@ -221,7 +221,7 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))} end, % read the message - {ok, {MsgBody, BodySize, _TotalSize}} = read_message_at_offset(FileHdl, Offset), + {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), [Obj = #dq_msg_loc {is_delivered = Delivered}] = mnesia:dirty_read(rabbit_disk_queue, {MsgId, Q}), if Delivered -> ok; @@ -270,7 +270,7 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation, current_file_handle = CurHdl, current_file_name = CurName, - current_offset = Offset, + current_offset = CurOffset, file_summary = FileSummary, file_detail = FileDetail }) -> @@ -278,21 +278,21 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio [] -> % New message, lots to do {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), - true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, Offset, TotalSize}), + true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}), [{CurName, FileSum = #dqfile { valid_data = ValidTotalSize, contiguous_prefix = ContiguousTop, right = undefined }}] = ets:lookup(FileSummary, CurName), - true = ets:insert_new(FileDetail, {{CurName, Offset}, TotalSize}), + true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize}), ValidTotalSize1 = ValidTotalSize + TotalSize + (?FILE_PACKING_ADJUSTMENT), - ContiguousTop1 = if Offset =:= ContiguousTop -> + ContiguousTop1 = if CurOffset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file true -> ContiguousTop end, true = ets:insert(FileSummary, {CurName, FileSum #dqfile { valid_data = ValidTotalSize1, contiguous_prefix = ContiguousTop1 }}), - maybe_roll_to_new_file(Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT), - State # dqstate {current_offset = Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT)}); + maybe_roll_to_new_file(CurOffset + TotalSize + (?FILE_PACKING_ADJUSTMENT), + State # dqstate {current_offset = CurOffset + TotalSize + (?FILE_PACKING_ADJUSTMENT)}); [{MsgId, RefCount, File, Offset, TotalSize}] -> % We already know about it, just update counter true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), @@ -559,22 +559,15 @@ append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> KO -> KO end. 
-read_message_at_offset(FileHdl, Offset) -> +read_message_at_offset(FileHdl, Offset, TotalSize) -> + TotalSizeWriteOkBytes = TotalSize + 1, case file:position(FileHdl, {bof, Offset}) of {ok, Offset} -> - case file:read(FileHdl, 2 * (?INTEGER_SIZE_BYTES)) of - {ok, <>} -> - ExpectedAbsPos = Offset + (2 * (?INTEGER_SIZE_BYTES)) + MsgIdBinSize, - case file:position(FileHdl, {cur, MsgIdBinSize}) of - {ok, ExpectedAbsPos} -> - BodySize = TotalSize - MsgIdBinSize, - case file:read(FileHdl, 1 + BodySize) of - {ok, <>} -> - {ok, {MsgBody, BodySize, TotalSize}}; - KO -> KO - end; - KO -> KO - end; + case file:read(FileHdl, TotalSize + (?FILE_PACKING_ADJUSTMENT)) of + {ok, <>} -> + BodySize = TotalSize - MsgIdBinSize, + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, (?WRITE_OK):(?WRITE_OK_SIZE_BITS)>> = Rest, + {ok, {MsgBody, BodySize}}; KO -> KO end; KO -> KO diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 0114eb25..a9f546dc 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -55,6 +55,7 @@ all_tests() -> passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), + passed = test_disk_queue(), passed. test_parsing() -> @@ -625,21 +626,33 @@ delete_log_handlers(Handlers) -> ok. test_disk_queue() -> - [begin rdq_time_tx_publish_commit(q, MsgCount, MsgSize), timer:sleep(1000) end || % 1000 milliseconds + [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), timer:sleep(1000) end || % 1000 milliseconds MsgSize <- [128, 512, 2048, 8192, 32768, 131072], + Qs <- [[1], lists:seq(1,10), lists:seq(1,100), lists:seq(1,1000)], MsgCount <- [1024, 2048, 4096, 8192, 16384] ], - rdq_virgin(). + rdq_virgin(), + passed. -rdq_time_tx_publish_commit(Q, MsgCount, MsgSizeBytes) -> +rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> rdq_virgin(), rdq_start(), + QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), {Micros, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List] end, - fun() -> rabbit_disk_queue:tx_commit(Q, List) end]]), - io:format("Published ~p ~p-byte messages in ~p microseconds (~p microseconds/msg) (~p microseconds/byte)~n", [MsgCount, MsgSizeBytes, Micros, (Micros / MsgCount), (Micros / MsgCount / MsgSizeBytes)]), + [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List, _ <- Qs] end, + fun() -> [rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end + ]]), + io:format("Published ~p ~p-byte messages in ~p microseconds to ~p queues (~p microseconds/msg) (~p microseconds/byte)~n", + [MsgCount, MsgSizeBytes, Micros, QCount, (Micros / (MsgCount * QCount)), (Micros / (MsgCount * QCount * MsgSizeBytes))]), + {Micros2, ok} = timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [begin [begin rabbit_disk_queue:deliver(Q, N), ok end || N <- List], + rabbit_disk_queue:ack(Q, List), + rabbit_disk_queue:tx_commit(Q, []) + end || Q <- Qs] + end]]), + io:format("Delivered ~p ~p-byte messages in ~p microseconds from ~p queues (~p microseconds/msg) (~p microseconds/byte)~n", [MsgCount, MsgSizeBytes, Micros2, QCount, (Micros2 / (MsgCount * QCount)), (Micros2 / (MsgCount * QCount * MsgSizeBytes))]), rdq_stop(). 
rdq_time_commands(Funcs) -> -- cgit v1.2.1 From f21f5006ee87a4fb9a8abba77e3254588929024d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 21:28:22 +0100 Subject: removed some parenthesis --- Makefile | 4 +-- src/rabbit_disk_queue.erl | 62 +++++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index b7464244..8744f637 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/tmp +RABBITMQ_MNESIA_DIR=/data/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=/data/tmp SOURCE_DIR=src EBIN_DIR=ebin diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 8de9d22c..ebc1488e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -48,13 +48,13 @@ -define(WRITE_OK_SIZE_BITS, 8). -define(WRITE_OK, 255). -define(INTEGER_SIZE_BYTES, 8). --define(INTEGER_SIZE_BITS, 8 * ?INTEGER_SIZE_BYTES). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). -define(FILE_DETAIL_ETS_NAME, rabbit_disk_queue_file_detail). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_PACKING_ADJUSTMENT, 1 + (2* (?INTEGER_SIZE_BYTES))). +-define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). -define(SERVER, ?MODULE). @@ -105,11 +105,11 @@ clean_stop() -> init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), - InitName = "0" ++ (?FILE_EXTENSION), - FileSummary = ets:new((?FILE_SUMMARY_ETS_NAME), [set, private]), - State = #dqstate { msg_location = ets:new((?MSG_LOC_ETS_NAME), [set, private]), + InitName = "0" ++ ?FILE_EXTENSION, + FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), + State = #dqstate { msg_location = ets:new(?MSG_LOC_ETS_NAME, [set, private]), file_summary = FileSummary, - file_detail = ets:new((?FILE_DETAIL_ETS_NAME), [ordered_set, private]), + file_detail = ets:new(?FILE_DETAIL_ETS_NAME, [ordered_set, private]), current_file_num = 0, current_file_name = InitName, current_file_handle = undefined, @@ -251,7 +251,7 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL ContiguousTop1 = lists:min([ContiguousTop, Offset]), true = ets:insert(FileSummary, {File, FileSum #dqfile { valid_data = (ValidTotalSize - - TotalSize - (?FILE_PACKING_ADJUSTMENT)), + TotalSize - ?FILE_PACKING_ADJUSTMENT), contiguous_prefix = ContiguousTop1}}), if MnesiaDelete -> ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); @@ -284,15 +284,15 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio right = undefined }}] = ets:lookup(FileSummary, CurName), true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize}), - ValidTotalSize1 = ValidTotalSize + TotalSize + (?FILE_PACKING_ADJUSTMENT), + ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, ContiguousTop1 = if CurOffset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file true -> ContiguousTop end, true = ets:insert(FileSummary, {CurName, FileSum #dqfile { valid_data = ValidTotalSize1, contiguous_prefix = ContiguousTop1 }}), - maybe_roll_to_new_file(CurOffset + TotalSize + (?FILE_PACKING_ADJUSTMENT), - State # dqstate {current_offset = CurOffset + TotalSize + (?FILE_PACKING_ADJUSTMENT)}); + 
maybe_roll_to_new_file(CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + State # dqstate {current_offset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT}); [{MsgId, RefCount, File, Offset, TotalSize}] -> % We already know about it, just update counter true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}), @@ -341,7 +341,7 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi ok = file:sync(CurHdl), ok = file:close(CurHdl), NextNum = CurNum + 1, - NextName = integer_to_list(NextNum) ++ (?FILE_EXTENSION), + NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]), [{CurName, FileSum = #dqfile {right = undefined}}] = ets:lookup(FileSummary, CurName), true = ets:insert(FileSummary, {CurName, FileSum #dqfile {right = NextName}}), @@ -387,7 +387,7 @@ load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> Offset = case ets:match_object(FileDetail, {{Left, '_'}, '_'}) of [] -> 0; L -> {{Left, Offset1}, TotalSize} = lists:last(L), - Offset1 + TotalSize + (?FILE_PACKING_ADJUSTMENT) + Offset1 + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State # dqstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; @@ -408,7 +408,7 @@ load_messages(Left, [File|Files], true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), true = ets:insert_new(FileDetail, {{File, Offset}, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], - VTSAcc + TotalSize + (?FILE_PACKING_ADJUSTMENT) + VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } end end, {[], 0}, Messages), @@ -435,7 +435,7 @@ recover_crashed_compactions(Files, TmpFiles) -> recover_crashed_compactions1(Files, TmpFile) -> GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, - NonTmpRelatedFile = filename:rootname(TmpFile) ++ (?FILE_EXTENSION), + NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFile, Files), % [{MsgId, TotalSize, FileOffset}] {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), @@ -492,7 +492,7 @@ recover_crashed_compactions1(Files, TmpFile) -> % extending truncate. % Remember the head of the list will be the highest entry in the file [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, - TmpSize = TmpTopOffset + TmpTopTotalSize + (?FILE_PACKING_ADJUSTMENT), + TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Top + TmpSize, {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move @@ -519,7 +519,7 @@ recover_crashed_compactions1(Files, TmpFile) -> find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of - {ok, Acc} -> {Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT), lists:reverse(Acc)}; + {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, lists:reverse(Acc)}; Res -> Res end. 
find_contiguous_block_prefix([], 0, Acc) -> @@ -527,7 +527,7 @@ find_contiguous_block_prefix([], 0, Acc) -> find_contiguous_block_prefix([], _N, _Acc) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) - when ExpectedOffset =:= Offset + TotalSize + (?FILE_PACKING_ADJUSTMENT) -> + when ExpectedOffset =:= Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT -> find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> find_contiguous_block_prefix(List). @@ -538,9 +538,9 @@ file_name_sort(A, B) -> ANum < BNum. get_disk_queue_files() -> - DQFiles = filelib:wildcard("*" ++ (?FILE_EXTENSION), base_directory()), + DQFiles = filelib:wildcard("*" ++ ?FILE_EXTENSION, base_directory()), DQFilesSorted = lists:sort(fun file_name_sort/2, DQFiles), - DQTFiles = filelib:wildcard("*" ++ (?FILE_EXTENSION_TMP), base_directory()), + DQTFiles = filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, base_directory()), DQTFilesSorted = lists:sort(fun file_name_sort/2, DQTFiles), {DQFilesSorted, DQTFilesSorted}. @@ -551,10 +551,10 @@ append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> MsgIdBin = term_to_binary(MsgId), MsgIdBinSize = size(MsgIdBin), TotalSize = BodySize + MsgIdBinSize, - case file:write(FileHdl, <>) of + ?WRITE_OK:?WRITE_OK_SIZE_BITS>>) of ok -> {ok, TotalSize}; KO -> KO end. @@ -563,10 +563,10 @@ read_message_at_offset(FileHdl, Offset, TotalSize) -> TotalSizeWriteOkBytes = TotalSize + 1, case file:position(FileHdl, {bof, Offset}) of {ok, Offset} -> - case file:read(FileHdl, TotalSize + (?FILE_PACKING_ADJUSTMENT)) of - {ok, <>} -> + case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of + {ok, <>} -> BodySize = TotalSize - MsgIdBinSize, - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, (?WRITE_OK):(?WRITE_OK_SIZE_BITS)>> = Rest, + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, ?WRITE_OK:?WRITE_OK_SIZE_BITS>> = Rest, {ok, {MsgBody, BodySize}}; KO -> KO end; @@ -591,13 +591,13 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> read_next_file_entry(FileHdl, Offset) -> - TwoIntegers = 2 * (?INTEGER_SIZE_BYTES), + TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, case file:read(FileHdl, TwoIntegers) of - {ok, <>} -> + {ok, <>} -> case {TotalSize =:= 0, MsgIdBinSize =:= 0} of {true, _} -> {ok, eof}; %% Nothing we can do other than stop {false, true} -> %% current message corrupted, try skipping past it - ExpectedAbsPos = Offset + (?FILE_PACKING_ADJUSTMENT) + TotalSize, + ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize, case file:position(FileHdl, {cur, TotalSize + 1}) of {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}}; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up @@ -610,11 +610,11 @@ read_next_file_entry(FileHdl, Offset) -> case file:position(FileHdl, {cur, TotalSize - MsgIdBinSize}) of {ok, ExpectedAbsPos} -> case file:read(FileHdl, 1) of - {ok, <<(?WRITE_OK):(?WRITE_OK_SIZE_BITS)>>} -> + {ok, <>} -> {ok, {ok, binary_to_term(MsgId), TotalSize, - Offset + (?FILE_PACKING_ADJUSTMENT) + TotalSize}}; + Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize}}; {ok, _SomeOtherData} -> - {ok, {corrupted, Offset + (?FILE_PACKING_ADJUSTMENT) + TotalSize}}; + {ok, {corrupted, Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize}}; KO -> KO end; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up -- cgit v1.2.1 From 875cbc00084e3e9788ceb6036ae63037e2ab530e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 21:29:36 +0100 Subject: gah, 
committed the damn makefile again. Sorry. --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8744f637..b7464244 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/data/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/data/tmp +RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=/tmp SOURCE_DIR=src EBIN_DIR=ebin -- cgit v1.2.1 From d45981da39283a78e39dfb861417a9cadf87b9d2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Apr 2009 23:59:02 +0100 Subject: initial work on compacter. If you ack messages in exactly the same order as they arrived in, then files will be deleted correctly. --- src/rabbit_disk_queue.erl | 70 +++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ebc1488e..869ed841 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -58,8 +58,6 @@ -define(SERVER, ?MODULE). --record(dqfile, {valid_data, contiguous_prefix, left, right}). - -record(dqstate, {msg_location, file_summary, file_detail, @@ -106,9 +104,8 @@ clean_stop() -> init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), InitName = "0" ++ ?FILE_EXTENSION, - FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), State = #dqstate { msg_location = ets:new(?MSG_LOC_ETS_NAME, [set, private]), - file_summary = FileSummary, + file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), file_detail = ets:new(?FILE_DETAIL_ETS_NAME, [ordered_set, private]), current_file_num = 0, current_file_name = InitName, @@ -236,7 +233,8 @@ internal_ack(Q, MsgIds, State) -> %% Q is only needed if MnesiaDelete = true remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation, file_summary = FileSummary, - file_detail = FileDetail + file_detail = FileDetail, + current_file_name = CurName }) -> Files = lists:foldl(fun (MsgId, Files2) -> @@ -244,21 +242,21 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL = ets:lookup(MsgLocation, MsgId), if 1 =:= RefCount -> true = ets:delete(MsgLocation, MsgId), - [{File, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop }}] + [{File, ValidTotalSize, ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), true = ets:delete(FileDetail, {File, Offset}), ContiguousTop1 = lists:min([ContiguousTop, Offset]), true = ets:insert(FileSummary, - {File, FileSum #dqfile { valid_data = (ValidTotalSize - - TotalSize - ?FILE_PACKING_ADJUSTMENT), - contiguous_prefix = ContiguousTop1}}), + {File, (ValidTotalSize - TotalSize - ?FILE_PACKING_ADJUSTMENT), + ContiguousTop1, Left, Right}), if MnesiaDelete -> ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); true -> ok end, - sets:add_element(File, Files2); + if CurName =:= File -> Files2; + true -> sets:add_element(File, Files2) + end; 1 < RefCount -> ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), Files2 @@ -279,9 +277,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio % New message, lots to do {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}), - [{CurName, FileSum = #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - right = undefined }}] + [{CurName, 
ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize}), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, @@ -289,8 +285,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio ValidTotalSize; % can't be any holes in this file true -> ContiguousTop end, - true = ets:insert(FileSummary, {CurName, FileSum #dqfile { valid_data = ValidTotalSize1, - contiguous_prefix = ContiguousTop1 }}), + true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}), maybe_roll_to_new_file(CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, State # dqstate {current_offset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT}); [{MsgId, RefCount, File, Offset, TotalSize}] -> @@ -343,12 +338,8 @@ maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimi NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]), - [{CurName, FileSum = #dqfile {right = undefined}}] = ets:lookup(FileSummary, CurName), - true = ets:insert(FileSummary, {CurName, FileSum #dqfile {right = NextName}}), - true = ets:insert_new(FileSummary, {NextName, #dqfile { valid_data = 0, - contiguous_prefix = 0, - left = CurName, - right = undefined }}), + true = ets:update_element(FileSummary, CurName, {5, NextName}), % 5 is Right + true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), {ok, State # dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, @@ -359,9 +350,31 @@ maybe_roll_to_new_file(_, State) -> %% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- -compact(_FilesSet, State) -> +compact(FilesSet, State) -> + % smallest number, hence eldest, hence left-most, first + Files = lists:sort(sets:to_list(FilesSet)), + % foldl reverses, so now youngest/right-most first + RemainingFiles = lists:foldl(fun(File, Acc) -> delete_empty_files(File, Acc, State) end, [], Files), State. +delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> + [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), + case ValidData of + % we should NEVER find the current file in here - hence right should always be a file, not undefined + 0 -> case {Left, Right} of + {undefined, _} when not(is_atom(Right)) -> + % the eldest file is empty. YAY! + true = ets:update_element(FileSummary, Right, {4, undefined}); % left is the 4th field + {_, _} when not(is_atom(Right)) -> + true = ets:update_element(FileSummary, Right, {4, Left}), % left is the 4th field + true = ets:update_element(FileSummary, Left, {5, Right}) % right is the 5th field + end, + true = ets:delete(FileSummary, File), + ok = file:delete(form_filename(File)), + Acc; + _ -> [File|Acc] + end. 
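An aside on delete_empty_files above: the {File, ValidData, ContiguousTop, Left, Right} rows in file_summary form a doubly-linked list of files, so dropping an empty file is pointer surgery on its two neighbours. A minimal standalone sketch of just that relinking (illustrative only: a 3-field {Name, Left, Right} table and invented names, not the module's own code):

    -module(dll_sketch).
    -export([demo/0]).

    %% Rows are {Name, Left, Right}: the key is field 1, Left is field 2, Right is field 3.
    demo() ->
        T = ets:new(files_sketch, [set]),
        true = ets:insert(T, [{a, undefined, b}, {b, a, c}, {c, b, undefined}]),
        ok = unlink_node(T, b),
        [{a, undefined, c}] = ets:lookup(T, a), % a's Right now skips b
        [{c, a, undefined}] = ets:lookup(T, c), % c's Left likewise
        ok.

    %% Repoint Node's neighbours at each other, then drop Node.
    unlink_node(T, Node) ->
        [{Node, Left, Right}] = ets:lookup(T, Node),
        case Left of
            undefined -> ok;
            _ -> true = ets:update_element(T, Left, {3, Right}), ok
        end,
        case Right of
            undefined -> ok;
            _ -> true = ets:update_element(T, Right, {2, Left}), ok
        end,
        true = ets:delete(T, Node),
        ok.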
+ %% ---- DISK RECOVERY ---- load_from_disk(State) -> @@ -377,10 +390,7 @@ load_from_disk(State) -> load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> - true = ets:insert_new(FileSummary, {CurName, #dqfile { valid_data = 0, - contiguous_prefix = 0, - left = undefined, - right = undefined}}), + true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), State; load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> Num = list_to_integer(filename:rootname(Left)), @@ -419,11 +429,7 @@ load_messages(Left, [File|Files], [] -> undefined; [F|_] -> F end, - true = ets:insert_new(FileSummary, {File, #dqfile { valid_data = ValidTotalSize, - contiguous_prefix = ContiguousTop, - left = Left, - right = Right - }}), + true = ets:insert_new(FileSummary, {File, ValidTotalSize, ContiguousTop, Left, Right}), load_messages(File, Files, State). %% ---- DISK RECOVERY OF FAILED COMPACTION ---- -- cgit v1.2.1 From 353bcc81497b60d2e5c74c5826d63aafca3069f6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 13 Apr 2009 00:14:07 +0100 Subject: tiny change to fix where the badmatch happens. Must be a bug in compaction as it would appear messages are being deleted early. --- Makefile | 4 ++-- src/rabbit_disk_queue.erl | 2 +- src/rabbit_tests.erl | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b7464244..8744f637 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/tmp +RABBITMQ_MNESIA_DIR=/data/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=/data/tmp SOURCE_DIR=src EBIN_DIR=ebin diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 869ed841..3527fd2a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -384,7 +384,7 @@ load_from_disk(State) -> % There should be no more tmp files now, so go ahead and load the whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), % Finally, check there is nothing in mnesia which we haven't loaded - true = lists:foldl(fun ({MsgId, _Q}, true) -> 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, + true = lists:foldl(fun ({MsgId, _Q}, true) -> true = 1 =:= length(ets:lookup(MsgLocation, MsgId)) end, true, mnesia:dirty_all_keys(rabbit_disk_queue)), {ok, State1}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a9f546dc..89c575bf 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -627,9 +627,9 @@ delete_log_handlers(Handlers) -> test_disk_queue() -> [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), timer:sleep(1000) end || % 1000 milliseconds - MsgSize <- [128, 512, 2048, 8192, 32768, 131072], - Qs <- [[1], lists:seq(1,10), lists:seq(1,100), lists:seq(1,1000)], - MsgCount <- [1024, 2048, 4096, 8192, 16384] + MsgSize <- [512, 8192, 32768, 131072], + Qs <- [[1], lists:seq(1,10)], %, lists:seq(1,100), lists:seq(1,1000)], + MsgCount <- [1024, 4096, 16384] ], rdq_virgin(), passed. -- cgit v1.2.1 From 7bef6d696a099a60724d911c0174f2005ef72dae Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 13 Apr 2009 00:49:18 +0100 Subject: Fix for that bug. Basically, mnesia wasn't being deleted correctly, which meant on the subsequent startup, it would report messages which we couldn't find anywhere, as they'd been deleted from disk. 
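An aside on the fix below: previously the per-queue mnesia row was only removed inside the RefCount =:= 1 branch, so an ack for a message still referenced by other queues left a stale {MsgId, Q} row behind, which recovery then reported as a message it could not find on disk. The diff hoists the mnesia delete out of the refcount branch. The intended shape, as a hedged sketch (maybe_release_disk_copy is an invented helper name, and in the real code the delete is still guarded by the MnesiaDelete flag):

    %% Per-queue bookkeeping dies on every ack; only the shared on-disk
    %% copy of the message body is refcount-guarded.
    ack_one(Q, MsgId, State) ->
        State1 = maybe_release_disk_copy(MsgId, State), % refcounted
        ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}), % regardless of refcount
        State1.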
--- Makefile | 4 ++-- src/rabbit_disk_queue.erl | 44 +++++++++++++++++++++++--------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 8744f637..b7464244 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/data/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/data/tmp +RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=/tmp SOURCE_DIR=src EBIN_DIR=ebin diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3527fd2a..e2da2ce1 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -240,27 +240,29 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL = lists:foldl(fun (MsgId, Files2) -> [{MsgId, RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), - if 1 =:= RefCount -> - true = ets:delete(MsgLocation, MsgId), - [{File, ValidTotalSize, ContiguousTop, Left, Right}] - = ets:lookup(FileSummary, File), - true = ets:delete(FileDetail, {File, Offset}), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, - {File, (ValidTotalSize - TotalSize - ?FILE_PACKING_ADJUSTMENT), - ContiguousTop1, Left, Right}), - if MnesiaDelete -> - ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); - true -> - ok - end, - if CurName =:= File -> Files2; - true -> sets:add_element(File, Files2) - end; - 1 < RefCount -> - ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), - Files2 - end + Files3 = + if 1 =:= RefCount -> + true = ets:delete(MsgLocation, MsgId), + [{File, ValidTotalSize, ContiguousTop, Left, Right}] + = ets:lookup(FileSummary, File), + true = ets:delete(FileDetail, {File, Offset}), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + true = ets:insert(FileSummary, + {File, (ValidTotalSize - TotalSize - ?FILE_PACKING_ADJUSTMENT), + ContiguousTop1, Left, Right}), + if CurName =:= File -> Files2; + true -> sets:add_element(File, Files2) + end; + 1 < RefCount -> + ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}), + Files2 + end, + if MnesiaDelete -> + ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q}); + true -> + ok + end, + Files3 end, sets:new(), MsgIds), State2 = compact(Files, State), {ok, State2}. -- cgit v1.2.1 From c580065e1b8ec99f3073147116647e7e59a84e2a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 13 Apr 2009 10:46:43 +0100 Subject: initial logic prior to compaction --- src/rabbit_disk_queue.erl | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e2da2ce1..371a5e4b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -356,8 +356,42 @@ compact(FilesSet, State) -> % smallest number, hence eldest, hence left-most, first Files = lists:sort(sets:to_list(FilesSet)), % foldl reverses, so now youngest/right-most first - RemainingFiles = lists:foldl(fun(File, Acc) -> delete_empty_files(File, Acc, State) end, [], Files), + RemainingFiles = lists:foldl(fun (File, Acc) -> delete_empty_files(File, Acc, State) end, [], Files), + lists:foldl(fun combineFile/2, State, lists:reverse(RemainingFiles)). 
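A note on the "foldl reverses" comment in compact/2 above: lists:foldl/3 prepends on every step, so accumulating into a list hands the input back reversed, which is how the oldest-first sort comes out youngest-first:

    1> lists:foldl(fun (File, Acc) -> [File | Acc] end, [], ["1.rdq", "2.rdq", "3.rdq"]).
    ["3.rdq","2.rdq","1.rdq"]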
+ +combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, + file_summary = FileSummary, + current_file_name = CurName + }) -> + [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), + GoRight = fun() -> + case Right of + undefined -> State; + _ when not(CurName =:= Right) -> + [{Right, RightValidData, _RightContiguousTop, File, _Right}] + = ets:lookup(FileSummary, Right), + if FileSizeLimit >= (ValidData + RightValidData) -> + combineFiles(Right, File, State); + true -> State + end; + _ -> State + end + end, + case Left of + undefined -> + GoRight(); + _ -> [{Left, LeftValidData, _LeftContiguousTop, _Left, File}] + = ets:lookup(FileSummary, Left), + if FileSizeLimit >= (ValidData + LeftValidData) -> + combineFiles(File, Left, State); + true -> + GoRight() + end + end. + +combineFiles(Source, Destination, State) -> State. + delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), -- cgit v1.2.1 From f60a9393201b73f3a2594d4a208b1e04cf049589 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 14 Apr 2009 10:50:12 +0100 Subject: GC file movement is done, but updating the accounting information is not done. So don't use it! --- src/rabbit_disk_queue.erl | 145 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 116 insertions(+), 29 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 371a5e4b..f70f31b4 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -167,6 +167,7 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { current_file_handle = FileHdl, read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> + % deliberately ignoring return codes here if FileHdl =:= undefined -> ok; true -> file:sync(FileHdl), file:close(FileHdl) @@ -281,7 +282,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), - true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize}), + true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize, MsgId}), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, ContiguousTop1 = if CurOffset =:= ContiguousTop -> ValidTotalSize; % can't be any holes in this file @@ -363,35 +364,121 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, file_summary = FileSummary, current_file_name = CurName }) -> - [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), - GoRight = fun() -> - case Right of - undefined -> State; - _ when not(CurName =:= Right) -> - [{Right, RightValidData, _RightContiguousTop, File, _Right}] - = ets:lookup(FileSummary, Right), - if FileSizeLimit >= (ValidData + RightValidData) -> - combineFiles(Right, File, State); - true -> State - end; - _ -> State - end - end, - case Left of - undefined -> - GoRight(); - _ -> [{Left, LeftValidData, _LeftContiguousTop, _Left, File}] - = ets:lookup(FileSummary, Left), - if FileSizeLimit >= (ValidData + LeftValidData) -> - combineFiles(File, Left, State); - true -> - GoRight() - end + % DELIBERATE BADMATCH. 
This code is not ready yet + ko = io:format("uh oh~n", []), + % the file we're looking at may no longer exist as it may have been deleted + % within the current GC run + case ets:lookup(FileSummary, File) of + [] -> State; + [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> + GoRight = fun() -> + case Right of + undefined -> State; + _ when not(CurName =:= Right) -> + [RightObj = {Right, RightValidData, _RightContiguousTop, File, RightRight}] + = ets:lookup(FileSummary, Right), + RightSumData = ValidData + RightValidData, + if FileSizeLimit >= RightSumData -> + % here, Right will be the source and so will be deleted, + % File will be the destination + State1 = combineFiles(RightObj, FileObj, State), + % this could fail if RightRight is undefined + ets:update_element(FileSummary, RightRight, {4, File}), % left is the 4th field + true = ets:insert(FileSummary, {File, RightSumData, RightSumData, Left, RightRight}), + true = ets:delete(FileSummary, Right), + State1; + true -> State + end; + _ -> State + end + end, + case Left of + undefined -> + GoRight(); + _ -> [LeftObj = {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] + = ets:lookup(FileSummary, Left), + LeftSumData = ValidData + LeftValidData, + if FileSizeLimit >= LeftSumData -> + % here, File will be the source and so will be deleted, + % Left will be the destination + State1 = combineFiles(FileObj, LeftObj, State), + % this could fail if Right is undefined + ets:update_element(FileSummary, Right, {4, Left}), % left is the 4th field + true = ets:insert(FileSummary, {Left, LeftSumData, LeftSumData, LeftLeft, Right}), + true = ets:delete(FileSummary, File), + State1; + true -> + GoRight() + end + end end. -combineFiles(Source, Destination, State) -> +combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, + {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, + State1) -> + (State = #dqstate { file_detail = FileDetail }) = closeFile(Source, closeFile(Destination, State1)), + {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]), + {ok, DestinationHdl} = file:open(form_filename(Destination), [read, write, raw, binary, delayed_write, read_ahead]), + ExpectedSize = SourceValid + DestinationValid, + % if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file + % if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file + % then truncate, copy back in, and then copy over from Source + % otherwise we just truncate straight away and copy over from Source + if DestinationContiguousTop =:= DestinationValid -> + {ok, DestinationValid} = file:position(DestinationHdl, {bof, DestinationValid}), + ok = file:truncate(DestinationHdl), + {ok, ExpectedSize} = file:position(DestinationHdl, {cur, SourceValid}), + ok = file:truncate(DestinationHdl), + {ok, DestinationValid} = file:position(DestinationHdl, {bof, DestinationValid}); + true -> + Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = file:open(form_filename(Tmp), [read, write, raw, binary, delayed_write, read_ahead]), + Worklist = lists:filter(fun ({{Destination2, Offset}, _TotalSize, _MsgId}) when Destination2 =:= Destination -> + Offset > DestinationContiguousTop + end, ets:match_object(FileDetail, {{Destination, '_'}, '_', '_'})), + % RevMapping :: [{TmpOffset, DestinationOrigOffset, TotalSize}] + {TmpSize, RevMapping} = + lists:foldl(fun ({{Destination2, 
Offset}, TotalSize}, {CurOffset, Acc}) when Destination2 =:= Destination -> + {ok, Offset} = file:position(DestinationHdl, {bof, Offset}), + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + {ok, Size} = file:copy(DestinationHdl, TmpHdl, Size), + {CurOffset + Size, [{CurOffset, Offset, TotalSize}|Acc]} + end, {0, []}, Worklist), + % so now Tmp contains everything we need to salvage from Destination, + % so truncate Destination and copy from Tmp back to the end + {ok, 0} = file:position(TmpHdl, {bof, 0}), + {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), + ok = file:truncate(DestinationHdl), + {ok, ExpectedSize} = file:position(DestinationHdl, {cur, SourceValid}), + ok = file:truncate(DestinationHdl), + {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), + {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), + ok = file:sync(DestinationHdl), + ok = file:close(TmpHdl), + ok = file:delete(form_filename(Tmp)) + end, + SourceWorkList = ets:match_object(FileDetail, {{Source, '_'}, '_', '_'}), + {ExpectedSize, RevMapping2} = + lists:foldl(fun ({{Source2, Offset}, TotalSize, _MsgId}, {CurOffset, Acc}) when Source2 =:= Source -> + {ok, Offset} = file:position(SourceHdl, {bof, Offset}), + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + {ok, Size} = file:copy(SourceHdl, DestinationHdl, Size), + {CurOffset + Size, [{CurOffset, Offset, TotalSize}|Acc]} + end, {DestinationContiguousTop, []}, SourceWorkList), + ok = file:sync(DestinationHdl), + ok = file:close(SourceHdl), + ok = file:close(DestinationHdl), + ok = file:delete(form_filename(Source)), State. - + +closeFile(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge} }) -> + case dict:find(File, ReadHdls) of + error -> + State; + {ok, {Hdl, Then}} -> + ok = file:close(Hdl), + State #dqstate { read_file_handles = { dict:erase(File, ReadHdls), gb_trees:delete(Then, ReadHdlsAge) } } + end. delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), @@ -430,7 +517,7 @@ load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, State; load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> Num = list_to_integer(filename:rootname(Left)), - Offset = case ets:match_object(FileDetail, {{Left, '_'}, '_'}) of + Offset = case ets:match_object(FileDetail, {{Left, '_'}, '_', '_'}) of [] -> 0; L -> {{Left, Offset1}, TotalSize} = lists:last(L), Offset1 + TotalSize + ?FILE_PACKING_ADJUSTMENT @@ -452,7 +539,7 @@ load_messages(Left, [File|Files], 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), - true = ets:insert_new(FileDetail, {{File, Offset}, TotalSize}), + true = ets:insert_new(FileDetail, {{File, Offset}, TotalSize, MsgId}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } -- cgit v1.2.1 From a727a33340e97c280992198dd6ce4f4b7c8e9dc2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 14 Apr 2009 12:31:59 +0100 Subject: well, the GC is _written_ now. Not tested yet... 
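An aside on the position/truncate dance in combineFiles above: file:truncate/1 cuts the file at the current position, and on POSIX systems truncating after seeking past end-of-file grows the file (zero-filled), which is how Destination is first shrunk to its valid prefix and then pre-sized to the combined length before data is copied back in. A self-contained sketch of just that trick (the scratch path is invented):

    demo_truncate() ->
        Path = "/tmp/truncate_demo.dat",
        {ok, Hdl} = file:open(Path, [read, write, raw, binary]),
        ok = file:write(Hdl, <<0:(8*80)>>),          % 80 bytes of zeros
        {ok, 50} = file:position(Hdl, {bof, 50}),
        ok = file:truncate(Hdl),                     % shrink to 50 bytes
        {ok, 120} = file:position(Hdl, {bof, 120}),
        ok = file:truncate(Hdl),                     % grow to 120 bytes, zero-filled
        ok = file:close(Hdl),
        120 = filelib:file_size(Path),
        ok = file:delete(Path).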
--- src/rabbit_disk_queue.erl | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index f70f31b4..22406b0b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -364,8 +364,6 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, file_summary = FileSummary, current_file_name = CurName }) -> - % DELIBERATE BADMATCH. This code is not ready yet - ko = io:format("uh oh~n", []), % the file we're looking at may no longer exist as it may have been deleted % within the current GC run case ets:lookup(FileSummary, File) of @@ -416,7 +414,8 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, State1) -> - (State = #dqstate { file_detail = FileDetail }) = closeFile(Source, closeFile(Destination, State1)), + (State = #dqstate { file_detail = FileDetail, msg_location = MsgLocation }) + = closeFile(Source, closeFile(Destination, State1)), {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]), {ok, DestinationHdl} = file:open(form_filename(Destination), [read, write, raw, binary, delayed_write, read_ahead]), ExpectedSize = SourceValid + DestinationValid, @@ -433,18 +432,29 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = file:open(form_filename(Tmp), [read, write, raw, binary, delayed_write, read_ahead]), - Worklist = lists:filter(fun ({{Destination2, Offset}, _TotalSize, _MsgId}) when Destination2 =:= Destination -> + % as FileDetail is an ordered_set, we should have the lowest offsets first + Worklist = lists:filter(fun ({{Destination2, Offset}, _TotalSize, _MsgId}) + when Destination2 =:= Destination, Offset /= DestinationContiguousTop -> + % it cannot be that Offset == DestinationContiguousTop + % because if it was then DestinationContiguousTop would have been + % extended by TotalSize Offset > DestinationContiguousTop end, ets:match_object(FileDetail, {{Destination, '_'}, '_', '_'})), - % RevMapping :: [{TmpOffset, DestinationOrigOffset, TotalSize}] - {TmpSize, RevMapping} = - lists:foldl(fun ({{Destination2, Offset}, TotalSize}, {CurOffset, Acc}) when Destination2 =:= Destination -> + TmpSize = + lists:foldl(fun ({{Destination2, Offset}, TotalSize, MsgId}, CurOffset) when Destination2 =:= Destination -> {ok, Offset} = file:position(DestinationHdl, {bof, Offset}), Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, {ok, Size} = file:copy(DestinationHdl, TmpHdl, Size), - {CurOffset + Size, [{CurOffset, Offset, TotalSize}|Acc]} - end, {0, []}, Worklist), + % this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset + FinalOffset = DestinationContiguousTop + CurOffset, + true = ets:update_element(MsgLocation, MsgId, {4, FinalOffset}), + % sadly you can't use update_element to change the key: + true = ets:delete(FileDetail, {Destination, Offset}), + true = ets:insert_new(FileDetail, {{Destination, FinalOffset}, TotalSize, MsgId}), + CurOffset + Size + end, 0, Worklist), % so now Tmp contains everything we need to salvage from Destination, + % and both FileDetail and MsgLocation have been updated to reflect compaction of Destination % so truncate Destination and 
copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), @@ -453,18 +463,24 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig ok = file:truncate(DestinationHdl), {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), + % position in DestinationHdl should now be DestinationValid ok = file:sync(DestinationHdl), ok = file:close(TmpHdl), ok = file:delete(form_filename(Tmp)) end, SourceWorkList = ets:match_object(FileDetail, {{Source, '_'}, '_', '_'}), - {ExpectedSize, RevMapping2} = - lists:foldl(fun ({{Source2, Offset}, TotalSize, _MsgId}, {CurOffset, Acc}) when Source2 =:= Source -> + ExpectedSize = + lists:foldl(fun ({{Source2, Offset}, TotalSize, MsgId}, CurOffset) when Source2 =:= Source -> {ok, Offset} = file:position(SourceHdl, {bof, Offset}), Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, {ok, Size} = file:copy(SourceHdl, DestinationHdl, Size), - {CurOffset + Size, [{CurOffset, Offset, TotalSize}|Acc]} - end, {DestinationContiguousTop, []}, SourceWorkList), + % update MsgLocation to reflect change of file (3rd field) and offset (4th field) + true = ets:update_element(MsgLocation, MsgId, [{3, Destination}, {4, CurOffset}]), + % can't use update_element to change key: + true = ets:delete(FileDetail, {Source, Offset}), + true = ets:insert_new(FileDetail, {{Destination, CurOffset}, TotalSize, MsgId}), + CurOffset + Size + end, DestinationValid, SourceWorkList), ok = file:sync(DestinationHdl), ok = file:close(SourceHdl), ok = file:close(DestinationHdl), -- cgit v1.2.1 From ff02f9a3543ac00d00c7d12d9a3e6518a4c4d1a6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 14 Apr 2009 12:46:35 +0100 Subject: fixed bug. GC does seem to work, though no explicit tests for it yet. --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 22406b0b..94a444e2 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -459,7 +459,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig {ok, 0} = file:position(TmpHdl, {bof, 0}), {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), ok = file:truncate(DestinationHdl), - {ok, ExpectedSize} = file:position(DestinationHdl, {cur, SourceValid}), + {ok, ExpectedSize} = file:position(DestinationHdl, {bof, ExpectedSize}), ok = file:truncate(DestinationHdl), {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), -- cgit v1.2.1 From b7ca1c14e2a466d8cc46f583b5b35c19c07f2edd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 14 Apr 2009 16:32:58 +0100 Subject: Bugs fixed, and tests written. GC works. rabbit_disk_queue is functionally complete. However, fortunately, Matthias this morning changed its requirements so there are now several changes to make. 
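The headline change in the diff below is that combineFiles no longer issues one file:copy per message: it walks the offset-sorted worklist and coalesces messages that sit back-to-back into maximal contiguous blocks, copying each block in one go. A standalone sketch of that coalescing fold, assuming extents are {Offset, Size} pairs already sorted by offset (function name invented for illustration):

    %% Merge sorted extents into maximal contiguous {Start, End} blocks.
    coalesce(Extents) ->
        lists:reverse(
          lists:foldl(
            fun ({Offset, Size}, []) ->
                    [{Offset, Offset + Size}];                % first block
                ({Offset, Size}, [{Start, End} | Rest]) when Offset =:= End ->
                    [{Start, End + Size} | Rest];             % extends current block
                ({Offset, Size}, Acc) ->
                    [{Offset, Offset + Size} | Acc]           % gap: start a new block
            end, [], Extents)).

    %% e.g. coalesce([{0,10}, {10,5}, {20,5}, {25,5}]) =:= [{0,15}, {20,30}]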
--- src/rabbit_disk_queue.erl | 64 ++++++++++++++++++++++++++++++++++++----------- src/rabbit_tests.erl | 29 ++++++++++++++++++++- 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 94a444e2..4cd146ed 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -285,7 +285,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize, MsgId}), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, ContiguousTop1 = if CurOffset =:= ContiguousTop -> - ValidTotalSize; % can't be any holes in this file + ValidTotalSize1; % can't be any holes in this file true -> ContiguousTop end, true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}), @@ -440,19 +440,37 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig % extended by TotalSize Offset > DestinationContiguousTop end, ets:match_object(FileDetail, {{Destination, '_'}, '_', '_'})), - TmpSize = - lists:foldl(fun ({{Destination2, Offset}, TotalSize, MsgId}, CurOffset) when Destination2 =:= Destination -> - {ok, Offset} = file:position(DestinationHdl, {bof, Offset}), + TmpSize = DestinationValid - DestinationContiguousTop, + {TmpSize, BlockStart1, BlockEnd1} = + lists:foldl(fun ({{Destination2, Offset}, TotalSize, MsgId}, {CurOffset, BlockStart, BlockEnd}) when Destination2 =:= Destination -> + % CurOffset is in the TmpFile. + % Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - {ok, Size} = file:copy(DestinationHdl, TmpHdl, Size), % this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset FinalOffset = DestinationContiguousTop + CurOffset, true = ets:update_element(MsgLocation, MsgId, {4, FinalOffset}), % sadly you can't use update_element to change the key: true = ets:delete(FileDetail, {Destination, Offset}), true = ets:insert_new(FileDetail, {{Destination, FinalOffset}, TotalSize, MsgId}), - CurOffset + Size - end, 0, Worklist), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + % base case, called only for the first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + % extend the current block because the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + % found a gap, so actually do the work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = file:position(DestinationHdl, {bof, BlockStart}), + {ok, BSize} = file:copy(DestinationHdl, TmpHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {0, undefined, undefined}, Worklist), + % do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), + {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), % so now Tmp contains everything we need to salvage from Destination, % and both FileDetail and MsgLocation have been updated to reflect compaction of Destination % so truncate Destination and copy from Tmp back to the end @@ -469,18 +487,36 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig ok = file:delete(form_filename(Tmp)) end, SourceWorkList = ets:match_object(FileDetail, {{Source, '_'}, '_', '_'}), - ExpectedSize = - lists:foldl(fun ({{Source2, Offset}, TotalSize, MsgId}, CurOffset) when Source2 
=:= Source -> - {ok, Offset} = file:position(SourceHdl, {bof, Offset}), + {ExpectedSize, BlockStart2, BlockEnd2} = + lists:foldl(fun ({{Source2, Offset}, TotalSize, MsgId}, {CurOffset, BlockStart, BlockEnd}) when Source2 =:= Source -> + % CurOffset is in the DestinationFile. + % Offset, BlockStart and BlockEnd are in the SourceFile Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - {ok, Size} = file:copy(SourceHdl, DestinationHdl, Size), % update MsgLocation to reflect change of file (3rd field) and offset (4th field) true = ets:update_element(MsgLocation, MsgId, [{3, Destination}, {4, CurOffset}]), % can't use update_element to change key: true = ets:delete(FileDetail, {Source, Offset}), true = ets:insert_new(FileDetail, {{Destination, CurOffset}, TotalSize, MsgId}), - CurOffset + Size - end, DestinationValid, SourceWorkList), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + % base case, called only for the first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + % extend the current block because the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + % found a gap, so actually do the work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = file:position(SourceHdl, {bof, BlockStart}), + {ok, BSize} = file:copy(SourceHdl, DestinationHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {DestinationValid, undefined, undefined}, SourceWorkList), + % do the last remaining block + BSize2 = BlockEnd2 - BlockStart2, + {ok, BlockStart2} = file:position(SourceHdl, {bof, BlockStart2}), + {ok, BSize2} = file:copy(SourceHdl, DestinationHdl, BSize2), + % tidy up ok = file:sync(DestinationHdl), ok = file:close(SourceHdl), ok = file:close(DestinationHdl), @@ -535,7 +571,7 @@ load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> Num = list_to_integer(filename:rootname(Left)), Offset = case ets:match_object(FileDetail, {{Left, '_'}, '_', '_'}) of [] -> 0; - L -> {{Left, Offset1}, TotalSize} = lists:last(L), + L -> {{Left, Offset1}, TotalSize, _MsgId} = lists:last(L), Offset1 + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State # dqstate { current_file_num = Num, current_file_name = Left, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 89c575bf..491950d2 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -632,6 +632,8 @@ test_disk_queue() -> MsgCount <- [1024, 4096, 16384] ], rdq_virgin(), + rdq_stress_gc(100), + rdq_stress_gc(1000), passed. 
rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> @@ -647,7 +649,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> io:format("Published ~p ~p-byte messages in ~p microseconds to ~p queues (~p microseconds/msg) (~p microseconds/byte)~n", [MsgCount, MsgSizeBytes, Micros, QCount, (Micros / (MsgCount * QCount)), (Micros / (MsgCount * QCount * MsgSizeBytes))]), {Micros2, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [begin [begin rabbit_disk_queue:deliver(Q, N), ok end || N <- List], + [[fun() -> [begin [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q, N), ok end || N <- List], rabbit_disk_queue:ack(Q, List), rabbit_disk_queue:tx_commit(Q, []) end || Q <- Qs] @@ -655,6 +657,31 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> io:format("Delivered ~p ~p-byte messages in ~p microseconds from ~p queues (~p microseconds/msg) (~p microseconds/byte)~n", [MsgCount, MsgSizeBytes, Micros2, QCount, (Micros2 / (MsgCount * QCount)), (Micros2 / (MsgCount * QCount * MsgSizeBytes))]), rdq_stop(). +% we know each file is going to be 1024*1024*10 bytes in size (10MB), so make sure we have +% several files, and then keep punching holes in a reasonably sensible way. +rdq_stress_gc(MsgCount) -> + rdq_virgin(), + rdq_start(), + MsgSizeBytes = 1024*1024, + Msg = <<0:(8*MsgSizeBytes)>>, % 1MB + List = lists:seq(1, MsgCount), + [rabbit_disk_queue:tx_publish(N, Msg) || N <- List], + rabbit_disk_queue:tx_commit(q, List), + % this list generation is _very_ slow, as it's O(N^2) + AckList = + lists:reverse(lists:foldl(fun (E, Acc) -> case lists:member(E, Acc) of + true -> + Acc; + _False -> [E|Acc] + end + end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(4,MsgCount)]))) + ++ lists:seq(1, 3), + [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(q, N), + rabbit_disk_queue:ack(q, [N]), + rabbit_disk_queue:tx_commit(q, []) + end || N <- AckList], + rdq_stop(). + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). -- cgit v1.2.1 From 5f9ff3c94087d6e32a4b5a9d2191693724086c8a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 16 Apr 2009 09:03:10 +0100 Subject: funny what you realise in the middle of the night... 
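The middle-of-the-night realisation fixed in the diff below is the AckList generation in rdq_stress_gc: testing each element with lists:member against the accumulator is what makes it O(N^2). For comparison, a set-backed dedup that keeps first occurrences in O(N log N); this is only an editorial alternative, not what the patch does:

    dedup_keep_order(L) ->
        {Kept, _Seen} =
            lists:foldl(fun (E, {Acc, Seen}) ->
                            case gb_sets:is_member(E, Seen) of
                                true  -> {Acc, Seen};
                                false -> {[E | Acc], gb_sets:add(E, Seen)}
                            end
                        end, {[], gb_sets:empty()}, L),
        lists:reverse(Kept).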
--- src/rabbit_tests.erl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 491950d2..a3da0ef4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -662,23 +662,26 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> rdq_stress_gc(MsgCount) -> rdq_virgin(), rdq_start(), - MsgSizeBytes = 1024*1024, - Msg = <<0:(8*MsgSizeBytes)>>, % 1MB + MsgSizeBytes = 256*1024, + Msg = <<0:(8*MsgSizeBytes)>>, % 256KB List = lists:seq(1, MsgCount), [rabbit_disk_queue:tx_publish(N, Msg) || N <- List], rabbit_disk_queue:tx_commit(q, List), - % this list generation is _very_ slow, as it's O(N^2) + StartChunk = round(MsgCount / 20), % 5% AckList = - lists:reverse(lists:foldl(fun (E, Acc) -> case lists:member(E, Acc) of - true -> - Acc; - _False -> [E|Acc] + lists:reverse(lists:foldl(fun (E, Acc) -> case Acc of + [] -> [E]; + [F|_Fs] -> + case E rem F of + 0 -> Acc; + _ -> [E|Acc] + end end - end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(4,MsgCount)]))) - ++ lists:seq(1, 3), + end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(StartChunk,MsgCount)]))) + ++ lists:seq(1, (StartChunk - 1)), [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(q, N), - rabbit_disk_queue:ack(q, [N]), - rabbit_disk_queue:tx_commit(q, []) + rabbit_disk_queue:ack(q, [N]), + rabbit_disk_queue:tx_commit(q, []) end || N <- AckList], rdq_stop(). -- cgit v1.2.1 From a43b397593563affd2adf5b18b4c1d876ecf2b1f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Apr 2009 11:09:47 +0100 Subject: just improving display of timing stats to make it easier to graph. This then means that it's much easier to test how performance changes with modifications to the disk queue. --- src/rabbit_tests.erl | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index d8770992..1426657a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -683,6 +683,9 @@ delete_log_handlers(Handlers) -> ok. test_disk_queue() -> + % unicode chars are supported properly from r13 onwards + % io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup μs\t| Publish μs\t| Pub μs/msg\t| Pub μs/byte\t| Deliver μs\t| Del μs/msg\t| Del μs/byte~n", []), + io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []), [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), timer:sleep(1000) end || % 1000 milliseconds MsgSize <- [512, 8192, 32768, 131072], Qs <- [[1], lists:seq(1,10)], %, lists:seq(1,100), lists:seq(1,1000)], @@ -694,24 +697,25 @@ test_disk_queue() -> passed. 
rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> - rdq_virgin(), + Startup = rdq_virgin(), rdq_start(), QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), - {Micros, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List, _ <- Qs] end, - fun() -> [rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end - ]]), - io:format("Published ~p ~p-byte messages in ~p microseconds to ~p queues (~p microseconds/msg) (~p microseconds/byte)~n", - [MsgCount, MsgSizeBytes, Micros, QCount, (Micros / (MsgCount * QCount)), (Micros / (MsgCount * QCount * MsgSizeBytes))]), - {Micros2, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [begin [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q, N), ok end || N <- List], - rabbit_disk_queue:ack(Q, List), - rabbit_disk_queue:tx_commit(Q, []) - end || Q <- Qs] - end]]), - io:format("Delivered ~p ~p-byte messages in ~p microseconds from ~p queues (~p microseconds/msg) (~p microseconds/byte)~n", [MsgCount, MsgSizeBytes, Micros2, QCount, (Micros2 / (MsgCount * QCount)), (Micros2 / (MsgCount * QCount * MsgSizeBytes))]), + {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List, _ <- Qs] end, + fun() -> [rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end + ]]), + {Deliver, ok} = timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [begin [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q, N), ok end || N <- List], + rabbit_disk_queue:ack(Q, List), + rabbit_disk_queue:tx_commit(Q, []) + end || Q <- Qs] + end]]), + io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.8f| ~14.1f| ~14.6f| ~14.8f~n", + [MsgCount, MsgSizeBytes, QCount, float(Startup), + float(Publish), (Publish / (MsgCount * QCount)), (Publish / (MsgCount * QCount * MsgSizeBytes)), + float(Deliver), (Deliver / (MsgCount * QCount)), (Deliver / (MsgCount * QCount * MsgSizeBytes))]), rdq_stop(). % we know each file is going to be 1024*1024*10 bytes in size (10MB), so make sure we have @@ -747,8 +751,8 @@ rdq_time_commands(Funcs) -> rdq_virgin() -> {Micros, {ok, _}} = timer:tc(rabbit_disk_queue, start_link, [1024*1024*10, 1000]), - io:format("Disk queue start up took ~p microseconds~n", [Micros]), - ok = rabbit_disk_queue:clean_stop(). + ok = rabbit_disk_queue:clean_stop(), + Micros. rdq_start() -> {ok, _} = rabbit_disk_queue:start_link(1024*1024*10, 1000). 
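All of these figures come straight from timer:tc, which runs an MFA and returns {Microseconds, Result}; the per-message and per-byte columns are just that number divided down. A quick shell illustration (the timing shown is invented and will vary by machine):

    1> {Micros, ok} = timer:tc(timer, sleep, [10]).
    {10123, ok}
    2> Micros / 1024.    % e.g. microseconds per message for a 1024-message run
    9.8857421875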
-- cgit v1.2.1 From c24956f02f475b8575b57ac6df468f20fd28805f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Apr 2009 11:11:56 +0100 Subject: the /byte figures are so small they could use a couple of extra decimal places --- src/rabbit_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1426657a..4f13ade3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -712,7 +712,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> rabbit_disk_queue:tx_commit(Q, []) end || Q <- Qs] end]]), - io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.8f| ~14.1f| ~14.6f| ~14.8f~n", + io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| ~14.10f~n", [MsgCount, MsgSizeBytes, QCount, float(Startup), float(Publish), (Publish / (MsgCount * QCount)), (Publish / (MsgCount * QCount * MsgSizeBytes)), float(Deliver), (Deliver / (MsgCount * QCount)), (Deliver / (MsgCount * QCount * MsgSizeBytes))]), -- cgit v1.2.1 From aec0c55bf482509165528facefdf89086ce1c62f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Apr 2009 11:29:19 +0100 Subject: no sense running the same test twice --- src/rabbit_tests.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4f13ade3..ba74a877 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -692,8 +692,7 @@ test_disk_queue() -> MsgCount <- [1024, 4096, 16384] ], rdq_virgin(), - rdq_stress_gc(100), - rdq_stress_gc(1000), + rdq_stress_gc(10000), passed. -- cgit v1.2.1 From 5c4f90663aa57c09a961b505090b1edbb2617924 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Apr 2009 12:18:16 +0100 Subject: removed the file_detail ets table and converted all of its uses to the msg_location ets table. Even though this is slightly less optimal, because we lose the index lookups file_detail provided, it is actually slightly faster overall since we no longer maintain two tables.
Performance:
Msg Count | Msg Size | Queue Count | Startup mu s | Publish mu s | Pub mu s/msg | Pub mu s/byte | Deliver mu s | Del mu s/msg | Del mu s/byte
1024| 512| 1| 2644.0| 41061.0| 40.098633| 0.0783176422| 156031.0| 152.374023| 0.2976055145
4096| 512| 1| 74843.0| 328683.0| 80.244873| 0.1567282677| 629441.0| 153.672119| 0.3001408577
16384| 512| 1| 373729.0| 3614155.0| 220.590515| 0.4308408499| 2969499.0| 181.243835| 0.3539918661
1024| 512| 10| 1605989.0| 281004.0| 27.441797| 0.0535972595| 1936168.0| 189.078906| 0.3692947388
4096| 512| 10| 85912.0| 2940291.0| 71.784448| 0.1402040005| 7662259.0| 187.066870| 0.3653649807
16384| 512| 10| 418213.0| 37962842.0| 231.706799| 0.4525523424| 32293492.0| 197.103833| 0.3849684238
1024| 8192| 1| 1347269.0| 144988.0| 141.589844| 0.0172839165| 173906.0| 169.830078| 0.0207312107
4096| 8192| 1| 93070.0| 606369.0| 148.039307| 0.0180712044| 829812.0| 202.590820| 0.0247303247
16384| 8192| 1| 20014.0| 4976009.0| 303.711487| 0.0370741561| 3211632.0| 196.022461| 0.0239285231
1024| 8192| 10| 77291.0| 348677.0| 34.050488| 0.0041565537| 1877374.0| 183.337305| 0.0223800421
4096| 8192| 10| 104842.0| 2722730.0| 66.472900| 0.0081143677| 7787817.0| 190.132251| 0.0232095033
16384| 8192| 10| 21746.0| 44301448.0| 270.394580| 0.0330071509| 32018244.0| 195.423853| 0.0238554507
1024| 32768| 1| 120732.0| 426700.0| 416.699219| 0.0127166510| 210704.0| 205.765625| 0.0062794685
4096| 32768| 1| 9355.0| 1925633.0| 470.125244| 0.0143470839| 824304.0| 201.246094| 0.0061415434
16384| 32768| 1| 14734.0| 10371560.0| 633.029785| 0.0193185359| 3594753.0| 219.406311| 0.0066957492
1024| 32768| 10| 6052.0| 629362.0| 61.461133| 0.0018756449| 2100901.0| 205.166113| 0.0062611729
4096| 32768| 10| 5546.0| 4203683.0| 102.628979| 0.0031319879| 8899536.0| 217.273828| 0.0066306710
16384| 32768| 10| 22657.0| 50306069.0| 307.043878| 0.0093702355| 36433817.0| 222.374371| 0.0067863273
1024| 131072| 1| 7155.0| 1913696.0| 1868.843750| 0.0142581463| 444638.0| 434.216797| 0.0033128113
4096| 131072| 1| 6671.0| 8232640.0| 2009.921875| 0.0153344870| 1907439.0| 465.683350| 0.0035528820
16384| 131072| 1| 1699.0| 33886514.0| 2068.268677| 0.0157796377| 7291762.0| 445.053833| 0.0033954913
1024| 131072| 10| 7506.0| 1991032.0| 194.436719| 0.0014834344| 4564850.0| 445.786133| 0.0034010783
4096| 131072| 10| 7486.0| 9551800.0| 233.198242| 0.0017791614| 18048697.0| 440.642017| 0.0033618318
16384| 131072| 10| 2771.0| 71072559.0| 433.792474| 0.0033095739| 81144745.0| 495.268219| 0.0037785966
--- src/rabbit_disk_queue.erl | 66 +++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 4cd146ed..a704ff21 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -50,7 +50,6 @@ -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). --define(FILE_DETAIL_ETS_NAME, rabbit_disk_queue_file_detail). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt").
@@ -60,7 +59,6 @@ -record(dqstate, {msg_location, file_summary, - file_detail, current_file_num, current_file_name, current_file_handle, @@ -106,7 +104,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> InitName = "0" ++ ?FILE_EXTENSION, State = #dqstate { msg_location = ets:new(?MSG_LOC_ETS_NAME, [set, private]), file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), - file_detail = ets:new(?FILE_DETAIL_ETS_NAME, [ordered_set, private]), current_file_num = 0, current_file_name = InitName, current_file_handle = undefined, @@ -133,13 +130,11 @@ handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(clean_stop, _From, State) -> State1 = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail } + file_summary = FileSummary } = shutdown(State), %% tidy up file handles early {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), true = ets:delete(MsgLocation), true = ets:delete(FileSummary), - true = ets:delete(FileDetail), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1 # dqstate { current_file_handle = undefined, read_file_handles = {dict:new(), gb_trees:empty()}}}. @@ -234,7 +229,6 @@ internal_ack(Q, MsgIds, State) -> %% Q is only needed if MnesiaDelete = true remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation, file_summary = FileSummary, - file_detail = FileDetail, current_file_name = CurName }) -> Files @@ -246,7 +240,6 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL true = ets:delete(MsgLocation, MsgId), [{File, ValidTotalSize, ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), - true = ets:delete(FileDetail, {File, Offset}), ContiguousTop1 = lists:min([ContiguousTop, Offset]), true = ets:insert(FileSummary, {File, (ValidTotalSize - TotalSize - ?FILE_PACKING_ADJUSTMENT), @@ -272,8 +265,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio current_file_handle = CurHdl, current_file_name = CurName, current_offset = CurOffset, - file_summary = FileSummary, - file_detail = FileDetail + file_summary = FileSummary }) -> case ets:lookup(MsgLocation, MsgId) of [] -> @@ -282,7 +274,6 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), - true = ets:insert_new(FileDetail, {{CurName, CurOffset}, TotalSize, MsgId}), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, ContiguousTop1 = if CurOffset =:= ContiguousTop -> ValidTotalSize1; % can't be any holes in this file @@ -414,7 +405,7 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, State1) -> - (State = #dqstate { file_detail = FileDetail, msg_location = MsgLocation }) + (State = #dqstate { msg_location = MsgLocation }) = closeFile(Source, closeFile(Destination, State1)), {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]), {ok, DestinationHdl} = file:open(form_filename(Destination), [read, write, raw, binary, delayed_write, read_ahead]), @@ -432,26 +423,27 @@ combineFiles({Source, SourceValid, 
_SourceContiguousTop, _SourceLeft, _SourceRig true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = file:open(form_filename(Tmp), [read, write, raw, binary, delayed_write, read_ahead]), - % as FileDetail is an ordered_set, we should have the lowest offsets first - Worklist = lists:filter(fun ({{Destination2, Offset}, _TotalSize, _MsgId}) - when Destination2 =:= Destination, Offset /= DestinationContiguousTop -> - % it cannot be that Offset == DestinationContiguousTop - % because if it was then DestinationContiguousTop would have been - % extended by TotalSize - Offset > DestinationContiguousTop - end, ets:match_object(FileDetail, {{Destination, '_'}, '_', '_'})), + Worklist + = lists:dropwhile(fun ({_, _, _, Offset, _}) when Offset /= DestinationContiguousTop -> + % it cannot be that Offset == DestinationContiguousTop + % because if it was then DestinationContiguousTop would have been + % extended by TotalSize + Offset < DestinationContiguousTop + % Given expected access patterns, I suspect that the list should be + % naturally sorted as we require, however, we need to enforce it anyway + end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + OffA < OffB + end, + ets:match_object(MsgLocation, {'_', '_', Destination, '_', '_'}))), TmpSize = DestinationValid - DestinationContiguousTop, {TmpSize, BlockStart1, BlockEnd1} = - lists:foldl(fun ({{Destination2, Offset}, TotalSize, MsgId}, {CurOffset, BlockStart, BlockEnd}) when Destination2 =:= Destination -> + lists:foldl(fun ({MsgId, _RefCount, _Destination, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) -> % CurOffset is in the TmpFile. % Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, % this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset FinalOffset = DestinationContiguousTop + CurOffset, true = ets:update_element(MsgLocation, MsgId, {4, FinalOffset}), - % sadly you can't use update_element to change the key: - true = ets:delete(FileDetail, {Destination, Offset}), - true = ets:insert_new(FileDetail, {{Destination, FinalOffset}, TotalSize, MsgId}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> % base case, called only for the first list elem @@ -472,7 +464,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), % so now Tmp contains everything we need to salvage from Destination, - % and both FileDetail and MsgLocation have been updated to reflect compaction of Destination + % and MsgLocation has been updated to reflect compaction of Destination % so truncate Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), @@ -486,17 +478,16 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig ok = file:close(TmpHdl), ok = file:delete(form_filename(Tmp)) end, - SourceWorkList = ets:match_object(FileDetail, {{Source, '_'}, '_', '_'}), + SourceWorkList = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + OffA < OffB + end, ets:match_object(MsgLocation, {'_', '_', Source, '_', '_'})), {ExpectedSize, BlockStart2, BlockEnd2} = - lists:foldl(fun ({{Source2, Offset}, TotalSize, MsgId}, {CurOffset, BlockStart, BlockEnd}) when Source2 =:= Source 
-> + lists:foldl(fun ({MsgId, _RefCount, _Source, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) -> % CurOffset is in the DestinationFile. % Offset, BlockStart and BlockEnd are in the SourceFile Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, % update MsgLocation to reflect change of file (3rd field) and offset (4th field) true = ets:update_element(MsgLocation, MsgId, [{3, Destination}, {4, CurOffset}]), - % can't use update_element to change key: - true = ets:delete(FileDetail, {Source, Offset}), - true = ets:insert_new(FileDetail, {{Destination, CurOffset}, TotalSize, MsgId}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> % base case, called only for the first list elem @@ -567,19 +558,21 @@ load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), State; -load_messages(Left, [], State = #dqstate { file_detail = FileDetail }) -> +load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) -> Num = list_to_integer(filename:rootname(Left)), - Offset = case ets:match_object(FileDetail, {{Left, '_'}, '_', '_'}) of + Offset = case ets:match_object(MsgLocation, {'_', '_', Left, '_', '_'}) of [] -> 0; - L -> {{Left, Offset1}, TotalSize, _MsgId} = lists:last(L), - Offset1 + TotalSize + ?FILE_PACKING_ADJUSTMENT + L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] + = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + OffB < OffA + end, L), + MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State # dqstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; load_messages(Left, [File|Files], State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - file_detail = FileDetail + file_summary = FileSummary }) -> % [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), @@ -591,7 +584,6 @@ load_messages(Left, [File|Files], 0 -> {VMAcc, VTSAcc}; RefCount -> true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), - true = ets:insert_new(FileDetail, {{File, Offset}, TotalSize, MsgId}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } -- cgit v1.2.1 From 802cfb45bb4d120d9c6ecb9161cdba7a836e4371 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Apr 2009 14:01:36 +0100 Subject: moved the message location table to dets. performance is slightly worse than 10 times slower. This means that the test suite which used to take about 12 mins to run now takes about 2 hours. Looks like we could now be talking up to 40ms to publish a message. Interestingly, delivery is only twice as slow as with ets, it's publish that's taken the 10+times hit. Worryingly, the numbers show that performance per message is not constant, and wasn't in ets either. This must be the effect of buckets in both ets and dets filling up and chaining. The dets man page does say that it organises data as a linear hash list, which is a structure I know well, and I am surprised performance is dropping off in this way - maybe suggests poor distribution of their hashing algorithm or rebalancing. 
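Part of why this swap was mostly mechanical is that dets deliberately mirrors the ets API (lookup/insert/delete/match_object), the main wrinkle being that dets:insert/2 returns ok where ets:insert/2 returns true, hence the ok = / true = changes below. A toy open-use-close cycle (table name and path invented for illustration):

    demo_dets() ->
        {ok, T} = dets:open_file(msg_loc_demo,
                                 [{file, "/tmp/msg_loc_demo.dets"}, {type, set}]),
        ok = dets:insert(T, {msg1, 1, "0.rdq", 0, 123}), % same row shape as msg_location
        [{msg1, 1, "0.rdq", 0, 123}] = dets:lookup(T, msg1),
        ok = dets:delete(T, msg1),
        ok = dets:close(T).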
--- src/rabbit_disk_queue.erl | 58 +++++++++++++++++++++++++++-------------------- src/rabbit_tests.erl | 4 ++-- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a704ff21..26aa9d4b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -49,10 +49,11 @@ -define(WRITE_OK, 255). -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). --define(MSG_LOC_ETS_NAME, rabbit_disk_queue_msg_location). +-define(MSG_LOC_DETS_NAME, rabbit_disk_queue_msg_location). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). +-define(FILE_EXTENSION_DETS, ".dets"). -define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). -define(SERVER, ?MODULE). @@ -102,7 +103,14 @@ clean_stop() -> init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), InitName = "0" ++ ?FILE_EXTENSION, - State = #dqstate { msg_location = ets:new(?MSG_LOC_ETS_NAME, [set, private]), + {ok, MsgLocation} = dets:open_file(?MSG_LOC_DETS_NAME, + [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ ?FILE_EXTENSION_DETS)}, + {min_no_slots, 1024*1024}, + % man says this should be <= 32M. But it works... + {max_no_slots, 1024*1024*1024}, + {type, set} + ]), + State = #dqstate { msg_location = MsgLocation, file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), current_file_num = 0, current_file_name = InitName, @@ -129,11 +137,9 @@ handle_call({tx_commit, Q, MsgIds}, _From, State) -> handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(clean_stop, _From, State) -> - State1 = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary } + State1 = #dqstate { file_summary = FileSummary } = shutdown(State), %% tidy up file handles early {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), - true = ets:delete(MsgLocation), true = ets:delete(FileSummary), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1 # dqstate { current_file_handle = undefined, @@ -159,10 +165,13 @@ handle_info(_Info, State) -> terminate(_Reason, State) -> shutdown(State). -shutdown(State = #dqstate { current_file_handle = FileHdl, +shutdown(State = #dqstate { msg_location = MsgLocation, + current_file_handle = FileHdl, read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> % deliberately ignoring return codes here + dets:close(MsgLocation), + file:delete(form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ ?FILE_EXTENSION_DETS)), if FileHdl =:= undefined -> ok; true -> file:sync(FileHdl), file:close(FileHdl) @@ -190,7 +199,7 @@ internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation, read_file_handles_limit = ReadFileHandlesLimit, read_file_handles = {ReadHdls, ReadHdlsAge} }) -> - [{MsgId, _RefCount, File, Offset, TotalSize}] = ets:lookup(MsgLocation, MsgId), + [{MsgId, _RefCount, File, Offset, TotalSize}] = dets:lookup(MsgLocation, MsgId), % so this next bit implements an LRU for file handles. But it's a bit insane, and smells % of premature optimisation. 
     {FileHdl, ReadHdls1, ReadHdlsAge1}
@@ -234,10 +243,10 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL
     Files
         = lists:foldl(fun (MsgId, Files2) ->
                               [{MsgId, RefCount, File, Offset, TotalSize}]
-                                  = ets:lookup(MsgLocation, MsgId),
+                                  = dets:lookup(MsgLocation, MsgId),
                               Files3 =
                                   if 1 =:= RefCount ->
-                                          true = ets:delete(MsgLocation, MsgId),
+                                          ok = dets:delete(MsgLocation, MsgId),
                                           [{File, ValidTotalSize, ContiguousTop, Left, Right}]
                                               = ets:lookup(FileSummary, File),
                                           ContiguousTop1 = lists:min([ContiguousTop, Offset]),
@@ -248,7 +257,7 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL
                                              true -> sets:add_element(File, Files2)
                                           end;
                                      1 < RefCount ->
-                                          ets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}),
+                                          ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}),
                                           Files2
                                   end,
                               if MnesiaDelete ->
@@ -267,11 +276,11 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio
                                                        current_offset = CurOffset,
                                                        file_summary = FileSummary
                                                      }) ->
-    case ets:lookup(MsgLocation, MsgId) of
+    case dets:lookup(MsgLocation, MsgId) of
         [] ->
             % New message, lots to do
             {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody),
-            true = ets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}),
+            true = dets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}),
             [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}]
                 = ets:lookup(FileSummary, CurName),
             ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT,
@@ -284,7 +293,7 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio
                                    State # dqstate {current_offset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT});
         [{MsgId, RefCount, File, Offset, TotalSize}] ->
             % We already know about it, just update counter
-            true = ets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}),
+            ok = dets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}),
             {ok, State}
     end.

@@ -297,7 +306,7 @@ internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation,
         fun() ->
                 ok = mnesia:write_lock_table(rabbit_disk_queue),
                 lists:foldl(fun (MsgId, Acc) ->
                                     [{MsgId, _RefCount, File, _Offset, _TotalSize}] =
-                                        ets:lookup(MsgLocation, MsgId),
+                                        dets:lookup(MsgLocation, MsgId),
                                     ok = mnesia:write(rabbit_disk_queue,
                                                       #dq_msg_loc { msg_id_and_queue = {MsgId, Q},
                                                                     is_delivered = false},
                                                       write),
@@ -434,16 +443,17 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig
                           end,
                           lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) ->
                                              OffA < OffB
                                      end,
-                                     ets:match_object(MsgLocation, {'_', '_', Destination, '_', '_'}))),
+                                     dets:match_object(MsgLocation, {'_', '_', Destination, '_', '_'}))),
         TmpSize = DestinationValid - DestinationContiguousTop,
         {TmpSize, BlockStart1, BlockEnd1} =
-            lists:foldl(fun ({MsgId, _RefCount, _Destination, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) ->
+            lists:foldl(fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) ->
                                 % CurOffset is in the TmpFile.
                                 % Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!)
                                Size = TotalSize + ?FILE_PACKING_ADJUSTMENT,
                                % this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset
                                FinalOffset = DestinationContiguousTop + CurOffset,
-                               true = ets:update_element(MsgLocation, MsgId, {4, FinalOffset}),
+                               ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, FinalOffset, TotalSize}),
+
                                NextOffset = CurOffset + Size,
                                if BlockStart =:= undefined ->
                                        % base case, called only for the first list elem
@@ -480,14 +490,14 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig
     end,
     SourceWorkList = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) ->
                                         OffA < OffB
-                                end, ets:match_object(MsgLocation, {'_', '_', Source, '_', '_'})),
+                                end, dets:match_object(MsgLocation, {'_', '_', Source, '_', '_'})),
     {ExpectedSize, BlockStart2, BlockEnd2} =
-        lists:foldl(fun ({MsgId, _RefCount, _Source, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) ->
+        lists:foldl(fun ({MsgId, RefCount, _Source, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) ->
                             % CurOffset is in the DestinationFile.
                            % Offset, BlockStart and BlockEnd are in the SourceFile
                             Size = TotalSize + ?FILE_PACKING_ADJUSTMENT,
-                            % update MsgLocation to reflect change of file (3rd field) and offset (4th field)
-                            true = ets:update_element(MsgLocation, MsgId, [{3, Destination}, {4, CurOffset}]),
+                            % update MsgLocation to reflect change of file and offset
+                            ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, CurOffset, TotalSize}),
                             NextOffset = CurOffset + Size,
                             if BlockStart =:= undefined ->
                                     % base case, called only for the first list elem
@@ -550,7 +560,7 @@ load_from_disk(State) ->
     % There should be no more tmp files now, so go ahead and load the whole lot
     (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State),
     % Finally, check there is nothing in mnesia which we haven't loaded
-    true = lists:foldl(fun ({MsgId, _Q}, true) -> true = 1 =:= length(ets:lookup(MsgLocation, MsgId)) end,
+    true = lists:foldl(fun ({MsgId, _Q}, true) -> true = 1 =:= length(dets:lookup(MsgLocation, MsgId)) end,
                        true, mnesia:dirty_all_keys(rabbit_disk_queue)),
     {ok, State1}.
@@ -560,7 +570,7 @@ load_messages(undefined, [],
     State;
 load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) ->
     Num = list_to_integer(filename:rootname(Left)),
-    Offset = case ets:match_object(MsgLocation, {'_', '_', Left, '_', '_'}) of
+    Offset = case dets:match_object(MsgLocation, {'_', '_', Left, '_', '_'}) of
                  [] -> 0;
                  L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_]
                           = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) ->
@@ -583,7 +593,7 @@ load_messages(Left, [File|Files],
                                                    is_delivered = '_'})) of
                 0 -> {VMAcc, VTSAcc};
                 RefCount ->
-                    true = ets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}),
+                    true = dets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}),
                     {[{MsgId, TotalSize, Offset}|VMAcc],
                      VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT
                     }
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index ba74a877..736ddfd4 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -703,12 +703,12 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) ->
     List = lists:seq(1, MsgCount),
     {Publish, ok} = timer:tc(?MODULE, rdq_time_commands,
                              [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List, _ <- Qs] end,
-                               fun() -> [rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end
+                               fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end
                               ]]),
     {Deliver, ok} = timer:tc(?MODULE, rdq_time_commands,
                              [[fun() -> [begin [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q, N), ok end || N <- List],
                                                rabbit_disk_queue:ack(Q, List),
-                                               rabbit_disk_queue:tx_commit(Q, [])
+                                               ok = rabbit_disk_queue:tx_commit(Q, [])
                                          end || Q <- Qs]
                               end]]),
     io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| ~14.10f~n",
--
cgit v1.2.1


From 0df10591e5adec2c22664785dfc7ae2747af8e6e Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 20 Apr 2009 17:40:00 +0100
Subject: so I figured it was still going too fast. Fortunately, switching
 mnesia to disc_only has solved that problem...

---
 src/rabbit_disk_queue.erl | 1 +
 src/rabbit_mnesia.erl     | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 26aa9d4b..f0fab00d 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -102,6 +102,7 @@ clean_stop() ->
 init([FileSizeLimit, ReadFileHandlesLimit]) ->
     process_flag(trap_exit, true),
+    ok = filelib:ensure_dir(form_filename("nothing")),
     InitName = "0" ++ ?FILE_EXTENSION,
     {ok, MsgLocation} = dets:open_file(?MSG_LOC_DETS_NAME,
                                        [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ ?FILE_EXTENSION_DETS)},
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 858b024a..b3c4a926 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -146,9 +146,9 @@ table_definitions() ->
       {attributes, record_info(fields, amqqueue)}]},
     {rabbit_disk_queue,
      [{record_name, dq_msg_loc},
-      {type, ordered_set},
+      {type, set},
       {attributes, record_info(fields, dq_msg_loc)},
-      {disc_copies, [node()]}]}
+      {disc_only_copies, [node()]}]}
    ].

 table_names() ->
--
cgit v1.2.1


From 3faffe19c9a3dc531aaf2fdc7e614a6535e65c7c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 21 Apr 2009 12:56:36 +0100
Subject: Introduced sequence IDs internally.

Sadly, because of the need to look up seqids for each msgid in acks, acks are
now very slow. Thus I'm going to alter the API so that deliver returns the
seqid and then ack takes [seqid]. This should make things faster.
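The bookkeeping this introduces is small enough to sketch in isolation.
Assuming an ets table of {Q, ReadSeqId, WriteSeqId} tuples, as the diff below
sets up, publish allocates and advances the write seqid while deliver
consumes the read seqid (the function names here are illustrative, not taken
from the patch):

    %% Sketch only: per-queue sequence counters in an ets table holding
    %% {Q, ReadSeqId, WriteSeqId} tuples.
    publish_seq(Sequences, Q) ->
        case ets:lookup(Sequences, Q) of
            [] ->
                %% previously unseen queue: hand out seqid 0
                true = ets:insert_new(Sequences, {Q, 0, 1}),
                0;
            [{Q, ReadSeqId, WriteSeqId}] ->
                true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}),
                WriteSeqId
        end.

    deliver_seq(Sequences, Q) ->
        case ets:lookup(Sequences, Q) of
            [{Q, ReadSeqId, WriteSeqId}] when ReadSeqId < WriteSeqId ->
                true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}),
                {ok, ReadSeqId};
            _ ->
                empty
        end.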
---
 include/rabbit.hrl        |   2 +-
 src/rabbit_disk_queue.erl | 217 +++++++++++++++++++++++++++++++---------------
 src/rabbit_mnesia.erl     |   1 +
 src/rabbit_tests.erl      |   5 +-
 4 files changed, 153 insertions(+), 72 deletions(-)

diff --git a/include/rabbit.hrl b/include/rabbit.hrl
index 4f06b833..44e13684 100644
--- a/include/rabbit.hrl
+++ b/include/rabbit.hrl
@@ -64,7 +64,7 @@
 -record(basic_message, {exchange_name, routing_key, content, persistent_key}).

--record(dq_msg_loc, {msg_id_and_queue, is_delivered}).
+-record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id}).

 %%----------------------------------------------------------------------------

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index f0fab00d..9b0849c3 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -38,7 +38,7 @@
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).

--export([publish/3, deliver/2, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]).
+-export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]).

 -export([stop/0, clean_stop/0]).

@@ -51,6 +51,7 @@
 -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)).
 -define(MSG_LOC_DETS_NAME, rabbit_disk_queue_msg_location).
 -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary).
+-define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences).
 -define(FILE_EXTENSION, ".rdq").
 -define(FILE_EXTENSION_TMP, ".rdt").
 -define(FILE_EXTENSION_DETS, ".dets").
@@ -60,6 +61,7 @@
 -record(dqstate, {msg_location,
                   file_summary,
+                  sequences,
                   current_file_num,
                   current_file_name,
                   current_file_handle,
                   current_offset,
                   file_size_limit,
                   read_file_handles,
                   read_file_handles_limit
                  }).
@@ -77,8 +79,8 @@ start_link(FileSizeLimit, ReadFileHandlesLimit) ->
 publish(Q, MsgId, Msg) when is_binary(Msg) ->
     gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}).

-deliver(Q, MsgId) ->
-    gen_server:call(?SERVER, {deliver, Q, MsgId}, infinity).
+deliver(Q) ->
+    gen_server:call(?SERVER, {deliver, Q}, infinity).

 ack(Q, MsgIds) when is_list(MsgIds) ->
     gen_server:cast(?SERVER, {ack, Q, MsgIds}).
@@ -113,6 +115,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
                                       ]),
     State = #dqstate { msg_location = MsgLocation,
                        file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]),
+                       sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]),
                        current_file_num = 0,
                        current_file_name = InitName,
@@ -124,24 +127,25 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     {ok, State1 = #dqstate { current_file_name = CurrentName,
                              current_offset = Offset } } = load_from_disk(State),
     Path = form_filename(CurrentName),
-    ok = filelib:ensure_dir(Path),
     {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), %% read only needed so that we can seek
     {ok, Offset} = file:position(FileHdl, {bof, Offset}),
     {ok, State1 # dqstate { current_file_handle = FileHdl }}.
-handle_call({deliver, Q, MsgId}, _From, State) ->
-    {ok, {MsgBody, BodySize, Delivered}, State1} = internal_deliver(Q, MsgId, State),
-    {reply, {MsgBody, BodySize, Delivered}, State1};
+handle_call({deliver, Q}, _From, State) ->
+    {ok, Result, State1} = internal_deliver(Q, State),
+    {reply, Result, State1};
 handle_call({tx_commit, Q, MsgIds}, _From, State) ->
     {ok, State1} = internal_tx_commit(Q, MsgIds, State),
     {reply, ok, State1};
 handle_call(stop, _From, State) ->
     {stop, normal, ok, State}; %% gen_server now calls terminate
 handle_call(clean_stop, _From, State) ->
-    State1 = #dqstate { file_summary = FileSummary }
+    State1 = #dqstate { file_summary = FileSummary,
+                        sequences = Sequences }
         = shutdown(State), %% tidy up file handles early
     {atomic, ok} = mnesia:clear_table(rabbit_disk_queue),
     true = ets:delete(FileSummary),
+    true = ets:delete(Sequences),
     lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))),
     {stop, normal, ok, State1 # dqstate { current_file_handle = undefined,
                                           read_file_handles = {dict:new(), gb_trees:empty()}}}.
@@ -196,53 +200,62 @@ base_directory() ->

 %% ---- INTERNAL RAW FUNCTIONS ----

-internal_deliver(Q, MsgId, State = #dqstate { msg_location = MsgLocation,
-                                              read_file_handles_limit = ReadFileHandlesLimit,
-                                              read_file_handles = {ReadHdls, ReadHdlsAge}
-                                            }) ->
-    [{MsgId, _RefCount, File, Offset, TotalSize}] = dets:lookup(MsgLocation, MsgId),
-    % so this next bit implements an LRU for file handles. But it's a bit insane, and smells
-    % of premature optimisation.
+internal_deliver(Q, State = #dqstate { msg_location = MsgLocation,
+                                       sequences = Sequences,
+                                       read_file_handles_limit = ReadFileHandlesLimit,
+                                       read_file_handles = {ReadHdls, ReadHdlsAge}
+                                     }) ->
+    case ets:lookup(Sequences, Q) of
+        [] -> {ok, empty, State};
+        [{Q, ReadSeqId, WriteSeqId}] ->
+            case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of
+                [] -> {ok, empty, State};
+                [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] ->
+                    [{MsgId, _RefCount, File, Offset, TotalSize}] = dets:lookup(MsgLocation, MsgId),
+                    {FileHdl, ReadHdls1, ReadHdlsAge1}
+                        = case dict:find(File, ReadHdls) of
+                              error ->
+                                  {ok, Hdl} = file:open(form_filename(File), [read, raw, binary, read_ahead]),
+                                  Now = now(),
+                                  case dict:size(ReadHdls) < ReadFileHandlesLimit of
+                                      true ->
+                                          {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, ReadHdlsAge)};
+                                      _False ->
+                                          {_Then, OldFile, ReadHdlsAge2} = gb_trees:take_smallest(ReadHdlsAge),
+                                          {ok, {OldHdl, _Then}} = dict:find(OldFile, ReadHdls),
+                                          ok = file:close(OldHdl),
+                                          ReadHdls2 = dict:erase(OldFile, ReadHdls),
+                                          {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), gb_trees:enter(Now, File, ReadHdlsAge2)}
+                                  end;
+                              {ok, {Hdl, Then}} ->
+                                  Now = now(),
+                                  {Hdl, dict:store(File, {Hdl, Now}, ReadHdls),
+                                   gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))}
+                          end,
+                    % read the message
+                    {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize),
+                    if Delivered -> ok;
+                       true -> ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true})
+                    end,
+                    true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}),
+                    {ok, {MsgId, MsgBody, BodySize, Delivered},
+                     State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}
+            end
+    end.

 internal_ack(Q, MsgIds, State) ->
     remove_messages(Q, MsgIds, true, State).

 %% Q is only needed if MnesiaDelete = true
+%% called from tx_cancel with MnesiaDelete = false
+%% called from ack with MnesiaDelete = true
 remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation,
+                                                             sequences = Sequences,
                                                              file_summary = FileSummary,
                                                              current_file_name = CurName
                                                            }) ->
-    Files
-        = lists:foldl(fun (MsgId, Files2) ->
+    {Files, MaxSeqId}
+        = lists:foldl(fun (MsgId, {Files2, MaxSeqId2}) ->
                               [{MsgId, RefCount, File, Offset, TotalSize}]
                                   = dets:lookup(MsgLocation, MsgId),
                               Files3 =
@@ -261,13 +274,28 @@ remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgL
                                           ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}),
                                           Files2
                                   end,
-                              if MnesiaDelete ->
-                                      ok = mnesia:dirty_delete(rabbit_disk_queue, {MsgId, Q});
-                                 true ->
-                                      ok
-                              end,
-                              Files3
-                      end, sets:new(), MsgIds),
+                              {if MnesiaDelete ->
+                                       [#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }]
+                                           = mnesia:dirty_match_object(rabbit_disk_queue,
+                                                                       #dq_msg_loc { msg_id = MsgId,
+                                                                                     queue_and_seq_id = {Q, '_'},
+                                                                                     is_delivered = '_'
+                                                                                   }),
+                                       ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}),
+                                       lists:max([SeqId, MaxSeqId2]);
+                                  true ->
+                                       MaxSeqId2
+                               end,
+                               Files3}
+                      end, {sets:new(), 0}, MsgIds),
+    true = if MnesiaDelete ->
+                   [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q),
+                   if MaxSeqId > ReadSeqId ->
+                           true = ets:insert(Sequences, {Q, MaxSeqId, WriteSeqId});
+                      true -> true
+                   end;
+              true -> true
+           end,
     State2 = compact(Files, State),
    {ok, State2}.
@@ -300,28 +328,45 @@ internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocatio
 internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation,
                                                  current_file_handle = CurHdl,
-                                                 current_file_name = CurName
+                                                 current_file_name = CurName,
+                                                 sequences = Sequences
                                                }) ->
-    {atomic, Sync}
+    {ReadSeqId, InitWriteSeqId}
+        = case ets:lookup(Sequences, Q) of
+              [] -> {0,0};
+              [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2}
+          end,
+    {atomic, {Sync, WriteSeqId}}
         = mnesia:transaction(
            fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue),
-                    lists:foldl(fun (MsgId, Acc) ->
+                    lists:foldl(fun (MsgId, {Acc, NextWriteSeqId}) ->
                                         [{MsgId, _RefCount, File, _Offset, _TotalSize}] =
                                             dets:lookup(MsgLocation, MsgId),
                                         ok = mnesia:write(rabbit_disk_queue,
-                                                          #dq_msg_loc { msg_id_and_queue = {MsgId, Q},
-                                                                        is_delivered = false}, write),
-                                        Acc or (CurName =:= File)
-                                end, false, MsgIds)
+                                                          #dq_msg_loc { queue_and_seq_id = {Q, NextWriteSeqId},
+                                                                        msg_id = MsgId, is_delivered = false},
+                                                          write),
+                                        {Acc or (CurName =:= File), NextWriteSeqId + 1}
+                                end, {false, InitWriteSeqId}, MsgIds)
            end),
+    true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}),
     if Sync -> ok = file:sync(CurHdl);
        true -> ok
     end,
     {ok, State}.

 internal_publish(Q, MsgId, MsgBody, State) ->
-    {ok, State1} = internal_tx_publish(MsgId, MsgBody, State),
-    ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { msg_id_and_queue = {MsgId, Q},
+    {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, MsgBody, State),
+    WriteSeqId = case ets:lookup(Sequences, Q) of
+                     [] -> % previously unseen queue
+                         true = ets:insert_new(Sequences, {Q, 0, 1}),
+                         0;
+                     [{Q, ReadSeqId, WriteSeqId2}] ->
+                         true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2 +1}),
+                         WriteSeqId2
+                 end,
+    ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId},
+                                                             msg_id = MsgId,
                                                              is_delivered = false}),
     {ok, State1}.

@@ -561,9 +606,42 @@ load_from_disk(State) ->
     % There should be no more tmp files now, so go ahead and load the whole lot
     (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State),
     % Finally, check there is nothing in mnesia which we haven't loaded
-    true = lists:foldl(fun ({MsgId, _Q}, true) -> true = 1 =:= length(dets:lookup(MsgLocation, MsgId)) end,
-                       true, mnesia:dirty_all_keys(rabbit_disk_queue)),
-    {ok, State1}.
+    {atomic, true} = mnesia:transaction(
+                      fun() ->
+                              ok = mnesia:read_lock_table(rabbit_disk_queue),
+                              mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) ->
+                                                   true = 1 =:= length(dets:lookup(MsgLocation, MsgId)) end,
+                                           true, rabbit_disk_queue)
+                      end),
+    State2 = extract_sequence_numbers(State1),
+    {ok, State2}.
+
+extract_sequence_numbers(State = #dqstate { sequences = Sequences }) ->
+    % next-seqid-to-read is the lowest seqid which has is_delivered = false
+    {atomic, true} = mnesia:transaction(
+                      fun() ->
+                              ok = mnesia:read_lock_table(rabbit_disk_queue),
+                              mnesia:foldl(
+                                fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId},
+                                                   is_delivered = Delivered }, true) ->
+                                        NextRead = if Delivered -> SeqId + 1;
+                                                      true -> SeqId
+                                                   end,
+                                        NextWrite = SeqId + 1,
+                                        case ets:lookup(Sequences, Q) of
+                                            [] ->
+                                                true = ets:insert_new(Sequences, {Q, NextRead, NextWrite});
+                                            [Orig = {Q, Read, Write}] ->
+                                                Repl = {Q, lists:min([Read, NextRead]),
+                                                        lists:max([Write, NextWrite])},
+                                                if Orig /= Repl ->
+                                                        true = ets:insert(Sequences, Repl);
+                                                   true -> true
+                                                end
+                                        end
+                                end, true, rabbit_disk_queue)
+                      end),
+    State.
 load_messages(undefined, [],
               State = #dqstate { file_summary = FileSummary,
                                  current_file_name = CurName }) ->
@@ -590,7 +668,8 @@ load_messages(Left, [File|Files],
     {ValidMessagesRev, ValidTotalSize} = lists:foldl(
         fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
                 case length(mnesia:dirty_match_object(rabbit_disk_queue,
-                                                      #dq_msg_loc { msg_id_and_queue = {MsgId, '_'},
+                                                      #dq_msg_loc { msg_id = MsgId,
+                                                                    queue_and_seq_id = '_',
                                                                     is_delivered = '_'})) of
                 0 -> {VMAcc, VTSAcc};
                 RefCount ->
@@ -627,7 +706,8 @@ recover_crashed_compactions1(Files, TmpFile) ->
     % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out
     lists:foreach(fun (MsgId) ->
                           true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue,
-                                                                      #dq_msg_loc { msg_id_and_queue = {MsgId, '_'},
+                                                                      #dq_msg_loc { msg_id = MsgId,
+                                                                                    queue_and_seq_id = '_',
                                                                                     is_delivered = '_'}))
                   end, MsgIdsTmp),
     {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)),
@@ -661,7 +741,8 @@ recover_crashed_compactions1(Files, TmpFile) ->
             lists:foreach(fun (MsgId) ->
                                   true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue,
-                                                                              #dq_msg_loc { msg_id_and_queue = {MsgId, '_'},
+                                                                              #dq_msg_loc { msg_id = MsgId,
+                                                                                            queue_and_seq_id = '_',
                                                                                             is_delivered = '_'}))
                           end, MsgIds),
             % The main file should be contiguous
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index b3c4a926..39951669 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -147,6 +147,7 @@ table_definitions() ->
     {rabbit_disk_queue,
      [{record_name, dq_msg_loc},
       {type, set},
+      {index, [msg_id]},
       {attributes, record_info(fields, dq_msg_loc)},
       {disc_only_copies, [node()]}]}
    ].
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 736ddfd4..1e66fe9a 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -684,7 +684,6 @@ delete_log_handlers(Handlers) ->
 test_disk_queue() ->
     % unicode chars are supported properly from r13 onwards
-    % io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup μs\t| Publish μs\t| Pub μs/msg\t| Pub μs/byte\t| Deliver μs\t| Del μs/msg\t| Del μs/byte~n", []),
     io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []),
     [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize),
            timer:sleep(1000) end || % 1000 milliseconds
         MsgSize <- [512, 8192, 32768, 131072],
@@ -706,7 +705,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) ->
                                fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end
                               ]]),
     {Deliver, ok} = timer:tc(?MODULE, rdq_time_commands,
-                             [[fun() -> [begin [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q, N), ok end || N <- List],
+                             [[fun() -> [begin [begin {N, Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q), ok end || N <- List],
                                                rabbit_disk_queue:ack(Q, List),
                                                ok = rabbit_disk_queue:tx_commit(Q, [])
                                          end || Q <- Qs]
@@ -739,7 +738,7 @@ rdq_stress_gc(MsgCount) ->
                                          end
                                  end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(StartChunk,MsgCount)]))) ++
         lists:seq(1, (StartChunk - 1)),
-    [begin {Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(q, N),
+    [begin {N, Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(q),
            rabbit_disk_queue:ack(q, [N]),
           rabbit_disk_queue:tx_commit(q, [])
     end || N <- AckList],
--
cgit v1.2.1


From c7b201f7f2035cc09b93ee60386c5ce070a306e7 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 21 Apr 2009 13:12:20 +0100
Subject: altered api so that deliver returns the seq_id (actually, it's a
 tuple of {msgid, seqid}, but that's irrelevant), and ack takes this seq_id
 (tuple) back in.

This avoids extra mnesia work and makes ack much faster. Given that the
amqqueue already tracks unacked messages, this seems reasonable. However, if
not, back off to the parent of this revision.
---
 src/rabbit_disk_queue.erl | 33 +++++++++++++++------------------
 src/rabbit_tests.erl      |  8 ++++----
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 9b0849c3..0623d77d 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -82,8 +82,8 @@ publish(Q, MsgId, Msg) when is_binary(Msg) ->
 deliver(Q) ->
     gen_server:call(?SERVER, {deliver, Q}, infinity).

-ack(Q, MsgIds) when is_list(MsgIds) ->
-    gen_server:cast(?SERVER, {ack, Q, MsgIds}).
+ack(Q, MsgSeqIds) when is_list(MsgSeqIds) ->
+    gen_server:cast(?SERVER, {ack, Q, MsgSeqIds}).

 tx_publish(MsgId, Msg) when is_binary(Msg) ->
     gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}).
@@ -154,8 +154,8 @@ handle_cast({publish, Q, MsgId, MsgBody}, State) ->
     {ok, State1} = internal_publish(Q, MsgId, MsgBody, State),
     {noreply, State1};
-handle_cast({ack, Q, MsgIds}, State) ->
-    {ok, State1} = internal_ack(Q, MsgIds, State),
+handle_cast({ack, Q, MsgSeqIds}, State) ->
+    {ok, State1} = internal_ack(Q, MsgSeqIds, State),
     {noreply, State1};
 handle_cast({tx_publish, MsgId, MsgBody}, State) ->
     {ok, State1} = internal_tx_publish(MsgId, MsgBody, State),
@@ -238,7 +238,7 @@ internal_deliver(Q, State = #dqstate { msg_location = MsgLocation,
                        true -> ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true})
                    end,
                    true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}),
-                    {ok, {MsgId, MsgBody, BodySize, Delivered},
+                    {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}},
                      State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }}
            end
    end.
@@ -249,13 +249,13 @@ internal_ack(Q, MsgIds, State) ->
 %% Q is only needed if MnesiaDelete = true
 %% called from tx_cancel with MnesiaDelete = false
 %% called from ack with MnesiaDelete = true
-remove_messages(Q, MsgIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation,
-                                                             sequences = Sequences,
-                                                             file_summary = FileSummary,
-                                                             current_file_name = CurName
-                                                           }) ->
+remove_messages(Q, MsgSeqIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation,
+                                                                sequences = Sequences,
+                                                                file_summary = FileSummary,
+                                                                current_file_name = CurName
+                                                              }) ->
     {Files, MaxSeqId}
-        = lists:foldl(fun (MsgId, {Files2, MaxSeqId2}) ->
+        = lists:foldl(fun ({MsgId, SeqId}, {Files2, MaxSeqId2}) ->
                               [{MsgId, RefCount, File, Offset, TotalSize}]
                                   = dets:lookup(MsgLocation, MsgId),
                               Files3 =
@@ -275,19 +275,13 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, State = # dqstate { msg_location = M
                                        Files2
                               end,
                               {if MnesiaDelete ->
-                                       [#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }]
-                                           = mnesia:dirty_match_object(rabbit_disk_queue,
-                                                                       #dq_msg_loc { msg_id = MsgId,
-                                                                                     queue_and_seq_id = {Q, '_'},
-                                                                                     is_delivered = '_'
-                                                                                   }),
                                        ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}),
                                        lists:max([SeqId, MaxSeqId2]);
                                   true ->
                                        MaxSeqId2
                                end,
                                Files3}
-                      end, {sets:new(), 0}, MsgIds),
+                      end, {sets:new(), 0}, MsgSeqIds),
     true = if MnesiaDelete ->
                    [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q),
                    if MaxSeqId > ReadSeqId ->
@@ -371,6 +365,9 @@ internal_publish(Q, MsgId, MsgBody, State) ->
     {ok, State1}.
 internal_tx_cancel(MsgIds, State) ->
+    % we don't need seq ids because we're not touching mnesia, because seqids were
+    % never assigned
+    MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)),
     remove_messages(undefined, MsgIds, false, State).

 %% ---- ROLLING OVER THE APPEND FILE ----
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 1e66fe9a..3eab352d 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -705,8 +705,8 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) ->
                                fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end
                               ]]),
     {Deliver, ok} = timer:tc(?MODULE, rdq_time_commands,
-                             [[fun() -> [begin [begin {N, Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(Q), ok end || N <- List],
-                                               rabbit_disk_queue:ack(Q, List),
+                             [[fun() -> [begin SeqIds = [begin {N, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(Q), SeqId end || N <- List],
+                                               rabbit_disk_queue:ack(Q, SeqIds),
                                                ok = rabbit_disk_queue:tx_commit(Q, [])
                                          end || Q <- Qs]
                               end]]),
@@ -738,8 +738,8 @@ rdq_stress_gc(MsgCount) ->
                                          end
                                  end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(StartChunk,MsgCount)]))) ++
         lists:seq(1, (StartChunk - 1)),
-    [begin {N, Msg, MsgSizeBytes, false} = rabbit_disk_queue:deliver(q),
-           rabbit_disk_queue:ack(q, [N]),
+    [begin {N, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(q),
+           rabbit_disk_queue:ack(q, [SeqId]),
            rabbit_disk_queue:tx_commit(q, [])
     end || N <- AckList],
     rdq_stop().
--
cgit v1.2.1


From aeadefbf2d1736b27c6247fdd852834080105a2a Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 21 Apr 2009 13:39:56 +0100
Subject: bitten.

---
 src/rabbit_disk_queue.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 0623d77d..510a8590 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -95,10 +95,10 @@ tx_cancel(MsgIds) when is_list(MsgIds) ->
     gen_server:cast(?SERVER, {tx_cancel, MsgIds}).

 stop() ->
-    gen_server:call(?SERVER, stop).
+    gen_server:call(?SERVER, stop, infinity).

 clean_stop() ->
-    gen_server:call(?SERVER, clean_stop).
+    gen_server:call(?SERVER, clean_stop, infinity).

 %% ---- GEN-SERVER INTERNAL API ----
--
cgit v1.2.1


From 0ae1b8dd14a064ca1195ee7308e48ae9acab8507 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 21 Apr 2009 17:56:00 +0100
Subject: *cough* *splutter* um. typo?
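The typo in question, fixed in the diff below, is the classic lists:foldl
hazard: the fun must return the accumulator in exactly the shape and order it
received it. A toy illustration of the correct pattern (not from the patch):

    %% Toy example: the accumulator is a pair, so the fun must return
    %% {Sum, Count} in that same order each time; returning it swapped,
    %% e.g. {Count, Sum + X}, is the kind of typo fixed here.
    sum_and_count(Xs) ->
        lists:foldl(fun (X, {Sum, Count}) -> {Sum + X, Count + 1} end,
                    {0, 0}, Xs).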
---
 src/rabbit_disk_queue.erl | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 510a8590..32512c2b 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -274,13 +274,14 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, State = # dqstate { msg_location = M
                                        ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}),
                                        Files2
                               end,
-                              {if MnesiaDelete ->
-                                       ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}),
-                                       lists:max([SeqId, MaxSeqId2]);
-                                  true ->
-                                       MaxSeqId2
-                               end,
-                               Files3}
+                              MaxSeqId3 =
+                                  if MnesiaDelete ->
+                                          ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}),
+                                          lists:max([SeqId, MaxSeqId2]);
+                                     true ->
+                                          MaxSeqId2
+                                  end,
+                              {Files3, MaxSeqId3}
                       end, {sets:new(), 0}, MsgSeqIds),
     true = if MnesiaDelete ->
                    [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q),
--
cgit v1.2.1


From 51de95fad3989828bbd71035f7f31356d6fd89ac Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 10:34:30 +0100
Subject: I don't understand why I thought I needed to adjust sequences in
 remove_messages.

The only case that code would be called would be from acks, and the effect
would be to increment the read seqid. But this would require acking a message
which hasn't been delivered, which is clearly insane. Also, fixed a bug in
the tests.

---
 src/rabbit_disk_queue.erl | 30 +++++++++---------------------
 src/rabbit_tests.erl      |  7 ++++---
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 32512c2b..0631a04e 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -250,12 +250,11 @@ internal_ack(Q, MsgIds, State) ->
 %% called from tx_cancel with MnesiaDelete = false
 %% called from ack with MnesiaDelete = true
 remove_messages(Q, MsgSeqIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation,
-                                                                sequences = Sequences,
                                                                 file_summary = FileSummary,
                                                                 current_file_name = CurName
                                                               }) ->
-    {Files, MaxSeqId}
-        = lists:foldl(fun ({MsgId, SeqId}, {Files2, MaxSeqId2}) ->
+    Files
+        = lists:foldl(fun ({MsgId, SeqId}, Files2) ->
                               [{MsgId, RefCount, File, Offset, TotalSize}]
                                   = dets:lookup(MsgLocation, MsgId),
                               Files3 =
@@ -274,23 +273,12 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, State = # dqstate { msg_location = M
                                        ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}),
                                        Files2
                               end,
-                              MaxSeqId3 =
-                                  if MnesiaDelete ->
-                                          ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}),
-                                          lists:max([SeqId, MaxSeqId2]);
-                                     true ->
-                                          MaxSeqId2
-                                  end,
-                              {Files3, MaxSeqId3}
-                      end, {sets:new(), 0}, MsgSeqIds),
-    true = if MnesiaDelete ->
-                   [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q),
-                   if MaxSeqId > ReadSeqId ->
-                           true = ets:insert(Sequences, {Q, MaxSeqId, WriteSeqId});
-                      true -> true
-                   end;
-              true -> true
-           end,
+                              if MnesiaDelete ->
+                                      ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId});
+                                 true -> ok
+                              end,
+                              Files3
+                      end, sets:new(), MsgSeqIds),
     State2 = compact(Files, State),
     {ok, State2}.

@@ -369,7 +357,7 @@ internal_tx_cancel(MsgIds, State) ->
     % we don't need seq ids because we're not touching mnesia, because seqids were
     % never assigned
     MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)),
-    remove_messages(undefined, MsgIds, false, State).
+    remove_messages(undefined, MsgSeqIds, false, State).
 %% ---- ROLLING OVER THE APPEND FILE ----
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 3eab352d..6d973c45 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -691,7 +691,7 @@ test_disk_queue() ->
         MsgCount <- [1024, 4096, 16384]
     ],
     rdq_virgin(),
-    rdq_stress_gc(10000),
+    passed = rdq_stress_gc(10000),
     passed.

 rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) ->
@@ -738,11 +738,12 @@ rdq_stress_gc(MsgCount) ->
                                          end
                                  end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(StartChunk,MsgCount)]))) ++
         lists:seq(1, (StartChunk - 1)),
-    [begin {N, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(q),
+    [begin {_, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(q),
            rabbit_disk_queue:ack(q, [SeqId]),
            rabbit_disk_queue:tx_commit(q, [])
     end || N <- AckList],
-    rdq_stop().
+    rdq_stop(),
+    passed.

 rdq_time_commands(Funcs) ->
     lists:foreach(fun (F) -> F() end, Funcs).
--
cgit v1.2.1


From 5c3379b8671fb8aa57b2539d17cd75386f6df989 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 10:54:17 +0100
Subject: stupid bug in start-up code: I still fail to remember that messages
 only go away after being ack'd, so on recovery we should redeliver them.

Also, reworked the stress gc test so that, as before, it really does ack
messages in a non-linear order. This got quite a bit harder now that we can't
deliver arbitrary messages and need to build the mapping between the msgid in
the delivery and the seqid needed for the acks.

---
 src/rabbit_disk_queue.erl | 10 +++-------
 src/rabbit_tests.erl      | 11 +++++++----
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 0631a04e..25bc17e1 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -608,17 +608,13 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) ->
                       fun() ->
                              ok = mnesia:read_lock_table(rabbit_disk_queue),
                              mnesia:foldl(
-                                fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId},
-                                                   is_delivered = Delivered }, true) ->
-                                        NextRead = if Delivered -> SeqId + 1;
-                                                      true -> SeqId
-                                                   end,
+                                fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) ->
                                        NextWrite = SeqId + 1,
                                        case ets:lookup(Sequences, Q) of
                                            [] ->
-                                                true = ets:insert_new(Sequences, {Q, NextRead, NextWrite});
+                                                true = ets:insert_new(Sequences, {Q, SeqId, NextWrite});
                                            [Orig = {Q, Read, Write}] ->
-                                                Repl = {Q, lists:min([Read, NextRead]),
+                                                Repl = {Q, lists:min([Read, SeqId]),
                                                        lists:max([Write, NextWrite])},
                                                if Orig /= Repl ->
                                                        true = ets:insert(Sequences, Repl);
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 6d973c45..a04c6f1b 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -738,10 +738,13 @@ rdq_stress_gc(MsgCount) ->
                                          end
                                  end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(StartChunk,MsgCount)]))) ++
         lists:seq(1, (StartChunk - 1)),
-    [begin {_, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(q),
-           rabbit_disk_queue:ack(q, [SeqId]),
-           rabbit_disk_queue:tx_commit(q, [])
-     end || N <- AckList],
+    MsgIdToSeqDict
+        = lists:foldl(fun (_, Acc) ->
+                              {MsgId, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(q),
+                              dict:store(MsgId, SeqId, Acc)
+                      end, dict:new(), List),
+    rabbit_disk_queue:ack(q, [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict), SeqId end || MsgId <- AckList]),
+    rabbit_disk_queue:tx_commit(q, []),
     rdq_stop(),
     passed.
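The msgid-to-seqid mapping the reworked test builds can be expressed on its
own roughly as follows (an illustrative sketch against the deliver/ack API at
this point in the series, not part of the patch; it assumes the queue holds
at least N messages):

    %% Illustrative only: deliver N messages, remember each msgid's seqid,
    %% then ack in an arbitrary order supplied by the caller.
    drain_then_ack(Q, N, AckOrder) ->
        MsgIdToSeq =
            lists:foldl(fun (_, D) ->
                                {MsgId, _Msg, _Size, _Delivered, SeqId} =
                                    rabbit_disk_queue:deliver(Q),
                                dict:store(MsgId, SeqId, D)
                        end, dict:new(), lists:seq(1, N)),
        rabbit_disk_queue:ack(Q, [dict:fetch(MsgId, MsgIdToSeq) || MsgId <- AckOrder]),
        rabbit_disk_queue:tx_commit(Q, []).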
--
cgit v1.2.1


From ad2218df93fa498c80fdda22ce8519fc8773a3b1 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 22 Apr 2009 11:16:17 +0100
Subject: cosmetic

---
 src/rabbit_disk_queue.erl | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 32512c2b..35003196 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -45,17 +45,17 @@
 -include_lib("stdlib/include/qlc.hrl").
 -include("rabbit.hrl").

--define(WRITE_OK_SIZE_BITS, 8).
--define(WRITE_OK, 255).
--define(INTEGER_SIZE_BYTES, 8).
--define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)).
--define(MSG_LOC_DETS_NAME, rabbit_disk_queue_msg_location).
--define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary).
--define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences).
--define(FILE_EXTENSION, ".rdq").
--define(FILE_EXTENSION_TMP, ".rdt").
--define(FILE_EXTENSION_DETS, ".dets").
--define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))).
+-define(WRITE_OK_SIZE_BITS,          8).
+-define(WRITE_OK,                  255).
+-define(INTEGER_SIZE_BYTES,          8).
+-define(INTEGER_SIZE_BITS,          (8 * ?INTEGER_SIZE_BYTES)).
+-define(MSG_LOC_DETS_NAME,          rabbit_disk_queue_msg_location).
+-define(FILE_SUMMARY_ETS_NAME,      rabbit_disk_queue_file_summary).
+-define(SEQUENCE_ETS_NAME,          rabbit_disk_queue_sequences).
+-define(FILE_EXTENSION,             ".rdq").
+-define(FILE_EXTENSION_TMP,         ".rdt").
+-define(FILE_EXTENSION_DETS,        ".dets").
+-define(FILE_PACKING_ADJUSTMENT,    (1 + (2* (?INTEGER_SIZE_BYTES)))).

 -define(SERVER, ?MODULE).

@@ -113,15 +113,15 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
                                        {max_no_slots, 1024*1024*1024},
                                        {type, set}
                                       ]),
-    State = #dqstate { msg_location = MsgLocation,
-                       file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]),
-                       sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]),
-                       current_file_num = 0,
-                       current_file_name = InitName,
-                       current_file_handle = undefined,
-                       current_offset = 0,
-                       file_size_limit = FileSizeLimit,
-                       read_file_handles = {dict:new(), gb_trees:empty()},
+    State = #dqstate { msg_location            = MsgLocation,
+                       file_summary            = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]),
+                       sequences               = ets:new(?SEQUENCE_ETS_NAME, [set, private]),
+                       current_file_num        = 0,
+                       current_file_name       = InitName,
+                       current_file_handle     = undefined,
+                       current_offset          = 0,
+                       file_size_limit         = FileSizeLimit,
+                       read_file_handles       = {dict:new(), gb_trees:empty()},
                        read_file_handles_limit = ReadFileHandlesLimit
                      },
     {ok, State1 = #dqstate { current_file_name = CurrentName,
                              current_offset = Offset } } = load_from_disk(State),
     Path = form_filename(CurrentName),
@@ -460,7 +460,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRig
              State1) ->
     (State = #dqstate { msg_location = MsgLocation })
         = closeFile(Source, closeFile(Destination, State1)),
-    {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]),
+    {ok, SourceHdl}      = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]),
     {ok, DestinationHdl} = file:open(form_filename(Destination), [read, write, raw, binary, delayed_write, read_ahead]),
     ExpectedSize = SourceValid + DestinationValid,
     % if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file
--
cgit v1.2.1


From 7c8f9c30e8596230dcce89cdbf6bdbb1e9f4de9e Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 12:15:38 +0100
Subject: well it's now much less readable. But at least the lines are shorter.
---
 src/rabbit_disk_queue.erl | 747 +++++++++++++++++++++++++++-------------------
 1 file changed, 437 insertions(+), 310 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index c56b14d5..190c06f0 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -59,22 +59,23 @@
 -define(SERVER, ?MODULE).

--record(dqstate, {msg_location,
-                  file_summary,
-                  sequences,
-                  current_file_num,
-                  current_file_name,
-                  current_file_handle,
-                  current_offset,
-                  file_size_limit,
-                  read_file_handles,
-                  read_file_handles_limit
+-record(dqstate, {msg_location,            % where are messages?
+                  file_summary,            %% what's in the files?
+                  sequences,               %% next read and write for each q
+                  current_file_num,        %% current file name as number
+                  current_file_name,       %% current file name
+                  current_file_handle,     %% current file handle
+                  current_offset,          %% current offset within current file
+                  file_size_limit,         %% how big can our files get?
+                  read_file_handles,       %% file handles for reading (LRU)
+                  read_file_handles_limit  %% how many file handles can we open?
                  }).

 %% ---- PUBLIC API ----

 start_link(FileSizeLimit, ReadFileHandlesLimit) ->
-    gen_server:start_link({local, ?SERVER}, ?MODULE, [FileSizeLimit, ReadFileHandlesLimit], []).
+    gen_server:start_link({local, ?SERVER}, ?MODULE,
+                          [FileSizeLimit, ReadFileHandlesLimit], []).

 publish(Q, MsgId, Msg) when is_binary(Msg) ->
     gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}).
@@ -106,28 +107,35 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     process_flag(trap_exit, true),
     ok = filelib:ensure_dir(form_filename("nothing")),
     InitName = "0" ++ ?FILE_EXTENSION,
-    {ok, MsgLocation} = dets:open_file(?MSG_LOC_DETS_NAME,
-                                       [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ ?FILE_EXTENSION_DETS)},
-                                        {min_no_slots, 1024*1024},
-                                        % man says this should be <= 32M. But it works...
-                                        {max_no_slots, 1024*1024*1024},
-                                        {type, set}
-                                       ]),
+    {ok, MsgLocation}
+        = dets:open_file(?MSG_LOC_DETS_NAME,
+                         [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME)
+                                               ++ ?FILE_EXTENSION_DETS)},
+                          {min_no_slots, 1024*1024},
+                          %% man says this should be <= 32M. But it works...
+                          {max_no_slots, 1024*1024*1024},
+                          {type, set}
+                         ]),
+    State
+        = #dqstate { msg_location = MsgLocation,
+                     file_summary = ets:new(?FILE_SUMMARY_ETS_NAME,
+                                            [set, private]),
+                     sequences = ets:new(?SEQUENCE_ETS_NAME,
+                                         [set, private]),
+                     current_file_num = 0,
+                     current_file_name = InitName,
+                     current_file_handle = undefined,
+                     current_offset = 0,
+                     file_size_limit = FileSizeLimit,
+                     read_file_handles = {dict:new(), gb_trees:empty()},
+                     read_file_handles_limit = ReadFileHandlesLimit
+                   },
     {ok, State1 = #dqstate { current_file_name = CurrentName,
-                             current_offset = Offset } } = load_from_disk(State),
+                             current_offset = Offset } }
+        = load_from_disk(State),
     Path = form_filename(CurrentName),
-    {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), %% read only needed so that we can seek
+    %% read is only needed so that we can seek
+    {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]),
     {ok, Offset} = file:position(FileHdl, {bof, Offset}),
     {ok, State1 # dqstate { current_file_handle = FileHdl }}.

@@ -147,8 +155,9 @@ handle_call(clean_stop, _From, State) ->
     true = ets:delete(FileSummary),
     true = ets:delete(Sequences),
     lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))),
-    {stop, normal, ok, State1 # dqstate { current_file_handle = undefined,
-                                          read_file_handles = {dict:new(), gb_trees:empty()}}}.
+    {stop, normal, ok,
+     State1 # dqstate { current_file_handle = undefined,
+                        read_file_handles = {dict:new(), gb_trees:empty()}}}.
     %% gen_server now calls terminate, which then calls shutdown

 handle_cast({publish, Q, MsgId, MsgBody}, State) ->
@@ -174,9 +183,10 @@ shutdown(State = #dqstate { msg_location = MsgLocation,
                             current_file_handle = FileHdl,
                             read_file_handles = {ReadHdls, _ReadHdlsAge} }) ->
-    % deliberately ignoring return codes here
+    %% deliberately ignoring return codes here
     dets:close(MsgLocation),
-    file:delete(form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ ?FILE_EXTENSION_DETS)),
+    file:delete(form_filename(atom_to_list(?MSG_LOC_DETS_NAME)
+                              ++ ?FILE_EXTENSION_DETS)),
     if FileHdl =:= undefined -> ok;
        true -> file:sync(FileHdl),
               file:close(FileHdl)
@@ -200,42 +210,56 @@ base_directory() ->

 %% ---- INTERNAL RAW FUNCTIONS ----

-internal_deliver(Q, State = #dqstate { msg_location = MsgLocation,
-                                       sequences = Sequences,
-                                       read_file_handles_limit = ReadFileHandlesLimit,
-                                       read_file_handles = {ReadHdls, ReadHdlsAge}
-                                     }) ->
+internal_deliver(Q, State
+                 = #dqstate { msg_location = MsgLocation,
+                              sequences = Sequences,
+                              read_file_handles_limit = ReadFileHandlesLimit,
+                              read_file_handles = {ReadHdls, ReadHdlsAge}
+                            }) ->
     case ets:lookup(Sequences, Q) of
         [] -> {ok, empty, State};
         [{Q, ReadSeqId, WriteSeqId}] ->
             case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of
                 [] -> {ok, empty, State};
-                [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] ->
-                    [{MsgId, _RefCount, File, Offset, TotalSize}] = dets:lookup(MsgLocation, MsgId),
+                [Obj
+                 = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] ->
+                    [{MsgId, _RefCount, File, Offset, TotalSize}]
+                        = dets:lookup(MsgLocation, MsgId),
                     {FileHdl, ReadHdls1, ReadHdlsAge1}
                         = case dict:find(File, ReadHdls) of
                               error ->
-                                  {ok, Hdl} = file:open(form_filename(File), [read, raw, binary, read_ahead]),
+                                  {ok, Hdl} = file:open(form_filename(File),
+                                                        [read, raw, binary,
+                                                         read_ahead]),
                                  Now = now(),
                                  case dict:size(ReadHdls) < ReadFileHandlesLimit of
                                      true ->
-                                          {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), gb_trees:enter(Now, File, ReadHdlsAge)};
+                                          {Hdl,
+                                           dict:store(File, {Hdl, Now}, ReadHdls),
+                                           gb_trees:enter(Now, File, ReadHdlsAge)};
                                      _False ->
-                                          {_Then, OldFile, ReadHdlsAge2} = gb_trees:take_smallest(ReadHdlsAge),
-                                          {ok, {OldHdl, _Then}} = dict:find(OldFile, ReadHdls),
+                                          {_Then, OldFile, ReadHdlsAge2}
+                                              = gb_trees:take_smallest(ReadHdlsAge),
+                                          {ok, {OldHdl, _Then}}
+                                              = dict:find(OldFile, ReadHdls),
                                          ok = file:close(OldHdl),
                                          ReadHdls2 = dict:erase(OldFile, ReadHdls),
-                                          {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), gb_trees:enter(Now, File, ReadHdlsAge2)}
+                                          {Hdl,
+                                           dict:store(File, {Hdl, Now}, ReadHdls2),
+                                           gb_trees:enter(Now, File, ReadHdlsAge2)}
                                  end;
                              {ok, {Hdl, Then}} ->
                                  Now = now(),
                                  {Hdl, dict:store(File, {Hdl, Now}, ReadHdls),
-                                   gb_trees:enter(Now, File, gb_trees:delete(Then, ReadHdlsAge))}
+                                   gb_trees:enter(Now, File,
+                                                  gb_trees:delete(Then, ReadHdlsAge))}
                          end,
-                    % read the message
-                    {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize),
+                    %% read the message
+                    {ok, {MsgBody, BodySize}}
+                        = read_message_at_offset(FileHdl, Offset, TotalSize),
                    if Delivered -> ok;
-                       true -> ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true})
+                       true -> ok = mnesia:dirty_write(rabbit_disk_queue,
+                                                       Obj #dq_msg_loc {is_delivered = true})
                    end,
                    true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}),
                    {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}},
@@ -249,71 +273,83 @@ internal_ack(Q, MsgIds, State) ->

 %% Q is only needed if MnesiaDelete = true
 %% called from tx_cancel with MnesiaDelete = false
 %% called from ack with MnesiaDelete = true
-remove_messages(Q, MsgSeqIds, MnesiaDelete, State = # dqstate { msg_location = MsgLocation,
-                                                                file_summary = FileSummary,
-                                                                current_file_name = CurName
-                                                              }) ->
+remove_messages(Q, MsgSeqIds, MnesiaDelete,
+                State = # dqstate { msg_location = MsgLocation,
+                                    file_summary = FileSummary,
+                                    current_file_name = CurName
+                                  }) ->
     Files
-        = lists:foldl(fun ({MsgId, SeqId}, Files2) ->
-                              [{MsgId, RefCount, File, Offset, TotalSize}]
-                                  = dets:lookup(MsgLocation, MsgId),
-                              Files3
-                                  = if 1 =:= RefCount ->
-                                            ok = dets:delete(MsgLocation, MsgId),
-                                            [{File, ValidTotalSize, ContiguousTop, Left, Right}]
-                                                = ets:lookup(FileSummary, File),
-                                            ContiguousTop1 = lists:min([ContiguousTop, Offset]),
-                                            true = ets:insert(FileSummary,
-                                                              {File, (ValidTotalSize - TotalSize - ?FILE_PACKING_ADJUSTMENT),
-                                                               ContiguousTop1, Left, Right}),
-                                            if CurName =:= File -> Files2;
-                                               true -> sets:add_element(File, Files2)
-                                            end;
-                                       1 < RefCount ->
-                                            ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, File, Offset, TotalSize}),
-                                            Files2
-                                    end,
-                              if MnesiaDelete ->
-                                      ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId});
-                                 true -> ok
-                              end,
-                              Files3
-                      end, sets:new(), MsgSeqIds),
+        = lists:foldl(
+            fun ({MsgId, SeqId}, Files2) ->
+                    [{MsgId, RefCount, File, Offset, TotalSize}]
+                        = dets:lookup(MsgLocation, MsgId),
+                    Files3
+                        = if 1 =:= RefCount ->
+                                  ok = dets:delete(MsgLocation, MsgId),
+                                  [{File, ValidTotalSize, ContiguousTop, Left, Right}]
+                                      = ets:lookup(FileSummary, File),
+                                  ContiguousTop1 = lists:min([ContiguousTop, Offset]),
+                                  true = ets:insert(FileSummary,
+                                                    {File, (ValidTotalSize - TotalSize
+                                                            - ?FILE_PACKING_ADJUSTMENT),
+                                                     ContiguousTop1, Left, Right}),
+                                  if CurName =:= File -> Files2;
+                                     true -> sets:add_element(File, Files2)
+                                  end;
+                             1 < RefCount ->
+                                  ok = dets:insert(MsgLocation, {MsgId, RefCount - 1,
+                                                                 File, Offset, TotalSize}),
+                                  Files2
+                          end,
+                    if MnesiaDelete ->
+                            ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId});
+                       true -> ok
+                    end,
+                    Files3
+            end, sets:new(), MsgSeqIds),
     State2 = compact(Files, State),
     {ok, State2}.
-internal_tx_publish(MsgId, MsgBody, State = #dqstate { msg_location = MsgLocation,
-                                                       current_file_handle = CurHdl,
-                                                       current_file_name = CurName,
-                                                       current_offset = CurOffset,
-                                                       file_summary = FileSummary
-                                                     }) ->
+internal_tx_publish(MsgId, MsgBody,
+                    State = #dqstate { msg_location = MsgLocation,
+                                       current_file_handle = CurHdl,
+                                       current_file_name = CurName,
+                                       current_offset = CurOffset,
+                                       file_summary = FileSummary
+                                     }) ->
     case dets:lookup(MsgLocation, MsgId) of
         [] ->
-            % New message, lots to do
+            %% New message, lots to do
             {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody),
-            true = dets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}),
+            true = dets:insert_new(MsgLocation, {MsgId, 1, CurName,
+                                                 CurOffset, TotalSize}),
             [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}]
                 = ets:lookup(FileSummary, CurName),
-            ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT,
+            ValidTotalSize1 = ValidTotalSize + TotalSize
+                              + ?FILE_PACKING_ADJUSTMENT,
             ContiguousTop1 = if CurOffset =:= ContiguousTop ->
-                                     ValidTotalSize1; % can't be any holes in this file
+                                     %% can't be any holes in this file
+                                     ValidTotalSize1;
                                 true -> ContiguousTop
                             end,
-            true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}),
-            maybe_roll_to_new_file(CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT,
-                                   State # dqstate {current_offset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT});
+            true = ets:insert(FileSummary, {CurName, ValidTotalSize1,
+                                            ContiguousTop1, Left, undefined}),
+            NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT,
+            maybe_roll_to_new_file(NextOffset,
+                                   State # dqstate {current_offset = NextOffset});
         [{MsgId, RefCount, File, Offset, TotalSize}] ->
-            % We already know about it, just update counter
-            ok = dets:insert(MsgLocation, {MsgId, RefCount + 1, File, Offset, TotalSize}),
+            %% We already know about it, just update counter
+            ok = dets:insert(MsgLocation, {MsgId, RefCount + 1, File,
+                                           Offset, TotalSize}),
             {ok, State}
     end.
-internal_tx_commit(Q, MsgIds, State = #dqstate { msg_location = MsgLocation,
-                                                 current_file_handle = CurHdl,
-                                                 current_file_name = CurName,
-                                                 sequences = Sequences
-                                               }) ->
+internal_tx_commit(Q, MsgIds,
+                   State = #dqstate { msg_location = MsgLocation,
+                                      current_file_handle = CurHdl,
+                                      current_file_name = CurName,
+                                      sequences = Sequences
+                                    }) ->
     {ReadSeqId, InitWriteSeqId}
         = case ets:lookup(Sequences, Q) of
              [] -> {0,0};
@@ -322,15 +358,18 @@ internal_tx_commit(Q, MsgIds,
     {atomic, {Sync, WriteSeqId}}
         = mnesia:transaction(
            fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue),
-                    lists:foldl(fun (MsgId, {Acc, NextWriteSeqId}) ->
-                                        [{MsgId, _RefCount, File, _Offset, _TotalSize}] =
-                                            dets:lookup(MsgLocation, MsgId),
-                                        ok = mnesia:write(rabbit_disk_queue,
-                                                          #dq_msg_loc { queue_and_seq_id = {Q, NextWriteSeqId},
-                                                                        msg_id = MsgId, is_delivered = false},
-                                                          write),
-                                        {Acc or (CurName =:= File), NextWriteSeqId + 1}
-                                end, {false, InitWriteSeqId}, MsgIds)
+                    lists:foldl(
+                      fun (MsgId, {Acc, NextWriteSeqId}) ->
+                              [{MsgId, _RefCount, File, _Offset, _TotalSize}]
+                                  = dets:lookup(MsgLocation, MsgId),
+                              ok = mnesia:write(rabbit_disk_queue,
+                                                #dq_msg_loc { queue_and_seq_id
+                                                              = {Q, NextWriteSeqId},
+                                                              msg_id = MsgId,
+                                                              is_delivered = false},
+                                                write),
+                              {Acc or (CurName =:= File), NextWriteSeqId + 1}
+                      end, {false, InitWriteSeqId}, MsgIds)
            end),
     true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}),
     if Sync -> ok = file:sync(CurHdl);
@@ -339,41 +378,46 @@ internal_tx_commit(Q, MsgIds,
     {ok, State}.

 internal_publish(Q, MsgId, MsgBody, State) ->
-    {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, MsgBody, State),
+    {ok, State1 = #dqstate { sequences = Sequences }}
+        = internal_tx_publish(MsgId, MsgBody, State),
     WriteSeqId = case ets:lookup(Sequences, Q) of
-                     [] -> % previously unseen queue
+                     [] -> %% previously unseen queue
                         true = ets:insert_new(Sequences, {Q, 0, 1}),
                         0;
                     [{Q, ReadSeqId, WriteSeqId2}] ->
-                         true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2 +1}),
+                         true = ets:insert(Sequences, {Q, ReadSeqId,
+                                                       WriteSeqId2 +1}),
                         WriteSeqId2
                 end,
-    ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId},
-                                                             msg_id = MsgId,
-                                                             is_delivered = false}),
+    ok = mnesia:dirty_write(rabbit_disk_queue,
+                            #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId},
+                                          msg_id = MsgId,
+                                          is_delivered = false}),
     {ok, State1}.

 internal_tx_cancel(MsgIds, State) ->
-    % we don't need seq ids because we're not touching mnesia, because seqids were
-    % never assigned
+    %% we don't need seq ids because we're not touching mnesia,
+    %% because seqids were never assigned
     MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)),
     remove_messages(undefined, MsgSeqIds, false, State).
 %% ---- ROLLING OVER THE APPEND FILE ----

-maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimit,
-                                                  current_file_name = CurName,
-                                                  current_file_handle = CurHdl,
-                                                  current_file_num = CurNum,
-                                                  file_summary = FileSummary
-                                                }
+maybe_roll_to_new_file(Offset,
+                       State = #dqstate { file_size_limit = FileSizeLimit,
+                                          current_file_name = CurName,
+                                          current_file_handle = CurHdl,
+                                          current_file_num = CurNum,
+                                          file_summary = FileSummary
+                                        }
                       ) when Offset >= FileSizeLimit ->
     ok = file:sync(CurHdl),
     ok = file:close(CurHdl),
     NextNum = CurNum + 1,
     NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION,
-    {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]),
-    true = ets:update_element(FileSummary, CurName, {5, NextName}), % 5 is Right
+    {ok, NextHdl} = file:open(form_filename(NextName),
+                              [write, raw, binary, delayed_write]),
+    true = ets:update_element(FileSummary, CurName, {5, NextName}), %% 5 is Right
     true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}),
     {ok, State # dqstate { current_file_name = NextName,
                            current_file_handle = NextHdl,
@@ -386,55 +430,70 @@ maybe_roll_to_new_file(_, State) ->

 %% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ----

 compact(FilesSet, State) ->
-    % smallest number, hence eldest, hence left-most, first
+    %% smallest number, hence eldest, hence left-most, first
     Files = lists:sort(sets:to_list(FilesSet)),
-    % foldl reverses, so now youngest/right-most first
-    RemainingFiles = lists:foldl(fun (File, Acc) -> delete_empty_files(File, Acc, State) end, [], Files),
+    %% foldl reverses, so now youngest/right-most first
+    RemainingFiles = lists:foldl(fun (File, Acc) ->
+                                         delete_empty_files(File, Acc, State)
+                                 end, [], Files),
     lists:foldl(fun combineFile/2, State, lists:reverse(RemainingFiles)).
combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, file_summary = FileSummary, current_file_name = CurName }) -> - % the file we're looking at may no longer exist as it may have been deleted - % within the current GC run + %% the file we're looking at may no longer exist as it may have been deleted + %% within the current GC run case ets:lookup(FileSummary, File) of [] -> State; [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> - GoRight = fun() -> - case Right of - undefined -> State; - _ when not(CurName =:= Right) -> - [RightObj = {Right, RightValidData, _RightContiguousTop, File, RightRight}] - = ets:lookup(FileSummary, Right), - RightSumData = ValidData + RightValidData, - if FileSizeLimit >= RightSumData -> - % here, Right will be the source and so will be deleted, - % File will be the destination - State1 = combineFiles(RightObj, FileObj, State), - % this could fail if RightRight is undefined - ets:update_element(FileSummary, RightRight, {4, File}), % left is the 4th field - true = ets:insert(FileSummary, {File, RightSumData, RightSumData, Left, RightRight}), - true = ets:delete(FileSummary, Right), - State1; - true -> State - end; - _ -> State - end - end, + GoRight + = fun() -> + case Right of + undefined -> State; + _ when not(CurName =:= Right) -> + [RightObj = {Right, RightValidData, + _RightContiguousTop, File, RightRight}] + = ets:lookup(FileSummary, Right), + RightSumData = ValidData + RightValidData, + if FileSizeLimit >= RightSumData -> + %% here, Right will be the source and so will be deleted, + %% File will be the destination + State1 = combineFiles(RightObj, FileObj, + State), + %% this could fail if RightRight is undefined + %% left is the 4th field + ets:update_element(FileSummary, + RightRight, {4, File}), + true = ets:insert(FileSummary, {File, + RightSumData, + RightSumData, + Left, + RightRight}), + true = ets:delete(FileSummary, Right), + State1; + true -> State + end; + _ -> State + end + end, case Left of undefined -> GoRight(); - _ -> [LeftObj = {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] + _ -> [LeftObj + = {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] = ets:lookup(FileSummary, Left), LeftSumData = ValidData + LeftValidData, if FileSizeLimit >= LeftSumData -> - % here, File will be the source and so will be deleted, - % Left will be the destination + %% here, File will be the source and so will be deleted, + %% Left will be the destination State1 = combineFiles(FileObj, LeftObj, State), - % this could fail if Right is undefined - ets:update_element(FileSummary, Right, {4, Left}), % left is the 4th field - true = ets:insert(FileSummary, {Left, LeftSumData, LeftSumData, LeftLeft, Right}), + %% this could fail if Right is undefined + %% left is the 4th field + ets:update_element(FileSummary, Right, {4, Left}), + true = ets:insert(FileSummary, {Left, LeftSumData, + LeftSumData, + LeftLeft, Right}), true = ets:delete(FileSummary, File), State1; true -> @@ -443,113 +502,148 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, end end. 
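The nesting in combineFile/2 obscures a fairly small policy: merge this file into its left neighbour when their combined live data fits under the size limit; failing that, pull the right neighbour in, provided it exists, is not the current append file, and also fits; otherwise do nothing. A pure restatement under the same assumptions (the name and shape here are hypothetical):

    %% Illustrative restatement only; Valid* are the live byte counts
    %% from file_summary and CurName is the current append file.
    merge_choice(Left, LeftValid, Valid, Right, RightValid,
                 FileSizeLimit, CurName) ->
        if Left =/= undefined,
           FileSizeLimit >= Valid + LeftValid ->
                {merge_into_left, Left};    %% File is the source, deleted
           Right =/= undefined, Right =/= CurName,
           FileSizeLimit >= Valid + RightValid ->
                {merge_from_right, Right};  %% Right is the source, deleted
           true ->
                no_merge
        end.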
-combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, - {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, +combineFiles({Source, SourceValid, _SourceContiguousTop, + _SourceLeft, _SourceRight}, + {Destination, DestinationValid, DestinationContiguousTop, + _DestinationLeft, _DestinationRight}, State1) -> (State = #dqstate { msg_location = MsgLocation }) = closeFile(Source, closeFile(Destination, State1)), - {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]), - {ok, DestinationHdl} = file:open(form_filename(Destination), [read, write, raw, binary, delayed_write, read_ahead]), + {ok, SourceHdl} + = file:open(form_filename(Source), + [read, write, raw, binary, delayed_write, read_ahead]), + {ok, DestinationHdl} + = file:open(form_filename(Destination), + [read, write, raw, binary, delayed_write, read_ahead]), ExpectedSize = SourceValid + DestinationValid, - % if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file - % if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file - % then truncate, copy back in, and then copy over from Source - % otherwise we just truncate straight away and copy over from Source + %% if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file + %% if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file + %% then truncate, copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source if DestinationContiguousTop =:= DestinationValid -> - {ok, DestinationValid} = file:position(DestinationHdl, {bof, DestinationValid}), + {ok, DestinationValid} = file:position(DestinationHdl, + {bof, DestinationValid}), ok = file:truncate(DestinationHdl), - {ok, ExpectedSize} = file:position(DestinationHdl, {cur, SourceValid}), + {ok, ExpectedSize} = file:position(DestinationHdl, + {cur, SourceValid}), ok = file:truncate(DestinationHdl), - {ok, DestinationValid} = file:position(DestinationHdl, {bof, DestinationValid}); + {ok, DestinationValid} = file:position(DestinationHdl, + {bof, DestinationValid}); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = file:open(form_filename(Tmp), [read, write, raw, binary, delayed_write, read_ahead]), + {ok, TmpHdl} + = file:open(form_filename(Tmp), + [read, write, raw, binary, delayed_write, read_ahead]), Worklist - = lists:dropwhile(fun ({_, _, _, Offset, _}) when Offset /= DestinationContiguousTop -> - % it cannot be that Offset == DestinationContiguousTop - % because if it was then DestinationContiguousTop would have been - % extended by TotalSize - Offset < DestinationContiguousTop - % Given expected access patterns, I suspect that the list should be - % naturally sorted as we require, however, we need to enforce it anyway - end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> - OffA < OffB - end, - dets:match_object(MsgLocation, {'_', '_', Destination, '_', '_'}))), + = lists:dropwhile( + fun ({_, _, _, Offset, _}) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == DestinationContiguousTop + %% because if it was then DestinationContiguousTop would have been + %% extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I suspect that the list should be + %% naturally sorted as we require, however, we need to 
enforce it anyway + end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + OffA < OffB + end, + dets:match_object(MsgLocation, + {'_', '_', Destination, + '_', '_'}))), TmpSize = DestinationValid - DestinationContiguousTop, - {TmpSize, BlockStart1, BlockEnd1} = - lists:foldl(fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) -> - % CurOffset is in the TmpFile. - % Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) - Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - % this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset - FinalOffset = DestinationContiguousTop + CurOffset, - ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, FinalOffset, TotalSize}), - - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - % base case, called only for the first list elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - % extend the current block because the next msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - % found a gap, so actually do the work for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = file:position(DestinationHdl, {bof, BlockStart}), - {ok, BSize} = file:copy(DestinationHdl, TmpHdl, BSize), - {NextOffset, Offset, Offset + Size} - end - end, {0, undefined, undefined}, Worklist), - % do the last remaining block + {TmpSize, BlockStart1, BlockEnd1} + = lists:foldl( + fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the TmpFile. + %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + %% this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset + FinalOffset = DestinationContiguousTop + CurOffset, + ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, + FinalOffset, TotalSize}), + + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + %% extend the current block because the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + %% found a gap, so actually do the work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} + = file:position(DestinationHdl, + {bof, BlockStart}), + {ok, BSize} = file:copy(DestinationHdl, + TmpHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {0, undefined, undefined}, Worklist), + %% do the last remaining block BSize1 = BlockEnd1 - BlockStart1, {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), - % so now Tmp contains everything we need to salvage from Destination, - % and MsgLocation has been updated to reflect compaction of Destination - % so truncate Destination and copy from Tmp back to the end + %% so now Tmp contains everything we need to salvage from Destination, + %% and MsgLocation has been updated to reflect compaction of Destination + %% so truncate Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), - {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), + {ok, DestinationContiguousTop} + = file:position(DestinationHdl, + {bof, DestinationContiguousTop}), ok = file:truncate(DestinationHdl), 
- {ok, ExpectedSize} = file:position(DestinationHdl, {bof, ExpectedSize}), + {ok, ExpectedSize} + = file:position(DestinationHdl, + {bof, ExpectedSize}), ok = file:truncate(DestinationHdl), - {ok, DestinationContiguousTop} = file:position(DestinationHdl, {bof, DestinationContiguousTop}), + {ok, DestinationContiguousTop} + = file:position(DestinationHdl, + {bof, DestinationContiguousTop}), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), - % position in DestinationHdl should now be DestinationValid + %% position in DestinationHdl should now be DestinationValid ok = file:sync(DestinationHdl), ok = file:close(TmpHdl), ok = file:delete(form_filename(Tmp)) end, - SourceWorkList = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> - OffA < OffB - end, dets:match_object(MsgLocation, {'_', '_', Source, '_', '_'})), - {ExpectedSize, BlockStart2, BlockEnd2} = - lists:foldl(fun ({MsgId, RefCount, _Source, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) -> - % CurOffset is in the DestinationFile. - % Offset, BlockStart and BlockEnd are in the SourceFile - Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - % update MsgLocation to reflect change of file and offset - ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, CurOffset, TotalSize}), - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - % base case, called only for the first list elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - % extend the current block because the next msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - % found a gap, so actually do the work for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = file:position(SourceHdl, {bof, BlockStart}), - {ok, BSize} = file:copy(SourceHdl, DestinationHdl, BSize), - {NextOffset, Offset, Offset + Size} - end - end, {DestinationValid, undefined, undefined}, SourceWorkList), - % do the last remaining block + SourceWorkList + = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + OffA < OffB + end, dets:match_object(MsgLocation, {'_', '_', Source, + '_', '_'})), + {ExpectedSize, BlockStart2, BlockEnd2} + = lists:foldl(fun ({MsgId, RefCount, _Source, Offset, TotalSize}, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the DestinationFile. 
+ %% Offset, BlockStart and BlockEnd are in the SourceFile + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + %% update MsgLocation to reflect change of file and offset + ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, + CurOffset, TotalSize}), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + %% extend the current block because the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + %% found a gap, so actually do the work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} + = file:position(SourceHdl, + {bof, BlockStart}), + {ok, BSize} + = file:copy(SourceHdl, DestinationHdl, + BSize), + {NextOffset, Offset, Offset + Size} + end + end, {DestinationValid, undefined, undefined}, SourceWorkList), + %% do the last remaining block BSize2 = BlockEnd2 - BlockStart2, {ok, BlockStart2} = file:position(SourceHdl, {bof, BlockStart2}), {ok, BSize2} = file:copy(SourceHdl, DestinationHdl, BSize2), - % tidy up + %% tidy up ok = file:sync(DestinationHdl), ok = file:close(SourceHdl), ok = file:close(DestinationHdl), @@ -562,20 +656,27 @@ closeFile(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge} } State; {ok, {Hdl, Then}} -> ok = file:close(Hdl), - State #dqstate { read_file_handles = { dict:erase(File, ReadHdls), gb_trees:delete(Then, ReadHdlsAge) } } + State #dqstate { read_file_handles + = { dict:erase(File, ReadHdls), + gb_trees:delete(Then, ReadHdlsAge) } } end. delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> - [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), + [{File, ValidData, _ContiguousTop, Left, Right}] + = ets:lookup(FileSummary, File), case ValidData of - % we should NEVER find the current file in here - hence right should always be a file, not undefined + %% we should NEVER find the current file in here + %% hence right should always be a file, not undefined 0 -> case {Left, Right} of {undefined, _} when not(is_atom(Right)) -> - % the eldest file is empty. YAY! - true = ets:update_element(FileSummary, Right, {4, undefined}); % left is the 4th field + %% the eldest file is empty. YAY! + %% left is the 4th field + true = ets:update_element(FileSummary, Right, {4, undefined}); {_, _} when not(is_atom(Right)) -> - true = ets:update_element(FileSummary, Right, {4, Left}), % left is the 4th field - true = ets:update_element(FileSummary, Left, {5, Right}) % right is the 5th field + %% left is the 4th field + true = ets:update_element(FileSummary, Right, {4, Left}), + %% right is the 5th field + true = ets:update_element(FileSummary, Left, {5, Right}) end, true = ets:delete(FileSummary, File), ok = file:delete(form_filename(File)), @@ -586,24 +687,27 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> %% ---- DISK RECOVERY ---- load_from_disk(State) -> - % sorted so that smallest number is first. which also means eldest file (left-most) first + %% sorted so that smallest number is first. 
which also means eldest file (left-most) first {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), - % There should be no more tmp files now, so go ahead and load the whole lot - (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), - % Finally, check there is nothing in mnesia which we haven't loaded + %% There should be no more tmp files now, so go ahead and load the whole lot + (State1 = #dqstate{ msg_location = MsgLocation }) + = load_messages(undefined, Files, State), + %% Finally, check there is nothing in mnesia which we haven't loaded {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) -> - true = 1 =:= length(dets:lookup(MsgLocation, MsgId)) end, + true = 1 + =:= length(dets:lookup(MsgLocation, MsgId)) + end, true, rabbit_disk_queue) end), State2 = extract_sequence_numbers(State1), {ok, State2}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> - % next-seqid-to-read is the lowest seqid which has is_delivered = false + %% next-seqid-to-read is the lowest seqid which has is_delivered = false {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), @@ -612,7 +716,8 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> NextWrite = SeqId + 1, case ets:lookup(Sequences, Q) of [] -> - true = ets:insert_new(Sequences, {Q, SeqId, NextWrite}); + true = ets:insert_new(Sequences, + {Q, SeqId, NextWrite}); [Orig = {Q, Read, Write}] -> Repl = {Q, lists:min([Read, SeqId]), lists:max([Write, NextWrite])}, @@ -634,7 +739,8 @@ load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) -> Offset = case dets:match_object(MsgLocation, {'_', '_', Left, '_', '_'}) of [] -> 0; L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] - = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + = lists:sort(fun ({_, _, _, OffA, _}, + {_, _, _, OffB, _}) -> OffB < OffA end, L), MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT @@ -645,24 +751,26 @@ load_messages(Left, [File|Files], State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary }) -> - % [{MsgId, TotalSize, FileOffset}] + %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_'})) of + case length(mnesia:dirty_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_'})) of 0 -> {VMAcc, VTSAcc}; RefCount -> - true = dets:insert_new(MsgLocation, {MsgId, RefCount, File, Offset, TotalSize}), + true = dets:insert_new(MsgLocation, {MsgId, RefCount, File, + Offset, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } end end, {[], 0}, Messages), - % foldl reverses lists and find_contiguous_block_prefix needs elems in the same order - % as from scan_file_for_valid_messages + %% foldl reverses lists and find_contiguous_block_prefix needs elems in the same order + %% as from scan_file_for_valid_messages {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), Right = case Files of [] -> undefined; @@ -682,17 +790,20 @@ recover_crashed_compactions1(Files, 
TmpFile) -> GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFile, Files), - % [{MsgId, TotalSize, FileOffset}] - {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), + %% [{MsgId, TotalSize, FileOffset}] + {ok, UncorruptedMessagesTmp} + = scan_file_for_valid_messages(form_filename(TmpFile)), MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), - % all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out + %% all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_'})) + true = 0 < length(mnesia:dirty_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_'})) end, MsgIdsTmp), - {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + {ok, UncorruptedMessages} + = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), %% 1) It's possible that everything in the tmp file is also in the main file %% such that the main file is (prefix ++ tmpfile). This means that compaction @@ -714,66 +825,74 @@ recover_crashed_compactions1(Files, TmpFile) -> %% Plan: Truncate the main file back to before any of the files in the tmp file and copy %% them over again case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of - true -> % we're in case 1, 2 or 3 above. Just delete the tmp file - % note this also catches the case when the tmp file is empty + true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file + %% note this also catches the case when the tmp file is empty ok = file:delete(TmpFile); _False -> - % we're in case 4 above. - % check that everything in the main file is a valid message in mnesia + %% we're in case 4 above. + %% check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - true = 0 < - length(mnesia:dirty_match_object(rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_'})) + true = 0 < length(mnesia:dirty_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_'})) end, MsgIds), - % The main file should be contiguous + %% The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), - % we should have that none of the messages in the prefix are in the tmp file - true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, MsgIds), + %% we should have that none of the messages in the prefix are in the tmp file + true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, + MsgIds), - {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), [write, raw, binary, delayed_write]), + {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), + [write, raw, binary, delayed_write]), {ok, Top} = file:position(MainHdl, Top), - ok = file:truncate(MainHdl), % wipe out any rubbish at the end of the file - % there really could be rubbish at the end of the file - we could have failed after the - % extending truncate. 
- % Remember the head of the list will be the highest entry in the file + ok = file:truncate(MainHdl), %% wipe out any rubbish at the end of the file + %% there really could be rubbish at the end of the file - we could have failed after the + %% extending truncate. + %% Remember the head of the list will be the highest entry in the file [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Top + TmpSize, {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), - ok = file:truncate(MainHdl), % and now extend the main file as big as necessary in a single move - % if we run out of disk space, this truncate could fail, but we still - % aren't risking losing data - {ok, TmpHdl} = file:open(form_filename(TmpFile), [read, raw, binary, read_ahead]), + ok = file:truncate(MainHdl), %% and now extend the main file as big as necessary in a single move + %% if we run out of disk space, this truncate could fail, but we still + %% aren't risking losing data + {ok, TmpHdl} = file:open(form_filename(TmpFile), + [read, raw, binary, read_ahead]), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:close(MainHdl), ok = file:close(TmpHdl), ok = file:delete(TmpFile), - {ok, MainMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + {ok, MainMessages} + = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIdsMain = lists:map(GrabMsgId, MainMessages), - % check that everything in MsgIds is in MsgIdsMain - true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIds), - % check that everything in MsgIdsTmp is in MsgIdsMain - true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIdsTmp) + %% check that everything in MsgIds is in MsgIdsMain + true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, + MsgIds), + %% check that everything in MsgIdsTmp is in MsgIdsMain + true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, + MsgIdsTmp) end, ok. -% this assumes that the messages are ordered such that the highest address is at -% the head of the list. -% this matches what scan_file_for_valid_messages produces +%% this assumes that the messages are ordered such that the highest address is at +%% the head of the list. +%% this matches what scan_file_for_valid_messages produces find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of - {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, lists:reverse(Acc)}; + {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + lists:reverse(Acc)}; Res -> Res end. 
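A worked example of the prefix computation, illustrative only and assuming ?FILE_PACKING_ADJUSTMENT = 17 (two 8-byte length integers plus the one-byte write-OK marker, which matches the packing code in this file):

    %% Each message below has TotalSize = 10, so occupies 27 bytes on
    %% disk. Input is highest offset first, as produced by
    %% scan_file_for_valid_messages/1:
    %%   find_contiguous_block_prefix([{c,10,54}, {b,10,27}, {a,10,0}])
    %%     => {81, [c,b,a]}   %% whole file is one block anchored at 0
    %%   find_contiguous_block_prefix([]) => {0, []}
    %% Any break in the chain back down to offset 0 falls through to
    %% the catch-all clause below.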
find_contiguous_block_prefix([], 0, Acc) -> {ok, Acc}; find_contiguous_block_prefix([], _N, _Acc) -> {0, []}; -find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], ExpectedOffset, Acc) +find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], + ExpectedOffset, Acc) when ExpectedOffset =:= Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT -> find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> @@ -800,7 +919,8 @@ append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> TotalSize = BodySize + MsgIdBinSize, case file:write(FileHdl, <>) of ok -> {ok, TotalSize}; KO -> KO @@ -811,9 +931,12 @@ read_message_at_offset(FileHdl, Offset, TotalSize) -> case file:position(FileHdl, {bof, Offset}) of {ok, Offset} -> case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of - {ok, <>} -> + {ok, <>} -> BodySize = TotalSize - MsgIdBinSize, - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, ?WRITE_OK:?WRITE_OK_SIZE_BITS>> = Rest, + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + ?WRITE_OK:?WRITE_OK_SIZE_BITS>> = Rest, {ok, {MsgBody, BodySize}}; KO -> KO end; @@ -823,7 +946,7 @@ read_message_at_offset(FileHdl, Offset, TotalSize) -> scan_file_for_valid_messages(File) -> {ok, Hdl} = file:open(File, [raw, binary, read]), Valid = scan_file_for_valid_messages(Hdl, 0, []), - _ = file:close(Hdl), % if something really bad's happened, the close could fail, but ignore + _ = file:close(Hdl), %% if something really bad's happened, the close could fail, but ignore Valid. scan_file_for_valid_messages(FileHdl, Offset, Acc) -> @@ -832,7 +955,8 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> {ok, {corrupted, NextOffset}} -> scan_file_for_valid_messages(FileHdl, NextOffset, Acc); {ok, {ok, MsgId, TotalSize, NextOffset}} -> - scan_file_for_valid_messages(FileHdl, NextOffset, [{MsgId, TotalSize, Offset}|Acc]); + scan_file_for_valid_messages(FileHdl, NextOffset, + [{MsgId, TotalSize, Offset}|Acc]); _KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages end. @@ -854,14 +978,17 @@ read_next_file_entry(FileHdl, Offset) -> case file:read(FileHdl, MsgIdBinSize) of {ok, <>} -> ExpectedAbsPos = Offset + TwoIntegers + TotalSize, - case file:position(FileHdl, {cur, TotalSize - MsgIdBinSize}) of + case file:position(FileHdl, + {cur, TotalSize - MsgIdBinSize}) of {ok, ExpectedAbsPos} -> + NextOffset = Offset + TotalSize + + ?FILE_PACKING_ADJUSTMENT, case file:read(FileHdl, 1) of {ok, <>} -> - {ok, {ok, binary_to_term(MsgId), TotalSize, - Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize}}; + {ok, {ok, binary_to_term(MsgId), + TotalSize, NextOffset}}; {ok, _SomeOtherData} -> - {ok, {corrupted, Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize}}; + {ok, {corrupted, NextOffset}}; KO -> KO end; {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up -- cgit v1.2.1 From cb709956fa618cf05cae611fe9c8bf367b07c407 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 12:43:50 +0100 Subject: more reformatting and refactoring. 
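An aside before the next diff: append_message/3 and read_next_file_entry/2 above agree on a single on-disk record layout which, assuming the 8-byte integer size used throughout this file, works out to

    %% <<TotalSize:64, MsgIdBinSize:64,
    %%   MsgIdBin:MsgIdBinSize/binary,
    %%   MsgBody:(TotalSize - MsgIdBinSize)/binary,
    %%   (?WRITE_OK):8>>
    %%
    %% so a record occupies TotalSize + ?FILE_PACKING_ADJUSTMENT bytes,
    %% and the trailing write-OK byte is how the scanner tells a
    %% complete write from a torn one.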
--- src/rabbit_disk_queue.erl | 558 +++++++++++++++++++++++----------------------- src/rabbit_tests.erl | 79 ++++--- 2 files changed, 328 insertions(+), 309 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 190c06f0..2952ca89 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -107,32 +107,32 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), ok = filelib:ensure_dir(form_filename("nothing")), InitName = "0" ++ ?FILE_EXTENSION, - {ok, MsgLocation} - = dets:open_file(?MSG_LOC_DETS_NAME, - [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME) - ++ ?FILE_EXTENSION_DETS)}, - {min_no_slots, 1024*1024}, - %% man says this should be <= 32M. But it works... - {max_no_slots, 1024*1024*1024}, - {type, set} - ]), - State - = #dqstate { msg_location = MsgLocation, - file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, - [set, private]), - sequences = ets:new(?SEQUENCE_ETS_NAME, - [set, private]), - current_file_num = 0, - current_file_name = InitName, - current_file_handle = undefined, - current_offset = 0, - file_size_limit = FileSizeLimit, - read_file_handles = {dict:new(), gb_trees:empty()}, - read_file_handles_limit = ReadFileHandlesLimit - }, + {ok, MsgLocation} = + dets:open_file(?MSG_LOC_DETS_NAME, + [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ + ?FILE_EXTENSION_DETS)}, + {min_no_slots, 1024*1024}, + %% man says this should be <= 32M. But it works... + {max_no_slots, 1024*1024*1024}, + {type, set} + ]), + State = + #dqstate { msg_location = MsgLocation, + file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, + [set, private]), + sequences = ets:new(?SEQUENCE_ETS_NAME, + [set, private]), + current_file_num = 0, + current_file_name = InitName, + current_file_handle = undefined, + current_offset = 0, + file_size_limit = FileSizeLimit, + read_file_handles = {dict:new(), gb_trees:empty()}, + read_file_handles_limit = ReadFileHandlesLimit + }, {ok, State1 = #dqstate { current_file_name = CurrentName, - current_offset = Offset } } - = load_from_disk(State), + current_offset = Offset } } = + load_from_disk(State), Path = form_filename(CurrentName), %% read is only needed so that we can seek {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), @@ -149,8 +149,8 @@ handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(clean_stop, _From, State) -> State1 = #dqstate { file_summary = FileSummary, - sequences = Sequences } - = shutdown(State), %% tidy up file handles early + sequences = Sequences } = + shutdown(State), %% tidy up file handles early {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), true = ets:delete(FileSummary), true = ets:delete(Sequences), @@ -185,8 +185,8 @@ shutdown(State = #dqstate { msg_location = MsgLocation, }) -> %% deliberately ignoring return codes here dets:close(MsgLocation), - file:delete(form_filename(atom_to_list(?MSG_LOC_DETS_NAME) - ++ ?FILE_EXTENSION_DETS)), + file:delete(form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ + ?FILE_EXTENSION_DETS)), if FileHdl =:= undefined -> ok; true -> file:sync(FileHdl), file:close(FileHdl) @@ -210,53 +210,51 @@ base_directory() -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, State - = #dqstate { msg_location = MsgLocation, - sequences = Sequences, - read_file_handles_limit = ReadFileHandlesLimit, - read_file_handles = {ReadHdls, ReadHdlsAge} - }) -> +internal_deliver(Q, State = + #dqstate { msg_location = MsgLocation, + sequences = Sequences, + 
read_file_handles_limit = ReadFileHandlesLimit, + read_file_handles = {ReadHdls, ReadHdlsAge} + }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; [{Q, ReadSeqId, WriteSeqId}] -> case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of [] -> {ok, empty, State}; - [Obj - = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] -> - [{MsgId, _RefCount, File, Offset, TotalSize}] - = dets:lookup(MsgLocation, MsgId), - {FileHdl, ReadHdls1, ReadHdlsAge1} - = case dict:find(File, ReadHdls) of - error -> - {ok, Hdl} = file:open(form_filename(File), - [read, raw, binary, - read_ahead]), - Now = now(), - case dict:size(ReadHdls) < ReadFileHandlesLimit of - true -> - {Hdl, - dict:store(File, {Hdl, Now}, ReadHdls), - gb_trees:enter(Now, File, ReadHdlsAge)}; - _False -> - {_Then, OldFile, ReadHdlsAge2} - = gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, _Then}} - = dict:find(OldFile, ReadHdls), - ok = file:close(OldHdl), - ReadHdls2 = dict:erase(OldFile, ReadHdls), - {Hdl, - dict:store(File, {Hdl, Now}, ReadHdls2), - gb_trees:enter(Now, File, ReadHdlsAge2)} - end; - {ok, {Hdl, Then}} -> - Now = now(), - {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), - gb_trees:enter(Now, File, - gb_trees:delete(Then, ReadHdlsAge))} - end, + [Obj = + #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] -> + [{MsgId, _RefCount, File, Offset, TotalSize}] = + dets:lookup(MsgLocation, MsgId), + Now = now(), + {FileHdl, ReadHdls1, ReadHdlsAge1} = + case dict:find(File, ReadHdls) of + error -> + {ok, Hdl} = file:open(form_filename(File), + [read, raw, binary, + read_ahead]), + {ReadHdls2, ReadHdlsAge2} = + case dict:size(ReadHdls) < ReadFileHandlesLimit of + true -> + {ReadHdls, ReadHdlsAge}; + _False -> + {_Then, OldFile, ReadHdlsAge3} = + gb_trees:take_smallest(ReadHdlsAge), + {ok, {OldHdl, _Then}} = + dict:find(OldFile, ReadHdls), + ok = file:close(OldHdl), + {dict:erase(OldFile, ReadHdls), + ReadHdlsAge3} + end, + {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), + gb_trees:enter(Now, File, ReadHdlsAge2)}; + {ok, {Hdl, Then}} -> + {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), + gb_trees:enter(Now, File, + gb_trees:delete(Then, ReadHdlsAge))} + end, %% read the message - {ok, {MsgBody, BodySize}} - = read_message_at_offset(FileHdl, Offset, TotalSize), + {ok, {MsgBody, BodySize}} = + read_message_at_offset(FileHdl, Offset, TotalSize), if Delivered -> ok; true -> ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) @@ -278,35 +276,35 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, file_summary = FileSummary, current_file_name = CurName }) -> - Files - = lists:foldl( - fun ({MsgId, SeqId}, Files2) -> - [{MsgId, RefCount, File, Offset, TotalSize}] - = dets:lookup(MsgLocation, MsgId), - Files3 - = if 1 =:= RefCount -> - ok = dets:delete(MsgLocation, MsgId), - [{File, ValidTotalSize, ContiguousTop, Left, Right}] - = ets:lookup(FileSummary, File), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, - {File, (ValidTotalSize - TotalSize - - ?FILE_PACKING_ADJUSTMENT), - ContiguousTop1, Left, Right}), - if CurName =:= File -> Files2; - true -> sets:add_element(File, Files2) - end; - 1 < RefCount -> - ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, - File, Offset, TotalSize}), - Files2 - end, - if MnesiaDelete -> - ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); - true -> ok - end, - Files3 - end, sets:new(), MsgSeqIds), + Files = + lists:foldl( + fun ({MsgId, SeqId}, Files2) -> + [{MsgId, RefCount, File, Offset, TotalSize}] = + 
dets:lookup(MsgLocation, MsgId), + Files3 = + if 1 =:= RefCount -> + ok = dets:delete(MsgLocation, MsgId), + [{File, ValidTotalSize, ContiguousTop, Left, Right}] = + ets:lookup(FileSummary, File), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + true = ets:insert(FileSummary, + {File, (ValidTotalSize - TotalSize + - ?FILE_PACKING_ADJUSTMENT), + ContiguousTop1, Left, Right}), + if CurName =:= File -> Files2; + true -> sets:add_element(File, Files2) + end; + 1 < RefCount -> + ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, + File, Offset, TotalSize}), + Files2 + end, + if MnesiaDelete -> + ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); + true -> ok + end, + Files3 + end, sets:new(), MsgSeqIds), State2 = compact(Files, State), {ok, State2}. @@ -323,8 +321,8 @@ internal_tx_publish(MsgId, MsgBody, {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), true = dets:insert_new(MsgLocation, {MsgId, 1, CurName, CurOffset, TotalSize}), - [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] - = ets:lookup(FileSummary, CurName), + [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = + ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, ContiguousTop1 = if CurOffset =:= ContiguousTop -> @@ -349,28 +347,28 @@ internal_tx_commit(Q, MsgIds, current_file_handle = CurHdl, current_file_name = CurName, sequences = Sequences - }) -> - {ReadSeqId, InitWriteSeqId} - = case ets:lookup(Sequences, Q) of - [] -> {0,0}; - [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} - end, - {atomic, {Sync, WriteSeqId}} - = mnesia:transaction( - fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun (MsgId, {Acc, NextWriteSeqId}) -> - [{MsgId, _RefCount, File, _Offset, _TotalSize}] - = dets:lookup(MsgLocation, MsgId), - ok = mnesia:write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id - = {Q, NextWriteSeqId}, - msg_id = MsgId, - is_delivered = false}, - write), - {Acc or (CurName =:= File), NextWriteSeqId + 1} - end, {false, InitWriteSeqId}, MsgIds) - end), + }) -> + {ReadSeqId, InitWriteSeqId} = + case ets:lookup(Sequences, Q) of + [] -> {0,0}; + [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} + end, + {atomic, {Sync, WriteSeqId}} = + mnesia:transaction( + fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foldl( + fun (MsgId, {Acc, NextWriteSeqId}) -> + [{MsgId, _RefCount, File, _Offset, _TotalSize}] = + dets:lookup(MsgLocation, MsgId), + ok = mnesia:write(rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = + {Q, NextWriteSeqId}, + msg_id = MsgId, + is_delivered = false}, + write), + {Acc or (CurName =:= File), NextWriteSeqId + 1} + end, {false, InitWriteSeqId}, MsgIds) + end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}), if Sync -> ok = file:sync(CurHdl); true -> ok @@ -378,8 +376,8 @@ internal_tx_commit(Q, MsgIds, {ok, State}. 
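The reference counting in remove_messages/4 is the heart of shared-message cleanup: a MsgId may be owned by several queues, and its bytes only become garbage when the last owner acks. The rule in isolation, as a hypothetical helper that is not part of the patch:

    deref(MsgLocation, MsgId) ->
        [{MsgId, RefCount, File, Offset, TotalSize}] =
            dets:lookup(MsgLocation, MsgId),
        case RefCount of
            1 -> ok = dets:delete(MsgLocation, MsgId),
                 {garbage, File, Offset, TotalSize};
            _ -> ok = dets:insert(MsgLocation, {MsgId, RefCount - 1,
                                                File, Offset, TotalSize}),
                 still_referenced
        end.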
internal_publish(Q, MsgId, MsgBody, State) -> - {ok, State1 = #dqstate { sequences = Sequences }} - = internal_tx_publish(MsgId, MsgBody, State), + {ok, State1 = #dqstate { sequences = Sequences }} = + internal_tx_publish(MsgId, MsgBody, State), WriteSeqId = case ets:lookup(Sequences, Q) of [] -> %% previously unseen queue true = ets:insert_new(Sequences, {Q, 0, 1}), @@ -447,42 +445,42 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, case ets:lookup(FileSummary, File) of [] -> State; [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> - GoRight - = fun() -> - case Right of - undefined -> State; - _ when not(CurName =:= Right) -> - [RightObj = {Right, RightValidData, - _RightContiguousTop, File, RightRight}] - = ets:lookup(FileSummary, Right), - RightSumData = ValidData + RightValidData, - if FileSizeLimit >= RightSumData -> - %% here, Right will be the source and so will be deleted, - %% File will be the destination - State1 = combineFiles(RightObj, FileObj, - State), - %% this could fail if RightRight is undefined - %% left is the 4th field - ets:update_element(FileSummary, - RightRight, {4, File}), - true = ets:insert(FileSummary, {File, - RightSumData, - RightSumData, - Left, - RightRight}), - true = ets:delete(FileSummary, Right), - State1; - true -> State - end; - _ -> State - end - end, + GoRight = + fun() -> + case Right of + undefined -> State; + _ when not(CurName =:= Right) -> + [RightObj = {Right, RightValidData, + _RightContiguousTop, File, RightRight}] = + ets:lookup(FileSummary, Right), + RightSumData = ValidData + RightValidData, + if FileSizeLimit >= RightSumData -> + %% here, Right will be the source and so will be deleted, + %% File will be the destination + State1 = combineFiles(RightObj, FileObj, + State), + %% this could fail if RightRight is undefined + %% left is the 4th field + ets:update_element(FileSummary, + RightRight, {4, File}), + true = ets:insert(FileSummary, {File, + RightSumData, + RightSumData, + Left, + RightRight}), + true = ets:delete(FileSummary, Right), + State1; + true -> State + end; + _ -> State + end + end, case Left of undefined -> GoRight(); - _ -> [LeftObj - = {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] - = ets:lookup(FileSummary, Left), + _ -> [LeftObj = + {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] = + ets:lookup(FileSummary, Left), LeftSumData = ValidData + LeftValidData, if FileSizeLimit >= LeftSumData -> %% here, File will be the source and so will be deleted, @@ -502,19 +500,27 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, end end. +sortMsgLocationsByOffset(Asc, List) -> + Comp = if Asc -> fun(X, Y) -> X < Y end; + true -> fun(X, Y) -> X > Y end + end, + lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + Comp(OffA, OffB) + end, List). 
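A quick usage note for the new sortMsgLocationsByOffset/2 helper; the tuples and values below are illustrative. The boolean selects ascending (true) or descending order on the offset field:

    %% In the shell:
    %% > sortMsgLocationsByOffset(true, [{a,1,"0.rdq",34,10},
    %%                                   {b,1,"0.rdq",0,10},
    %%                                   {c,1,"0.rdq",17,10}]).
    %% [{b,1,"0.rdq",0,10},{c,1,"0.rdq",17,10},{a,1,"0.rdq",34,10}]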
+ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, State1) -> - (State = #dqstate { msg_location = MsgLocation }) - = closeFile(Source, closeFile(Destination, State1)), - {ok, SourceHdl} - = file:open(form_filename(Source), - [read, write, raw, binary, delayed_write, read_ahead]), - {ok, DestinationHdl} - = file:open(form_filename(Destination), - [read, write, raw, binary, delayed_write, read_ahead]), + (State = #dqstate { msg_location = MsgLocation }) = + closeFile(Source, closeFile(Destination, State1)), + {ok, SourceHdl} = + file:open(form_filename(Source), + [read, write, raw, binary, delayed_write, read_ahead]), + {ok, DestinationHdl} = + file:open(form_filename(Destination), + [read, write, raw, binary, delayed_write, read_ahead]), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file %% if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file @@ -531,56 +537,54 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, {bof, DestinationValid}); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} - = file:open(form_filename(Tmp), - [read, write, raw, binary, delayed_write, read_ahead]), - Worklist - = lists:dropwhile( - fun ({_, _, _, Offset, _}) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == DestinationContiguousTop - %% because if it was then DestinationContiguousTop would have been - %% extended by TotalSize - Offset < DestinationContiguousTop - %% Given expected access patterns, I suspect that the list should be - %% naturally sorted as we require, however, we need to enforce it anyway - end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> - OffA < OffB - end, - dets:match_object(MsgLocation, - {'_', '_', Destination, - '_', '_'}))), + {ok, TmpHdl} = + file:open(form_filename(Tmp), + [read, write, raw, binary, delayed_write, read_ahead]), + Worklist = + lists:dropwhile( + fun ({_, _, _, Offset, _}) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == DestinationContiguousTop + %% because if it was then DestinationContiguousTop would have been + %% extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I suspect that the list should be + %% naturally sorted as we require, however, we need to enforce it anyway + end, sortMsgLocationsByOffset(true, + dets:match_object(MsgLocation, + {'_', '_', + Destination, + '_', '_'}))), TmpSize = DestinationValid - DestinationContiguousTop, - {TmpSize, BlockStart1, BlockEnd1} - = lists:foldl( - fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the TmpFile. - %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) 
- Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset - FinalOffset = DestinationContiguousTop + CurOffset, - ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, - FinalOffset, TotalSize}), - - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - %% extend the current block because the next msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - %% found a gap, so actually do the work for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} - = file:position(DestinationHdl, - {bof, BlockStart}), - {ok, BSize} = file:copy(DestinationHdl, - TmpHdl, BSize), - {NextOffset, Offset, Offset + Size} - end - end, {0, undefined, undefined}, Worklist), + {TmpSize, BlockStart1, BlockEnd1} = + lists:foldl( + fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the TmpFile. + %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + %% this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset + FinalOffset = DestinationContiguousTop + CurOffset, + ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, + FinalOffset, TotalSize}), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + %% extend the current block because the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + %% found a gap, so actually do the work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file:position(DestinationHdl, + {bof, BlockStart}), + {ok, BSize} = file:copy(DestinationHdl, + TmpHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {0, undefined, undefined}, Worklist), %% do the last remaining block BSize1 = BlockEnd1 - BlockStart1, {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), @@ -589,56 +593,54 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% and MsgLocation has been updated to reflect compaction of Destination %% so truncate Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), - {ok, DestinationContiguousTop} - = file:position(DestinationHdl, - {bof, DestinationContiguousTop}), + {ok, DestinationContiguousTop} = + file:position(DestinationHdl, + {bof, DestinationContiguousTop}), ok = file:truncate(DestinationHdl), - {ok, ExpectedSize} - = file:position(DestinationHdl, - {bof, ExpectedSize}), + {ok, ExpectedSize} = + file:position(DestinationHdl, + {bof, ExpectedSize}), ok = file:truncate(DestinationHdl), - {ok, DestinationContiguousTop} - = file:position(DestinationHdl, - {bof, DestinationContiguousTop}), + {ok, DestinationContiguousTop} = + file:position(DestinationHdl, + {bof, DestinationContiguousTop}), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), %% position in DestinationHdl should now be DestinationValid ok = file:sync(DestinationHdl), ok = file:close(TmpHdl), ok = file:delete(form_filename(Tmp)) end, - SourceWorkList - = lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> - OffA < OffB - end, dets:match_object(MsgLocation, {'_', '_', Source, + SourceWorkList = + 
sortMsgLocationsByOffset(true, dets:match_object(MsgLocation, + {'_', '_', Source, '_', '_'})), - {ExpectedSize, BlockStart2, BlockEnd2} - = lists:foldl(fun ({MsgId, RefCount, _Source, Offset, TotalSize}, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the DestinationFile. - %% Offset, BlockStart and BlockEnd are in the SourceFile - Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% update MsgLocation to reflect change of file and offset - ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, - CurOffset, TotalSize}), - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - %% extend the current block because the next msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - %% found a gap, so actually do the work for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} - = file:position(SourceHdl, - {bof, BlockStart}), - {ok, BSize} - = file:copy(SourceHdl, DestinationHdl, - BSize), - {NextOffset, Offset, Offset + Size} - end - end, {DestinationValid, undefined, undefined}, SourceWorkList), + {ExpectedSize, BlockStart2, BlockEnd2} = + lists:foldl( + fun ({MsgId, RefCount, _Source, Offset, TotalSize}, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the DestinationFile. + %% Offset, BlockStart and BlockEnd are in the SourceFile + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + %% update MsgLocation to reflect change of file and offset + ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, + CurOffset, TotalSize}), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + %% extend the current block because the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + %% found a gap, so actually do the work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file:position(SourceHdl, {bof, BlockStart}), + {ok, BSize} = + file:copy(SourceHdl, DestinationHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {DestinationValid, undefined, undefined}, SourceWorkList), %% do the last remaining block BSize2 = BlockEnd2 - BlockStart2, {ok, BlockStart2} = file:position(SourceHdl, {bof, BlockStart2}), @@ -650,20 +652,21 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, ok = file:delete(form_filename(Source)), State. -closeFile(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge} }) -> +closeFile(File, State = #dqstate { read_file_handles = + {ReadHdls, ReadHdlsAge} }) -> case dict:find(File, ReadHdls) of error -> State; {ok, {Hdl, Then}} -> ok = file:close(Hdl), - State #dqstate { read_file_handles - = { dict:erase(File, ReadHdls), - gb_trees:delete(Then, ReadHdlsAge) } } + State #dqstate { read_file_handles = + { dict:erase(File, ReadHdls), + gb_trees:delete(Then, ReadHdlsAge) } } end. 
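Both foldls in combineFiles/3 rely on the same trick: messages sorted by offset are grouped into maximal contiguous byte ranges, so each range costs one file:copy/3 rather than one copy per message. The grouping on its own, as a hypothetical sketch over {Offset, SizeOnDisk} pairs sorted ascending:

    %% coalesce([{0,27},{27,27},{80,27}], []) => [{0,54},{80,107}]
    coalesce([], Runs) ->
        lists:reverse(Runs);
    coalesce([{Offset, Size} | Rest], [{Start, End} | Runs])
      when Offset =:= End ->
        %% next message follows straight on: extend the current run
        coalesce(Rest, [{Start, End + Size} | Runs]);
    coalesce([{Offset, Size} | Rest], Runs) ->
        %% gap (or first message): start a new run
        coalesce(Rest, [{Offset, Offset + Size} | Runs]).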
delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> - [{File, ValidData, _ContiguousTop, Left, Right}] - = ets:lookup(FileSummary, File), + [{File, ValidData, _ContiguousTop, Left, Right}] = + ets:lookup(FileSummary, File), case ValidData of %% we should NEVER find the current file in here %% hence right should always be a file, not undefined @@ -691,15 +694,15 @@ load_from_disk(State) -> {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), %% There should be no more tmp files now, so go ahead and load the whole lot - (State1 = #dqstate{ msg_location = MsgLocation }) - = load_messages(undefined, Files, State), + (State1 = #dqstate{ msg_location = MsgLocation }) = + load_messages(undefined, Files, State), %% Finally, check there is nothing in mnesia which we haven't loaded {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) -> - true = 1 - =:= length(dets:lookup(MsgLocation, MsgId)) + true = 1 =:= + length(dets:lookup(MsgLocation, MsgId)) end, true, rabbit_disk_queue) end), @@ -738,11 +741,8 @@ load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) -> Num = list_to_integer(filename:rootname(Left)), Offset = case dets:match_object(MsgLocation, {'_', '_', Left, '_', '_'}) of [] -> 0; - L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] - = lists:sort(fun ({_, _, _, OffA, _}, - {_, _, _, OffB, _}) -> - OffB < OffA - end, L), + L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] = + sortMsgLocationsByOffset(false, L), MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State # dqstate { current_file_num = Num, current_file_name = Left, @@ -791,8 +791,8 @@ recover_crashed_compactions1(Files, TmpFile) -> NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFile, Files), %% [{MsgId, TotalSize, FileOffset}] - {ok, UncorruptedMessagesTmp} - = scan_file_for_valid_messages(form_filename(TmpFile)), + {ok, UncorruptedMessagesTmp} = + scan_file_for_valid_messages(form_filename(TmpFile)), MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), %% all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> @@ -802,8 +802,8 @@ recover_crashed_compactions1(Files, TmpFile) -> queue_and_seq_id = '_', is_delivered = '_'})) end, MsgIdsTmp), - {ok, UncorruptedMessages} - = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + {ok, UncorruptedMessages} = + scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), %% 1) It's possible that everything in the tmp file is also in the main file %% such that the main file is (prefix ++ tmpfile). 
This means that compaction @@ -865,8 +865,8 @@ recover_crashed_compactions1(Files, TmpFile) -> ok = file:close(TmpHdl), ok = file:delete(TmpFile), - {ok, MainMessages} - = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + {ok, MainMessages} = + scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIdsMain = lists:map(GrabMsgId, MainMessages), %% check that everything in MsgIds is in MsgIdsMain true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, @@ -981,8 +981,8 @@ read_next_file_entry(FileHdl, Offset) -> case file:position(FileHdl, {cur, TotalSize - MsgIdBinSize}) of {ok, ExpectedAbsPos} -> - NextOffset = Offset + TotalSize - + ?FILE_PACKING_ADJUSTMENT, + NextOffset = Offset + TotalSize + + ?FILE_PACKING_ADJUSTMENT, case file:read(FileHdl, 1) of {ok, <>} -> {ok, {ok, binary_to_term(MsgId), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a04c6f1b..cce9da1a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -685,7 +685,8 @@ delete_log_handlers(Handlers) -> test_disk_queue() -> % unicode chars are supported properly from r13 onwards io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []), - [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), timer:sleep(1000) end || % 1000 milliseconds + [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), + timer:sleep(1000) end || % 1000 milliseconds MsgSize <- [512, 8192, 32768, 131072], Qs <- [[1], lists:seq(1,10)], %, lists:seq(1,100), lists:seq(1,1000)], MsgCount <- [1024, 4096, 16384] @@ -700,20 +701,29 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), - {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- List, _ <- Qs] end, - fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List) || Q <- Qs] end - ]]), - {Deliver, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [begin SeqIds = [begin {N, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(Q), SeqId end || N <- List], - rabbit_disk_queue:ack(Q, SeqIds), - ok = rabbit_disk_queue:tx_commit(Q, []) - end || Q <- Qs] - end]]), + {Publish, ok} = + timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) + || N <- List, _ <- Qs] end, + fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List) + || Q <- Qs] end + ]]), + {Deliver, ok} = + timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [begin SeqIds = + [begin {N, Msg, MsgSizeBytes, false, SeqId} = + rabbit_disk_queue:deliver(Q), SeqId end + || N <- List], + rabbit_disk_queue:ack(Q, SeqIds), + ok = rabbit_disk_queue:tx_commit(Q, []) + end || Q <- Qs] + end]]), io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| ~14.10f~n", [MsgCount, MsgSizeBytes, QCount, float(Startup), - float(Publish), (Publish / (MsgCount * QCount)), (Publish / (MsgCount * QCount * MsgSizeBytes)), - float(Deliver), (Deliver / (MsgCount * QCount)), (Deliver / (MsgCount * QCount * MsgSizeBytes))]), + float(Publish), (Publish / (MsgCount * QCount)), + (Publish / (MsgCount * QCount * MsgSizeBytes)), + float(Deliver), (Deliver / (MsgCount * QCount)), + (Deliver / (MsgCount * QCount * MsgSizeBytes))]), rdq_stop(). 
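The derived columns in the results table are plain ratios of the timer:tc totals; one worked row with illustrative numbers:

    %% MsgCount = 1024, QCount = 10, MsgSizeBytes = 512,
    %% Publish = 2048000 (total microseconds from timer:tc):
    %%   Pub mu s/msg  = 2048000 / (1024 * 10)        = 200.0
    %%   Pub mu s/byte = 2048000 / (1024 * 10 * 512)  = 0.390625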
% we know each file is going to be 1024*1024*10 bytes in size (10MB), so make sure we have @@ -728,22 +738,30 @@ rdq_stress_gc(MsgCount) -> rabbit_disk_queue:tx_commit(q, List), StartChunk = round(MsgCount / 20), % 5% AckList = - lists:reverse(lists:foldl(fun (E, Acc) -> case Acc of - [] -> [E]; - [F|_Fs] -> - case E rem F of - 0 -> Acc; - _ -> [E|Acc] - end - end - end, [], lists:flatten([lists:seq(N,MsgCount,N) || N <- lists:seq(StartChunk,MsgCount)]))) - ++ lists:seq(1, (StartChunk - 1)), - MsgIdToSeqDict - = lists:foldl(fun (_, Acc) -> - {MsgId, Msg, MsgSizeBytes, false, SeqId} = rabbit_disk_queue:deliver(q), - dict:store(MsgId, SeqId, Acc) - end, dict:new(), List), - rabbit_disk_queue:ack(q, [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict), SeqId end || MsgId <- AckList]), + lists:reverse( + lists:foldl( + fun (E, Acc) -> + case Acc of + [] -> [E]; + [F|_Fs] -> + case E rem F of + 0 -> Acc; + _ -> [E|Acc] + end + end + end, [], lists:flatten([lists:seq(N,MsgCount,N) + || N <- lists:seq(StartChunk,MsgCount)]))) ++ + lists:seq(1, (StartChunk - 1)), + MsgIdToSeqDict = + lists:foldl( + fun (_, Acc) -> + {MsgId, Msg, MsgSizeBytes, false, SeqId} = + rabbit_disk_queue:deliver(q), + dict:store(MsgId, SeqId, Acc) + end, dict:new(), List), + rabbit_disk_queue:ack(q, [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict), + SeqId end + || MsgId <- AckList]), rabbit_disk_queue:tx_commit(q, []), rdq_stop(), passed. @@ -752,7 +770,8 @@ rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). rdq_virgin() -> - {Micros, {ok, _}} = timer:tc(rabbit_disk_queue, start_link, [1024*1024*10, 1000]), + {Micros, {ok, _}} = + timer:tc(rabbit_disk_queue, start_link, [1024*1024*10, 1000]), ok = rabbit_disk_queue:clean_stop(), Micros. -- cgit v1.2.1 From fd0ef18a82aba492ded613102fc5f60a234a8a75 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 12:51:59 +0100 Subject: neater --- src/rabbit_disk_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2952ca89..26f831e3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -501,8 +501,8 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, end. sortMsgLocationsByOffset(Asc, List) -> - Comp = if Asc -> fun(X, Y) -> X < Y end; - true -> fun(X, Y) -> X > Y end + Comp = if Asc -> fun erlang:'<'/2; + true -> fun erlang:'>'/2 end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> Comp(OffA, OffB) -- cgit v1.2.1 From f3a24a5ea691e6a060993f0d0eecc151eac3f396 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 13:01:53 +0100 Subject: a bit more refactoring. Also, drop file size and file handle count in tests to stress those code paths more. --- src/rabbit_disk_queue.erl | 70 +++++++++++++++++++++++------------------------ src/rabbit_tests.erl | 4 +-- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 26f831e3..ecb8c91e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -137,7 +137,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% read is only needed so that we can seek {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), {ok, Offset} = file:position(FileHdl, {bof, Offset}), - {ok, State1 # dqstate { current_file_handle = FileHdl }}. + {ok, State1 #dqstate { current_file_handle = FileHdl }}. 
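The "neater" commit above tidies sortMsgLocationsByOffset by capturing the comparison operators themselves as funs (fun erlang:'<'/2 and fun erlang:'>'/2) instead of spelling the lambdas out twice. For reference, a shell-level sketch of what the function does with the {MsgId, RefCount, File, Offset, TotalSize} tuples used throughout, assuming it were exported; the values are invented:

    1> Locs = [{m2, 1, "0.rdq", 40, 10}, {m1, 1, "0.rdq", 0, 10}].
    2> sortMsgLocationsByOffset(true, Locs).   %% ascending by offset
    [{m1,1,"0.rdq",0,10},{m2,1,"0.rdq",40,10}]
    3> sortMsgLocationsByOffset(false, Locs).  %% descending by offset
    [{m2,1,"0.rdq",40,10},{m1,1,"0.rdq",0,10}]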
handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, State), @@ -156,8 +156,8 @@ handle_call(clean_stop, _From, State) -> true = ets:delete(Sequences), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, - State1 # dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}}. + State1 #dqstate { current_file_handle = undefined, + read_file_handles = {dict:new(), gb_trees:empty()}}}. %% gen_server now calls terminate, which then calls shutdown handle_cast({publish, Q, MsgId, MsgBody}, State) -> @@ -194,8 +194,8 @@ shutdown(State = #dqstate { msg_location = MsgLocation, dict:fold(fun (_File, Hdl, _Acc) -> file:close(Hdl) end, ok, ReadHdls), - State # dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}. + State #dqstate { current_file_handle = undefined, + read_file_handles = {dict:new(), gb_trees:empty()}}. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -232,26 +232,24 @@ internal_deliver(Q, State = {ok, Hdl} = file:open(form_filename(File), [read, raw, binary, read_ahead]), - {ReadHdls2, ReadHdlsAge2} = - case dict:size(ReadHdls) < ReadFileHandlesLimit of - true -> - {ReadHdls, ReadHdlsAge}; - _False -> - {_Then, OldFile, ReadHdlsAge3} = - gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, _Then}} = - dict:find(OldFile, ReadHdls), - ok = file:close(OldHdl), - {dict:erase(OldFile, ReadHdls), - ReadHdlsAge3} - end, - {Hdl, dict:store(File, {Hdl, Now}, ReadHdls2), - gb_trees:enter(Now, File, ReadHdlsAge2)}; + case dict:size(ReadHdls) < ReadFileHandlesLimit of + true -> + {Hdl, ReadHdls, ReadHdlsAge}; + _False -> + {Then, OldFile, ReadHdlsAge3} = + gb_trees:take_smallest(ReadHdlsAge), + {ok, {OldHdl, Then}} = + dict:find(OldFile, ReadHdls), + ok = file:close(OldHdl), + {Hdl, dict:erase(OldFile, ReadHdls), + ReadHdlsAge3} + end; {ok, {Hdl, Then}} -> - {Hdl, dict:store(File, {Hdl, Now}, ReadHdls), - gb_trees:enter(Now, File, - gb_trees:delete(Then, ReadHdlsAge))} + {Hdl, ReadHdls, + gb_trees:delete(Then, ReadHdlsAge)} end, + ReadHdls2 = dict:store(File, {FileHdl, Now}, ReadHdls1), + ReadHdlsAge2 = gb_trees:enter(Now, File, ReadHdlsAge1), %% read the message {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), @@ -261,7 +259,7 @@ internal_deliver(Q, State = end, true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, - State # dqstate { read_file_handles = {ReadHdls1, ReadHdlsAge1} }} + State #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge2} }} end end. 
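The read-handle juggling in internal_deliver amounts to a small LRU cache: a dict maps file name to {Handle, LastUsed} while a gb_tree keyed on LastUsed locates the least-recently-used handle for eviction in O(log n). Here is the idea as a self-contained sketch with invented names; in the module itself this state lives in the read_file_handles field of #dqstate:

    -module(hdl_cache_sketch).
    -export([new/1, get_handle/3]).

    new(Limit) -> {Limit, dict:new(), gb_trees:empty()}.

    %% OpenFun(File) -> {ok, Hdl}, supplied by the caller so the sketch
    %% stays independent of open modes. now() increases on every call,
    %% so it doubles as a unique age key for the gb_tree.
    get_handle(File, OpenFun, {Limit, Hdls, Ages}) ->
        Now = now(),
        {Hdl, Hdls1, Ages1} =
            case dict:find(File, Hdls) of
                {ok, {H, Then}} ->
                    %% cache hit: drop the stale age entry
                    {H, Hdls, gb_trees:delete(Then, Ages)};
                error ->
                    {ok, H} = OpenFun(File),
                    case dict:size(Hdls) < Limit of
                        true -> {H, Hdls, Ages};
                        false ->
                            %% full: close the least recently used handle
                            {Then, OldFile, Ages2} =
                                gb_trees:take_smallest(Ages),
                            {ok, {OldHdl, Then}} =
                                dict:find(OldFile, Hdls),
                            ok = file:close(OldHdl),
                            {H, dict:erase(OldFile, Hdls), Ages2}
                    end
            end,
        {Hdl, {Limit, dict:store(File, {Hdl, Now}, Hdls1),
               gb_trees:enter(Now, File, Ages1)}}.

gb_trees:take_smallest/1 returns the oldest timestamp, its file, and the tree without them, which is exactly the eviction order wanted here.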
@@ -272,10 +270,10 @@ internal_ack(Q, MsgIds, State) -> %% called from tx_cancel with MnesiaDelete = false %% called from ack with MnesiaDelete = true remove_messages(Q, MsgSeqIds, MnesiaDelete, - State = # dqstate { msg_location = MsgLocation, - file_summary = FileSummary, - current_file_name = CurName - }) -> + State = #dqstate { msg_location = MsgLocation, + file_summary = FileSummary, + current_file_name = CurName + }) -> Files = lists:foldl( fun ({MsgId, SeqId}, Files2) -> @@ -334,7 +332,7 @@ internal_tx_publish(MsgId, MsgBody, ContiguousTop1, Left, undefined}), NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, maybe_roll_to_new_file(NextOffset, - State # dqstate {current_offset = NextOffset}); + State #dqstate {current_offset = NextOffset}); [{MsgId, RefCount, File, Offset, TotalSize}] -> %% We already know about it, just update counter ok = dets:insert(MsgLocation, {MsgId, RefCount + 1, File, @@ -417,11 +415,11 @@ maybe_roll_to_new_file(Offset, [write, raw, binary, delayed_write]), true = ets:update_element(FileSummary, CurName, {5, NextName}), %% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), - {ok, State # dqstate { current_file_name = NextName, - current_file_handle = NextHdl, - current_file_num = NextNum, - current_offset = 0 - }}; + {ok, State #dqstate { current_file_name = NextName, + current_file_handle = NextHdl, + current_file_num = NextNum, + current_offset = 0 + }}; maybe_roll_to_new_file(_, State) -> {ok, State}. @@ -745,8 +743,8 @@ load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) -> sortMsgLocationsByOffset(false, L), MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT end, - State # dqstate { current_file_num = Num, current_file_name = Left, - current_offset = Offset }; + State #dqstate { current_file_num = Num, current_file_name = Left, + current_offset = Offset }; load_messages(Left, [File|Files], State = #dqstate { msg_location = MsgLocation, file_summary = FileSummary diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index cce9da1a..08b05da2 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -771,12 +771,12 @@ rdq_time_commands(Funcs) -> rdq_virgin() -> {Micros, {ok, _}} = - timer:tc(rabbit_disk_queue, start_link, [1024*1024*10, 1000]), + timer:tc(rabbit_disk_queue, start_link, [1024*1024, 5]), ok = rabbit_disk_queue:clean_stop(), Micros. rdq_start() -> - {ok, _} = rabbit_disk_queue:start_link(1024*1024*10, 1000). + {ok, _} = rabbit_disk_queue:start_link(1024*1024, 5). rdq_stop() -> rabbit_disk_queue:stop(). -- cgit v1.2.1 From 14ceb26a380dd13d1c34c7d89844e2a34a725282 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 14:19:52 +0100 Subject: wrote a short essay --- src/rabbit_disk_queue.erl | 81 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ecb8c91e..0ae890d9 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -59,7 +59,7 @@ -define(SERVER, ?MODULE). --record(dqstate, {msg_location, % where are messages? +-record(dqstate, {msg_location, %% where are messages? file_summary, %% what's in the files? sequences, %% next read and write for each q current_file_num, %% current file name as number @@ -71,6 +71,85 @@ read_file_handles_limit %% how many file handles can we open? }). 
+%% The components:
+%%
+%% MsgLocation: this is a dets table which contains:
+%%              {MsgId, RefCount, File, Offset, TotalSize}
+%% FileSummary: this is an ets table which contains:
+%%              {File, ValidTotalSize, ContiguousTop, Left, Right}
+%% Sequences: this is an ets table which contains:
+%%            {Q, ReadSeqId, WriteSeqId}
+%% rabbit_disk_queue: this is an mnesia table which contains:
+%%                    #dq_msg_loc { queue_and_seq_id = {Q, SeqId},
+%%                                  is_delivered = IsDelivered,
+%%                                  msg_id = MsgId
+%%                                }
+%%
+
+%% The basic idea is that messages are appended to the current file up
+%% until that file becomes too big (> file_size_limit). At that point,
+%% the file is closed and a new file is created on the _right_ of the
+%% old file which is used for new messages. Files are named
+%% numerically ascending, thus the file with the lowest name is the
+%% eldest file.
+%%
+%% We need to keep track of which messages are in which files (this is
+%% the MsgLocation table); how much useful data is in each file and
+%% which files are on the left and right of each other. This is the
+%% purpose of the FileSummary table.
+%%
+%% As messages are removed from files, holes appear in these
+%% files. The field ValidTotalSize contains the total amount of useful
+%% data left in the file, whilst ContiguousTop contains the amount of
+%% valid data right at the start of each file. These are needed for
+%% garbage collection.
+%%
+%% On publish, we write the message to disk, record the changes to
+%% FileSummary and MsgLocation, and, should this be either a plain
+%% publish, or followed by a tx_commit, we record the message in the
+%% mnesia table. Sequences exists to enforce ordering of messages as
+%% they are published within a queue.
+%%
+%% On delivery, we read the next message to be read from disk
+%% (according to the ReadSeqId for the given queue) and record in the
+%% mnesia table that the message has been delivered.
+%%
+%% On ack we remove the relevant entry from MsgLocation, update
+%% FileSummary and delete from the mnesia table.
+%%
+%% In order to avoid extra mnesia searching, we return the SeqId
+%% during delivery which must be returned in ack - it is not possible
+%% to ack from MsgId alone.
+%%
+%% As messages are ack'd, holes develop in the files. When we discover
+%% that either a file is now empty or that it can be combined with the
+%% useful data in either its left or right file, we compact the two
+%% files together. This keeps disk utilisation high and aids
+%% performance.
+%%
+%% Given the compaction between two files, the left file is considered
+%% the ultimate destination for the good data in the right file. If
+%% necessary, the good data in the left file which is fragmented
+%% throughout the file is written out to a temporary file, then read
+%% back in to form a contiguous chunk of good data at the start of the
+%% left file. Thus the left file is garbage collected and
+%% compacted. Then the good data from the right file is copied onto
+%% the end of the left file. MsgLocation and FileSummary tables are
+%% updated.
+%%
+%% On startup, we scan the files we discover, dealing with the
+%% possibility of a crash having occurred during a compaction (this
+%% consists of tidyup - the compaction is deliberately designed such
+%% that data is duplicated on disk rather than risking it being lost),
+%% and rebuild the dets and ets tables (MsgLocation, FileSummary,
+%% Sequences) from what we find. We ensure that the messages we have
+%% discovered on disk match exactly with the messages recorded in the
+%% mnesia table.
+
+%% MsgLocation is deliberately a dets table, and the mnesia table is
+%% set to be a disk_only_table in order to ensure that we are not RAM
+%% constrained.
+
 %% ---- PUBLIC API ----
 
 start_link(FileSizeLimit, ReadFileHandlesLimit) ->
-- 
cgit v1.2.1


From 45e33f02d36f3db22c35fff51f656feb22d6cbcc Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 14:30:23 +0100
Subject: copy + paste job extending essay

---
 src/rabbit_disk_queue.erl | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 0ae890d9..7e2d8d12 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -150,6 +150,60 @@
 %% set to be a disk_only_table in order to ensure that we are not RAM
 %% constrained.
 
+%% So, with this design, messages move to the left. Eventually, they
+%% should end up in a contiguous block on the left and are then never
+%% rewritten. But this isn't quite the case. If in a file there is one
+%% message that is being ignored, for some reason, and messages in the
+%% file to the right and in the current block are being read all the
+%% time then it will repeatedly be the case that the good data from
+%% both files can be combined and will be written out to a new
+%% file. Whenever this happens, our shunned message will be rewritten.
+%%
+%% So, provided that we combine messages in the right order
+%% (i.e. left file, bottom to top; right file, bottom to top),
+%% eventually our shunned message will end up at the bottom of the
+%% left file. The compaction/combining algorithm is smart enough to
+%% read in good data from the left file that is scattered throughout
+%% (i.e. C and D in the below diagram), then truncate the file to just
+%% above B (i.e. truncate to the limit of the good contiguous region
+%% at the start of the file), then write C and D on top and then write
+%% E, F and G from the right file on top. Thus contiguous blocks of
+%% good data at the bottom of files are not rewritten.
+%%
+%% ---------   ---------        ---------
+%% |   X   |   |   G   |        |   G   |
+%% ---------   ---------        ---------
+%% |   D   |   |   X   |        |   F   |
+%% ---------   ---------        ---------
+%% |   X   |   |   X   |        |   E   |
+%% ---------   ---------        ---------
+%% |   C   |   |   F   |  ===>  |   D   |
+%% ---------   ---------        ---------
+%% |   X   |   |   X   |        |   C   |
+%% ---------   ---------        ---------
+%% |   B   |   |   X   |        |   B   |
+%% ---------   ---------        ---------
+%% |   A   |   |   E   |        |   A   |
+%% ---------   ---------        ---------
+%%    left        right            left
+%%
+%% From this reasoning, we do have a bound on the number of times the
+%% message is rewritten. From when it is inserted, there can be no
+%% files inserted between it and the head of the queue, and the worst
+%% case is that every time it is rewritten, it moves one position lower
+%% in the file (for it to stay at the same position requires that
+%% there are no holes beneath it, which means truncate would be used
+%% and so it would not be rewritten at all). Thus this seems to
+%% suggest the limit is the number of messages ahead of it in the
+%% queue, though it's likely that that's pessimistic, given the
+%% requirements for compaction/combination of files.
+%%
+%% The other property we have is the bound on the lowest
+%% utilisation, which should be 50% - worst case is that all files are
+%% fractionally over half full and can't be combined (the equivalent is
+%% alternating full files and files with only one tiny message in
+%% them).
+
 %% ---- PUBLIC API ----
 
 start_link(FileSizeLimit, ReadFileHandlesLimit) ->
-- 
cgit v1.2.1


From 426342334fd08ee5e26f36d850146d1b11245411 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 14:42:27 +0100
Subject: minor improvement to documentation

---
 src/rabbit_disk_queue.erl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 7e2d8d12..f586713d 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -168,7 +168,9 @@
 %% above B (i.e. truncate to the limit of the good contiguous region
 %% at the start of the file), then write C and D on top and then write
 %% E, F and G from the right file on top. Thus contiguous blocks of
-%% good data at the bottom of files are not rewritten.
+%% good data at the bottom of files are not rewritten (yes, this is
+%% the data the size of which is tracked by the ContiguousTop
+%% variable. Judicious use of a mirror is required).
 %%
 %% ---------   ---------        ---------
-- 
cgit v1.2.1


From 9cca381821da45a2129fdb1f52498e53f8c33711 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 14:57:56 +0100
Subject: rename clean_stop remove extra mnesia index - this has significantly
 improved performance!

---
 src/rabbit_disk_queue.erl | 4 ++--
 src/rabbit_mnesia.erl | 1 -
 src/rabbit_tests.erl | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index f586713d..ba24dd93 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -40,7 +40,7 @@
 -export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]).
 
--export([stop/0, clean_stop/0]).
+-export([stop/0, stop_and_obliterate/0]).
 
 -include_lib("stdlib/include/qlc.hrl").
 -include("rabbit.hrl").
@@ -233,7 +233,7 @@ tx_cancel(MsgIds) when is_list(MsgIds) ->
 stop() ->
     gen_server:call(?SERVER, stop, infinity).
 
-clean_stop() ->
+stop_and_obliterate() ->
     gen_server:call(?SERVER, clean_stop, infinity).
 
 %% ---- GEN-SERVER INTERNAL API ----
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 39951669..b3c4a926 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -147,7 +147,6 @@ table_definitions() ->
     {rabbit_disk_queue,
      [{record_name, dq_msg_loc},
       {type, set},
-      {index, [msg_id]},
       {attributes, record_info(fields, dq_msg_loc)},
       {disc_only_copies, [node()]}]}
    ].
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 08b05da2..1e765d29 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -772,7 +772,7 @@ rdq_time_commands(Funcs) ->
 rdq_virgin() ->
     {Micros, {ok, _}} =
         timer:tc(rabbit_disk_queue, start_link, [1024*1024, 5]),
-    ok = rabbit_disk_queue:clean_stop(),
+    ok = rabbit_disk_queue:stop_and_obliterate(),
     Micros.
 
 rdq_start() ->
-- 
cgit v1.2.1


From 4c1b340294d56afd1f6efae73963fe4026e924e1 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 22 Apr 2009 16:12:58 +0100
Subject: added specs.
ensured mnesia is local --- src/rabbit_disk_queue.erl | 112 +++++++++++++++++++++++++++++++--------------- src/rabbit_mnesia.erl | 1 + 2 files changed, 78 insertions(+), 35 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ba24dd93..7ee02f99 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -206,6 +206,27 @@ %% alternating full files and files with only one tiny message in %% them). +%% ---- SPECS ---- + +-ifdef(use_specs). + +-type(seq_id() :: non_neg_integer()). + +-spec(start_link/2 :: (non_neg_integer(), non_neg_integer()) -> + {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). +-spec(deliver/1 :: (queue_name()) -> + {'empty' | {msg_id(), binary(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}}). +-spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). +-spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). +-spec(tx_commit/2 :: (queue_name(), [msg_id()]) -> 'ok'). +-spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). +-spec(stop/0 :: () -> 'ok'). +-spec(stop_and_obliterate/0 :: () -> 'ok'). + +-endif. + %% ---- PUBLIC API ---- start_link(FileSizeLimit, ReadFileHandlesLimit) -> @@ -234,11 +255,13 @@ stop() -> gen_server:call(?SERVER, stop, infinity). stop_and_obliterate() -> - gen_server:call(?SERVER, clean_stop, infinity). + gen_server:call(?SERVER, stop_vaporise, infinity). %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> + %% gen_server does not trap by default. Without this, terminate/2 + %% won't be called process_flag(trap_exit, true), ok = filelib:ensure_dir(form_filename("nothing")), InitName = "0" ++ ?FILE_EXTENSION, @@ -282,7 +305,7 @@ handle_call({tx_commit, Q, MsgIds}, _From, State) -> {reply, ok, State1}; handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate -handle_call(clean_stop, _From, State) -> +handle_call(stop_vaporise, _From, State) -> State1 = #dqstate { file_summary = FileSummary, sequences = Sequences } = shutdown(State), %% tidy up file handles early @@ -573,8 +596,8 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, file_summary = FileSummary, current_file_name = CurName }) -> - %% the file we're looking at may no longer exist as it may have been deleted - %% within the current GC run + %% the file we're looking at may no longer exist as it may have + %% been deleted within the current GC run case ets:lookup(FileSummary, File) of [] -> State; [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> @@ -696,19 +719,24 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% CurOffset is in the TmpFile. %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) 
Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% this message is going to end up back in Destination, at DestinationContiguousTop + CurOffset + %% this message is going to end up back in + %% Destination, at DestinationContiguousTop + %% + CurOffset FinalOffset = DestinationContiguousTop + CurOffset, ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, FinalOffset, TotalSize}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> - %% base case, called only for the first list elem + %% base case, called only for the + %% first list elem {NextOffset, Offset, Offset + Size}; Offset =:= BlockEnd -> - %% extend the current block because the next msg follows straight on + %% extend the current block because + %% the next msg follows straight on {NextOffset, BlockStart, BlockEnd + Size}; true -> - %% found a gap, so actually do the work for the previous block + %% found a gap, so actually do the + %% work for the previous block BSize = BlockEnd - BlockStart, {ok, BlockStart} = file:position(DestinationHdl, @@ -722,9 +750,10 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, BSize1 = BlockEnd1 - BlockStart1, {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), - %% so now Tmp contains everything we need to salvage from Destination, - %% and MsgLocation has been updated to reflect compaction of Destination - %% so truncate Destination and copy from Tmp back to the end + %% so now Tmp contains everything we need to salvage from + %% Destination, and MsgLocation has been updated to + %% reflect compaction of Destination so truncate + %% Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), {ok, DestinationContiguousTop} = file:position(DestinationHdl, @@ -738,7 +767,8 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, file:position(DestinationHdl, {bof, DestinationContiguousTop}), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be DestinationValid + %% position in DestinationHdl should now be + %% DestinationValid ok = file:sync(DestinationHdl), ok = file:close(TmpHdl), ok = file:delete(form_filename(Tmp)) @@ -759,13 +789,16 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, CurOffset, TotalSize}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> - %% base case, called only for the first list elem + %% base case, called only for the first list + %% elem {NextOffset, Offset, Offset + Size}; Offset =:= BlockEnd -> - %% extend the current block because the next msg follows straight on + %% extend the current block because the next + %% msg follows straight on {NextOffset, BlockStart, BlockEnd + Size}; true -> - %% found a gap, so actually do the work for the previous block + %% found a gap, so actually do the work for + %% the previous block BSize = BlockEnd - BlockStart, {ok, BlockStart} = file:position(SourceHdl, {bof, BlockStart}), @@ -801,8 +834,8 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> [{File, ValidData, _ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), case ValidData of - %% we should NEVER find the current file in here - %% hence right should always be a file, not undefined + %% we should NEVER find the current file in here hence right + %% should always be a file, not undefined 0 -> case {Left, Right} of {undefined, _} when not(is_atom(Right)) -> %% the eldest file is empty. YAY! 
@@ -823,13 +856,16 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> %% ---- DISK RECOVERY ---- load_from_disk(State) -> - %% sorted so that smallest number is first. which also means eldest file (left-most) first + %% sorted so that smallest number is first. which also means + %% eldest file (left-most) first {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), - %% There should be no more tmp files now, so go ahead and load the whole lot + %% There should be no more tmp files now, so go ahead and load the + %% whole lot (State1 = #dqstate{ msg_location = MsgLocation }) = load_messages(undefined, Files, State), - %% Finally, check there is nothing in mnesia which we haven't loaded + %% Finally, check there is nothing in mnesia which we haven't + %% loaded {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), @@ -843,7 +879,8 @@ load_from_disk(State) -> {ok, State2}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> - %% next-seqid-to-read is the lowest seqid which has is_delivered = false + %% next-seqid-to-read is the lowest seqid which has is_delivered = + %% false {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), @@ -902,8 +939,8 @@ load_messages(Left, [File|Files], } end end, {[], 0}, Messages), - %% foldl reverses lists and find_contiguous_block_prefix needs elems in the same order - %% as from scan_file_for_valid_messages + %% foldl reverses lists and find_contiguous_block_prefix needs + %% elems in the same order as from scan_file_for_valid_messages {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), Right = case Files of [] -> undefined; @@ -927,7 +964,8 @@ recover_crashed_compactions1(Files, TmpFile) -> {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), - %% all of these messages should appear in the mnesia table, otherwise they wouldn't have been copied out + %% all of these messages should appear in the mnesia table, + %% otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> true = 0 < length(mnesia:dirty_match_object (rabbit_disk_queue, @@ -959,7 +997,8 @@ recover_crashed_compactions1(Files, TmpFile) -> %% them over again case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file - %% note this also catches the case when the tmp file is empty + %% note this also catches the case when the tmp file + %% is empty ok = file:delete(TmpFile); _False -> %% we're in case 4 above. @@ -973,7 +1012,8 @@ recover_crashed_compactions1(Files, TmpFile) -> end, MsgIds), %% The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), - %% we should have that none of the messages in the prefix are in the tmp file + %% we should have that none of the messages in the prefix + %% are in the tmp file true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, MsgIds), @@ -981,16 +1021,18 @@ recover_crashed_compactions1(Files, TmpFile) -> [write, raw, binary, delayed_write]), {ok, Top} = file:position(MainHdl, Top), ok = file:truncate(MainHdl), %% wipe out any rubbish at the end of the file - %% there really could be rubbish at the end of the file - we could have failed after the - %% extending truncate. 
- %% Remember the head of the list will be the highest entry in the file + %% there really could be rubbish at the end of the file - + %% we could have failed after the extending truncate. + %% Remember the head of the list will be the highest entry + %% in the file [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Top + TmpSize, {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), - ok = file:truncate(MainHdl), %% and now extend the main file as big as necessary in a single move - %% if we run out of disk space, this truncate could fail, but we still - %% aren't risking losing data + %% and now extend the main file as big as necessary in a + %% single move if we run out of disk space, this truncate + %% could fail, but we still aren't risking losing data + ok = file:truncate(MainHdl), {ok, TmpHdl} = file:open(form_filename(TmpFile), [read, raw, binary, read_ahead]), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), @@ -1010,9 +1052,9 @@ recover_crashed_compactions1(Files, TmpFile) -> end, ok. -%% this assumes that the messages are ordered such that the highest address is at -%% the head of the list. -%% this matches what scan_file_for_valid_messages produces +%% this assumes that the messages are ordered such that the highest +%% address is at the head of the list. This matches what +%% scan_file_for_valid_messages produces find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index b3c4a926..d2b2b15c 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -147,6 +147,7 @@ table_definitions() -> {rabbit_disk_queue, [{record_name, dq_msg_loc}, {type, set}, + {local_content, true}, {attributes, record_info(fields, dq_msg_loc)}, {disc_only_copies, [node()]}]} ]. -- cgit v1.2.1 From f8d0f6933ea4a5b83df7eb1060f8e3503000e8a1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 18:01:48 +0100 Subject: some refactorings. Not all done yet. --- src/rabbit_disk_queue.erl | 101 +++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 7ee02f99..b32fffe4 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -260,8 +260,14 @@ stop_and_obliterate() -> %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> - %% gen_server does not trap by default. Without this, terminate/2 - %% won't be called + %% If the gen_server is part of a supervision tree and is ordered + %% by its supervisor to terminate, terminate will be called with + %% Reason=shutdown if the following conditions apply: + %% * the gen_server has been set to trap exit signals, and + %% * the shutdown strategy as defined in the supervisor's + %% child specification is an integer timeout value, not + %% brutal_kill. + %% Otherwise, the gen_server will be immediately terminated. 
process_flag(trap_exit, true), ok = filelib:ensure_dir(form_filename("nothing")), InitName = "0" ++ ?FILE_EXTENSION, @@ -370,9 +376,7 @@ base_directory() -> internal_deliver(Q, State = #dqstate { msg_location = MsgLocation, - sequences = Sequences, - read_file_handles_limit = ReadFileHandlesLimit, - read_file_handles = {ReadHdls, ReadHdlsAge} + sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; @@ -383,31 +387,7 @@ internal_deliver(Q, State = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] -> [{MsgId, _RefCount, File, Offset, TotalSize}] = dets:lookup(MsgLocation, MsgId), - Now = now(), - {FileHdl, ReadHdls1, ReadHdlsAge1} = - case dict:find(File, ReadHdls) of - error -> - {ok, Hdl} = file:open(form_filename(File), - [read, raw, binary, - read_ahead]), - case dict:size(ReadHdls) < ReadFileHandlesLimit of - true -> - {Hdl, ReadHdls, ReadHdlsAge}; - _False -> - {Then, OldFile, ReadHdlsAge3} = - gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, Then}} = - dict:find(OldFile, ReadHdls), - ok = file:close(OldHdl), - {Hdl, dict:erase(OldFile, ReadHdls), - ReadHdlsAge3} - end; - {ok, {Hdl, Then}} -> - {Hdl, ReadHdls, - gb_trees:delete(Then, ReadHdlsAge)} - end, - ReadHdls2 = dict:store(File, {FileHdl, Now}, ReadHdls1), - ReadHdlsAge2 = gb_trees:enter(Now, File, ReadHdlsAge1), + {FileHdl, State1} = getReadHandle(File, State), %% read the message {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), @@ -417,10 +397,38 @@ internal_deliver(Q, State = end, true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, - State #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge2} }} + State1} end end. +getReadHandle(File, State = + #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, + read_file_handles_limit = ReadFileHandlesLimit }) -> + Now = now(), + {FileHdl, ReadHdls1, ReadHdlsAge1} = + case dict:find(File, ReadHdls) of + error -> + {ok, Hdl} = file:open(form_filename(File), + [read, raw, binary, + read_ahead]), + case dict:size(ReadHdls) < ReadFileHandlesLimit of + true -> + {Hdl, ReadHdls, ReadHdlsAge}; + _False -> + {Then, OldFile, ReadHdlsAge2} = + gb_trees:take_smallest(ReadHdlsAge), + {ok, {OldHdl, Then}} = + dict:find(OldFile, ReadHdls), + ok = file:close(OldHdl), + {Hdl, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} + end; + {ok, {Hdl, Then}} -> + {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} + end, + ReadHdls3 = dict:store(File, {FileHdl, Now}, ReadHdls1), + ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), + {FileHdl, State #dqstate {read_file_handles = {ReadHdls3, ReadHdlsAge3}}}. + internal_ack(Q, MsgIds, State) -> remove_messages(Q, MsgIds, true, State). @@ -664,6 +672,14 @@ sortMsgLocationsByOffset(Asc, List) -> Comp(OffA, OffB) end, List). +truncateAndExtendFile(FileHdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), + ok = file:truncate(FileHdl), + {ok, Highpoint} = file:position(FileHdl, {bof, Highpoint}), + ok = file:truncate(FileHdl), + {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), + ok. 
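truncateAndExtendFile/3 above relies on file:truncate/1 acting at the current file position: seek back and truncate to cut the file down, then seek past end-of-file and truncate again to grow it in one step. A hypothetical usage sketch (path and sizes invented; assumes it runs inside the module, where the function is visible):

    resize_demo() ->
        Path = "/tmp/resize_demo.rdq",
        ok = file:write_file(Path, <<0:(8192 * 8)>>), %% 8KB of zeros
        {ok, Hdl} = file:open(Path, [read, write, raw, binary]),
        %% keep the first 4KB, then pre-allocate out to 1MB in one move
        ok = truncateAndExtendFile(Hdl, 4096, 1024 * 1024),
        {ok, 1048576} = file:position(Hdl, {eof, 0}), %% file is now 1MB
        ok = file:close(Hdl).

Pre-allocating the destination like this is why the combining code can fail on a full disk before any message bytes move, rather than half way through a copy.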
+ combineFiles({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, {Destination, DestinationValid, DestinationContiguousTop, @@ -683,14 +699,8 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% then truncate, copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source if DestinationContiguousTop =:= DestinationValid -> - {ok, DestinationValid} = file:position(DestinationHdl, - {bof, DestinationValid}), - ok = file:truncate(DestinationHdl), - {ok, ExpectedSize} = file:position(DestinationHdl, - {cur, SourceValid}), - ok = file:truncate(DestinationHdl), - {ok, DestinationValid} = file:position(DestinationHdl, - {bof, DestinationValid}); + ok = truncateAndExtendFile(DestinationHdl, + DestinationValid, ExpectedSize); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = @@ -755,17 +765,8 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), - {ok, DestinationContiguousTop} = - file:position(DestinationHdl, - {bof, DestinationContiguousTop}), - ok = file:truncate(DestinationHdl), - {ok, ExpectedSize} = - file:position(DestinationHdl, - {bof, ExpectedSize}), - ok = file:truncate(DestinationHdl), - {ok, DestinationContiguousTop} = - file:position(DestinationHdl, - {bof, DestinationContiguousTop}), + ok = truncateAndExtendFile(DestinationHdl, + DestinationContiguousTop, ExpectedSize), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), %% position in DestinationHdl should now be %% DestinationValid -- cgit v1.2.1 From ee67a447b346908983eb6fb610bd9c5a981dcc10 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 21:39:57 +0100 Subject: added a test --- Makefile | 2 +- src/rabbit_tests.erl | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b7464244..d55ab376 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_MNESIA_DIR=~/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia RABBITMQ_LOG_BASE=/tmp SOURCE_DIR=src diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1e765d29..4b72a742 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -766,6 +766,23 @@ rdq_stress_gc(MsgCount) -> rdq_stop(), passed. +rdq_time_insane_startup() -> + rdq_virgin(), + OneGig = 1024*1024*1024, + rabbit_disk_queue:start_link(OneGig, 5), + Msg = <<>>, + List = lists:seq(1, 1024*1024), + %% 1M empty messages, at say, 100B per message, should all fit + %% within 1GB and thus in a single file + io:format("Publishing 1M empty messages...~n",[]), + [rabbit_disk_queue:tx_publish(N, Msg) || N <- List], + rabbit_disk_queue:tx_commit(q, List), + io:format("...done. Timing restart...~n", []), + rdq_stop(), + Micros = rdq_virgin(), + io:format("...startup took ~w microseconds.~n", [Micros]), + rdq_stop(). + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). 
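These timing tests all lean on one idiom: timer:tc(Module, Function, Args) runs the call and returns {Microseconds, Result}, and rdq_time_commands turns a list of closures into a single measurable unit. A minimal sketch of the same pattern (function names invented):

    %% time a list of zero-arity funs as one measurement
    time_steps(Steps) when is_list(Steps) ->
        {Micros, ok} = timer:tc(lists, foreach,
                                [fun (F) -> F() end, Steps]),
        Micros.

    %% e.g. Micros = time_steps([fun publish_all/0, fun commit_all/0])
    %% where publish_all/0 and commit_all/0 are hypothetical steps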
-- cgit v1.2.1 From 453e09ed6434ef2e0c2c61b69079bbfd90affdf0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 22 Apr 2009 23:53:26 +0100 Subject: just taking out the param of file handles --- src/rabbit_disk_queue.erl | 10 ++++++---- src/rabbit_tests.erl | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b32fffe4..55840ce9 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -33,7 +33,7 @@ -behaviour(gen_server). --export([start_link/2]). +-export([start_link/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -59,6 +59,8 @@ -define(SERVER, ?MODULE). +-define(MAX_READ_FILE_HANDLES, 256). + -record(dqstate, {msg_location, %% where are messages? file_summary, %% what's in the files? sequences, %% next read and write for each q @@ -212,7 +214,7 @@ -type(seq_id() :: non_neg_integer()). --spec(start_link/2 :: (non_neg_integer(), non_neg_integer()) -> +-spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> @@ -229,9 +231,9 @@ %% ---- PUBLIC API ---- -start_link(FileSizeLimit, ReadFileHandlesLimit) -> +start_link(FileSizeLimit) -> gen_server:start_link({local, ?SERVER}, ?MODULE, - [FileSizeLimit, ReadFileHandlesLimit], []). + [FileSizeLimit, ?MAX_READ_FILE_HANDLES], []). publish(Q, MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4b72a742..2f1d0c43 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -769,7 +769,7 @@ rdq_stress_gc(MsgCount) -> rdq_time_insane_startup() -> rdq_virgin(), OneGig = 1024*1024*1024, - rabbit_disk_queue:start_link(OneGig, 5), + rabbit_disk_queue:start_link(OneGig), Msg = <<>>, List = lists:seq(1, 1024*1024), %% 1M empty messages, at say, 100B per message, should all fit @@ -788,12 +788,12 @@ rdq_time_commands(Funcs) -> rdq_virgin() -> {Micros, {ok, _}} = - timer:tc(rabbit_disk_queue, start_link, [1024*1024, 5]), + timer:tc(rabbit_disk_queue, start_link, [1024*1024]), ok = rabbit_disk_queue:stop_and_obliterate(), Micros. rdq_start() -> - {ok, _} = rabbit_disk_queue:start_link(1024*1024, 5). + {ok, _} = rabbit_disk_queue:start_link(1024*1024). rdq_stop() -> rabbit_disk_queue:stop(). -- cgit v1.2.1 From 78fab6c4843fe4c0d26cfc75825e5535f0dbac25 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 12 May 2009 13:39:06 +0100 Subject: Added to the disk queue the ability to dynamically switch between disk-only and disk+ram modes. The disk+ram mode uses disk_copies for mnesia and ets for msg_location. This results in a substantial performance improvement (minimum 5 times faster), but is ram limited by number of messages. The disk-only mode uses dets and disk_only_copies for mnesia. This is much slower, but should not be limited. --- src/rabbit_disk_queue.erl | 181 +++++++++++++++++++++++++++++++--------------- src/rabbit_tests.erl | 4 +- 2 files changed, 127 insertions(+), 58 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 55840ce9..16208fd0 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,7 +40,7 @@ -export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]). --export([stop/0, stop_and_obliterate/0]). +-export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). 
-include_lib("stdlib/include/qlc.hrl"). -include("rabbit.hrl"). @@ -49,7 +49,7 @@ -define(WRITE_OK, 255). -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). --define(MSG_LOC_DETS_NAME, rabbit_disk_queue_msg_location). +-define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). -define(FILE_EXTENSION, ".rdq"). @@ -61,7 +61,9 @@ -define(MAX_READ_FILE_HANDLES, 256). --record(dqstate, {msg_location, %% where are messages? +-record(dqstate, {msg_location_dets, %% where are messages? + msg_location_ets, %% as above, but for ets version + operation_mode, %% ram_disk | disk_only file_summary, %% what's in the files? sequences, %% next read and write for each q current_file_num, %% current file name as number @@ -259,6 +261,12 @@ stop() -> stop_and_obliterate() -> gen_server:call(?SERVER, stop_vaporise, infinity). +to_disk_only_mode() -> + gen_server:call(?SERVER, to_disk_only_mode, infinity). + +to_ram_disk_mode() -> + gen_server:call(?SERVER, to_ram_disk_mode, infinity). + %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -271,19 +279,30 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. process_flag(trap_exit, true), + Node = node(), + ok = + case mnesia:change_table_copy_type(rabbit_disk_queue, Node, disc_only_copies) of + {atomic, ok} -> ok; + {aborted, {already_exists, rabbit_disk_queue, Node, disc_only_copies}} -> ok; + E -> E + end, ok = filelib:ensure_dir(form_filename("nothing")), InitName = "0" ++ ?FILE_EXTENSION, - {ok, MsgLocation} = - dets:open_file(?MSG_LOC_DETS_NAME, - [{file, form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ + {ok, MsgLocationDets} = + dets:open_file(?MSG_LOC_NAME, + [{file, form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)}, {min_no_slots, 1024*1024}, %% man says this should be <= 32M. But it works... {max_no_slots, 1024*1024*1024}, {type, set} ]), + MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), + true = ets:safe_fixtable(MsgLocationEts, true), State = - #dqstate { msg_location = MsgLocation, + #dqstate { msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, + operation_mode = disk_only, file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), sequences = ets:new(?SEQUENCE_ETS_NAME, @@ -323,8 +342,26 @@ handle_call(stop_vaporise, _From, State) -> lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1 #dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}}. 
+ read_file_handles = {dict:new(), gb_trees:empty()}}}; %% gen_server now calls terminate, which then calls shutdown +handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = disk_only }) -> + {reply, ok, State}; +handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = ram_disk, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), + ok = dets:from_ets(MsgLocationDets, MsgLocationEts), + true = ets:delete_all_objects(MsgLocationEts), + {reply, ok, State #dqstate { operation_mode = disk_only }}; +handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = ram_disk }) -> + {reply, ok, State}; +handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_only, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), + true = ets:from_dets(MsgLocationEts, MsgLocationDets), + ok = dets:delete_all_objects(MsgLocationDets), + {reply, ok, State #dqstate { operation_mode = ram_disk }}. handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), @@ -345,14 +382,16 @@ handle_info(_Info, State) -> terminate(_Reason, State) -> shutdown(State). -shutdown(State = #dqstate { msg_location = MsgLocation, +shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> %% deliberately ignoring return codes here - dets:close(MsgLocation), - file:delete(form_filename(atom_to_list(?MSG_LOC_DETS_NAME) ++ + dets:close(MsgLocationDets), + file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)), + true = ets:delete_all_objects(MsgLocationEts), if FileHdl =:= undefined -> ok; true -> file:sync(FileHdl), file:close(FileHdl) @@ -374,12 +413,46 @@ form_filename(Name) -> base_directory() -> filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). +dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, + Key) -> + dets:lookup(MsgLocationDets, Key); +dets_ets_lookup(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, + Key) -> + ets:lookup(MsgLocationEts, Key). + +dets_ets_delete(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, + Key) -> + ok = dets:delete(MsgLocationDets, Key); +dets_ets_delete(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, + Key) -> + true = ets:delete(MsgLocationEts, Key), + ok. + +dets_ets_insert(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, + Obj) -> + ok = dets:insert(MsgLocationDets, Obj); +dets_ets_insert(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, + Obj) -> + true = ets:insert(MsgLocationEts, Obj), + ok. + +dets_ets_insert_new(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, + Obj) -> + true = dets:insert_new(MsgLocationDets, Obj); +dets_ets_insert_new(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, + Obj) -> + true = ets:insert_new(MsgLocationEts, Obj). 
+ +dets_ets_match_object(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, + Obj) -> + dets:match_object(MsgLocationDets, Obj); +dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, + Obj) -> + ets:match_object(MsgLocationEts, Obj). + %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, State = - #dqstate { msg_location = MsgLocation, - sequences = Sequences - }) -> +internal_deliver(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; [{Q, ReadSeqId, WriteSeqId}] -> @@ -388,7 +461,7 @@ internal_deliver(Q, State = [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] -> [{MsgId, _RefCount, File, Offset, TotalSize}] = - dets:lookup(MsgLocation, MsgId), + dets_ets_lookup(State, MsgId), {FileHdl, State1} = getReadHandle(File, State), %% read the message {ok, {MsgBody, BodySize}} = @@ -438,18 +511,17 @@ internal_ack(Q, MsgIds, State) -> %% called from tx_cancel with MnesiaDelete = false %% called from ack with MnesiaDelete = true remove_messages(Q, MsgSeqIds, MnesiaDelete, - State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary, + State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> Files = lists:foldl( fun ({MsgId, SeqId}, Files2) -> [{MsgId, RefCount, File, Offset, TotalSize}] = - dets:lookup(MsgLocation, MsgId), + dets_ets_lookup(State, MsgId), Files3 = if 1 =:= RefCount -> - ok = dets:delete(MsgLocation, MsgId), + ok = dets_ets_delete(State, MsgId), [{File, ValidTotalSize, ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), ContiguousTop1 = lists:min([ContiguousTop, Offset]), @@ -461,8 +533,8 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, true -> sets:add_element(File, Files2) end; 1 < RefCount -> - ok = dets:insert(MsgLocation, {MsgId, RefCount - 1, - File, Offset, TotalSize}), + ok = dets_ets_insert(State, {MsgId, RefCount - 1, + File, Offset, TotalSize}), Files2 end, if MnesiaDelete -> @@ -475,18 +547,17 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, {ok, State2}. internal_tx_publish(MsgId, MsgBody, - State = #dqstate { msg_location = MsgLocation, - current_file_handle = CurHdl, + State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, current_offset = CurOffset, file_summary = FileSummary }) -> - case dets:lookup(MsgLocation, MsgId) of + case dets_ets_lookup(State, MsgId) of [] -> %% New message, lots to do {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), - true = dets:insert_new(MsgLocation, {MsgId, 1, CurName, - CurOffset, TotalSize}), + true = dets_ets_insert_new(State, {MsgId, 1, CurName, + CurOffset, TotalSize}), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + @@ -503,14 +574,13 @@ internal_tx_publish(MsgId, MsgBody, State #dqstate {current_offset = NextOffset}); [{MsgId, RefCount, File, Offset, TotalSize}] -> %% We already know about it, just update counter - ok = dets:insert(MsgLocation, {MsgId, RefCount + 1, File, - Offset, TotalSize}), + ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, + Offset, TotalSize}), {ok, State} end. 
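With the dets_ets_* dispatchers above, every msg_location access pattern-matches on operation_mode, so the bulk of the module never knows which store is live; only the mode-switch calls in handle_call move data between the two. The flip itself reduces to a standalone sketch like this (table name and path invented):

    -module(ets_dets_flip_sketch).
    -export([demo/0]).

    demo() ->
        {ok, D} = dets:open_file(flip_demo,
                                 [{file, "/tmp/flip_demo.dets"},
                                  {type, set}]),
        E = ets:new(flip_demo, [set, private]),
        true = ets:insert(E, {k1, v1}),
        %% ram_disk -> disk_only: push ets into dets, then empty ets
        ok = dets:from_ets(D, E),
        true = ets:delete_all_objects(E),
        [{k1, v1}] = dets:lookup(D, k1),
        %% disk_only -> ram_disk: pull dets into ets, then empty dets
        true = ets:from_dets(E, D),
        ok = dets:delete_all_objects(D),
        [{k1, v1}] = ets:lookup(E, k1),
        ok = dets:close(D),
        true = ets:delete(E),
        ok.

In the disk queue the same pair of bulk copies is bracketed by mnesia:change_table_copy_type/3, so the mnesia table and the msg_location store always move between RAM and disk together.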
internal_tx_commit(Q, MsgIds, - State = #dqstate { msg_location = MsgLocation, - current_file_handle = CurHdl, + State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, sequences = Sequences }) -> @@ -525,7 +595,7 @@ internal_tx_commit(Q, MsgIds, lists:foldl( fun (MsgId, {Acc, NextWriteSeqId}) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = - dets:lookup(MsgLocation, MsgId), + dets_ets_lookup(State, MsgId), ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, NextWriteSeqId}, @@ -687,8 +757,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, State1) -> - (State = #dqstate { msg_location = MsgLocation }) = - closeFile(Source, closeFile(Destination, State1)), + State = closeFile(Source, closeFile(Destination, State1)), {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]), @@ -719,10 +788,10 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% Given expected access patterns, I suspect that the list should be %% naturally sorted as we require, however, we need to enforce it anyway end, sortMsgLocationsByOffset(true, - dets:match_object(MsgLocation, - {'_', '_', - Destination, - '_', '_'}))), + dets_ets_match_object(State, + {'_', '_', + Destination, + '_', '_'}))), TmpSize = DestinationValid - DestinationContiguousTop, {TmpSize, BlockStart1, BlockEnd1} = lists:foldl( @@ -735,8 +804,8 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% Destination, at DestinationContiguousTop %% + CurOffset FinalOffset = DestinationContiguousTop + CurOffset, - ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, - FinalOffset, TotalSize}), + ok = dets_ets_insert(State, {MsgId, RefCount, Destination, + FinalOffset, TotalSize}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> %% base case, called only for the @@ -763,7 +832,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), %% so now Tmp contains everything we need to salvage from - %% Destination, and MsgLocation has been updated to + %% Destination, and MsgLocationDets has been updated to %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), @@ -777,9 +846,10 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, ok = file:delete(form_filename(Tmp)) end, SourceWorkList = - sortMsgLocationsByOffset(true, dets:match_object(MsgLocation, - {'_', '_', Source, - '_', '_'})), + sortMsgLocationsByOffset(true, + dets_ets_match_object(State, + {'_', '_', Source, + '_', '_'})), {ExpectedSize, BlockStart2, BlockEnd2} = lists:foldl( fun ({MsgId, RefCount, _Source, Offset, TotalSize}, @@ -787,9 +857,9 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% CurOffset is in the DestinationFile. 
%% Offset, BlockStart and BlockEnd are in the SourceFile Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% update MsgLocation to reflect change of file and offset - ok = dets:insert(MsgLocation, {MsgId, RefCount, Destination, - CurOffset, TotalSize}), + %% update MsgLocationDets to reflect change of file and offset + ok = dets_ets_insert(State, {MsgId, RefCount, Destination, + CurOffset, TotalSize}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> %% base case, called only for the first list @@ -865,8 +935,7 @@ load_from_disk(State) -> ok = recover_crashed_compactions(Files, TmpFiles), %% There should be no more tmp files now, so go ahead and load the %% whole lot - (State1 = #dqstate{ msg_location = MsgLocation }) = - load_messages(undefined, Files, State), + State1 = load_messages(undefined, Files, State), %% Finally, check there is nothing in mnesia which we haven't %% loaded {atomic, true} = mnesia:transaction( @@ -874,7 +943,7 @@ load_from_disk(State) -> ok = mnesia:read_lock_table(rabbit_disk_queue), mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) -> true = 1 =:= - length(dets:lookup(MsgLocation, MsgId)) + length(dets_ets_lookup(State1, MsgId)) end, true, rabbit_disk_queue) end), @@ -910,9 +979,9 @@ load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), State; -load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) -> +load_messages(Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), - Offset = case dets:match_object(MsgLocation, {'_', '_', Left, '_', '_'}) of + Offset = case dets_ets_match_object(State, {'_', '_', Left, '_', '_'}) of [] -> 0; L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] = sortMsgLocationsByOffset(false, L), @@ -921,9 +990,7 @@ load_messages(Left, [], State = #dqstate { msg_location = MsgLocation }) -> State #dqstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; load_messages(Left, [File|Files], - State = #dqstate { msg_location = MsgLocation, - file_summary = FileSummary - }) -> + State = #dqstate { file_summary = FileSummary }) -> %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( @@ -935,8 +1002,8 @@ load_messages(Left, [File|Files], is_delivered = '_'})) of 0 -> {VMAcc, VTSAcc}; RefCount -> - true = dets:insert_new(MsgLocation, {MsgId, RefCount, File, - Offset, TotalSize}), + true = dets_ets_insert_new(State, {MsgId, RefCount, File, + Offset, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 2f1d0c43..9ce62f86 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -770,6 +770,7 @@ rdq_time_insane_startup() -> rdq_virgin(), OneGig = 1024*1024*1024, rabbit_disk_queue:start_link(OneGig), + rabbit_disk_queue:to_ram_disk_mode(), Msg = <<>>, List = lists:seq(1, 1024*1024), %% 1M empty messages, at say, 100B per message, should all fit @@ -796,4 +797,5 @@ rdq_start() -> {ok, _} = rabbit_disk_queue:start_link(1024*1024). rdq_stop() -> - rabbit_disk_queue:stop(). + rabbit_disk_queue:stop(), + timer:sleep(1000). -- cgit v1.2.1 From 9a632e1ddf8ca23e4da005236b0d8235fe407811 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 12 May 2009 13:40:10 +0100 Subject: sigh. 
A while ago I managed to commit a change to the Makefile
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d55ab376..b7464244 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 RABBITMQ_NODENAME=rabbit
 RABBITMQ_SERVER_START_ARGS=
-RABBITMQ_MNESIA_DIR=~/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia
+RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia
 RABBITMQ_LOG_BASE=/tmp
 SOURCE_DIR=src
-- 
cgit v1.2.1


From a17d9599a64b326e24146f1e90261589fde3dade Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 13 May 2009 11:43:02 +0100
Subject: Startup requires a match in mnesia per message we encounter on disk.

This led to a 22-minute startup time for 100,000 messages. However, by
dynamically adding an index during startup to mnesia, and then later
removing it, this is reduced to 13.5 seconds. Note, however, that testing
this with rabbit_tests:rdq_time_insane_startup() requires the disk queue
to be edited so that it starts up in ram_disk mode, not disk_only mode,
which is the code default.
---
 src/rabbit_disk_queue.erl | 29 +++++++++++++++++++++++------
 src/rabbit_tests.erl | 9 +++++----
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 16208fd0..803f358b 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -228,6 +228,8 @@
 -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok').
 -spec(stop/0 :: () -> 'ok').
 -spec(stop_and_obliterate/0 :: () -> 'ok').
+-spec(to_ram_disk_mode/0 :: () -> 'ok').
+-spec(to_disk_only_mode/0 :: () -> 'ok').
 
 -endif.
 
@@ -931,6 +933,11 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) ->
 load_from_disk(State) ->
     %% sorted so that smallest number is first. which also means
     %% eldest file (left-most) first
+    ok = case mnesia:add_table_index(rabbit_disk_queue, msg_id) of
+             {atomic, ok} -> ok;
+             {aborted,{already_exists,rabbit_disk_queue,_}} -> ok;
+             E -> E
+         end,
     {Files, TmpFiles} = get_disk_queue_files(),
     ok = recover_crashed_compactions(Files, TmpFiles),
     %% There should be no more tmp files now, so go ahead and load the
@@ -948,6 +955,13 @@ load_from_disk(State) ->
                     true, rabbit_disk_queue)
           end),
     State2 = extract_sequence_numbers(State1),
+    ok = case mnesia:del_table_index(rabbit_disk_queue, msg_id) of
+             {atomic, ok} -> ok;
+             %% hmm, something weird must be going on, but it's
+             %% probably not the end of the world
+             {aborted,{no_exists,rabbit_disk_queue,_}} -> ok;
+             E2 -> E2
+         end,
     {ok, State2}.
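The add-then-remove dance around the index generalizes. A minimal sketch of a hypothetical helper (with_temporary_index/3 is illustrative only, not part of the patch) that runs a fun while a table carries a temporary secondary index, tolerating the same already_exists/no_exists outcomes that the guards above allow for:

    %% Hypothetical helper, not in the patch: run Fun while Table has a
    %% temporary secondary index on Attr, then drop the index again,
    %% even if Fun throws.
    with_temporary_index(Table, Attr, Fun) ->
        ok = case mnesia:add_table_index(Table, Attr) of
                 {atomic, ok} -> ok;
                 {aborted, {already_exists, Table, _}} -> ok
             end,
        try
            Fun()
        after
            case mnesia:del_table_index(Table, Attr) of
                {atomic, ok} -> ok;
                {aborted, {no_exists, Table, _}} -> ok
            end
        end.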
extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> @@ -995,11 +1009,12 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_match_object + case length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_'})) of + is_delivered = '_'}, + msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = dets_ets_insert_new(State, {MsgId, RefCount, File, @@ -1037,11 +1052,12 @@ recover_crashed_compactions1(Files, TmpFile) -> %% all of these messages should appear in the mnesia table, %% otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object + true = 0 < length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_'})) + is_delivered = '_'}, + msg_id)) end, MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), @@ -1074,11 +1090,12 @@ recover_crashed_compactions1(Files, TmpFile) -> %% we're in case 4 above. %% check that everything in the main file is a valid message in mnesia lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_match_object + true = 0 < length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_'})) + is_delivered = '_'}, + msg_id)) end, MsgIds), %% The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 9ce62f86..fcd3d5f6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -772,17 +772,17 @@ rdq_time_insane_startup() -> rabbit_disk_queue:start_link(OneGig), rabbit_disk_queue:to_ram_disk_mode(), Msg = <<>>, - List = lists:seq(1, 1024*1024), + Count = 100000, + List = lists:seq(1, Count), %% 1M empty messages, at say, 100B per message, should all fit %% within 1GB and thus in a single file - io:format("Publishing 1M empty messages...~n",[]), + io:format("Publishing ~p empty messages...~n",[Count]), [rabbit_disk_queue:tx_publish(N, Msg) || N <- List], rabbit_disk_queue:tx_commit(q, List), io:format("...done. Timing restart...~n", []), rdq_stop(), Micros = rdq_virgin(), - io:format("...startup took ~w microseconds.~n", [Micros]), - rdq_stop(). + io:format("...startup took ~w microseconds.~n", [Micros]). rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). @@ -791,6 +791,7 @@ rdq_virgin() -> {Micros, {ok, _}} = timer:tc(rabbit_disk_queue, start_link, [1024*1024]), ok = rabbit_disk_queue:stop_and_obliterate(), + timer:sleep(1000), Micros. rdq_start() -> -- cgit v1.2.1 From 51e8d5d1479b23b6b3bc5fdb4016852658b8637f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 13 May 2009 14:44:29 +0100 Subject: Made tx_commit take a list of seq_ids which are things to be ack'd. This means that some external thing should keep track of exactly what is in a transaction (this is already the case for publishes, it just needs to be extended for acks), and then present them for the commit. Also, fixed a stupid bug in the stress_gc test which was previously acking everything at once (albeit in a weird order as desired) which meant all files got emptied before the gc ran, not quite what was desired. 
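To make the new contract concrete: callers now hand tx_commit/3 both the publishes and the acks belonging to the transaction. A minimal sketch (publish_and_ack_atomically/3 is a hypothetical wrapper; message ids here are just fresh refs):

    %% Hypothetical wrapper: commit a batch of publishes together with
    %% the acks accumulated in the same transaction. AckTags are the
    %% final elements of earlier deliver/1 results for this queue.
    publish_and_ack_atomically(Q, Payloads, AckTags) ->
        MsgIds = [make_ref() || _ <- Payloads],
        [ok = rabbit_disk_queue:tx_publish(MsgId, Payload)
         || {MsgId, Payload} <- lists:zip(MsgIds, Payloads)],
        ok = rabbit_disk_queue:tx_commit(Q, MsgIds, AckTags).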
--- src/rabbit_disk_queue.erl | 86 ++++++++++++++++++++++++++++------------------- src/rabbit_tests.erl | 19 ++++++----- 2 files changed, 61 insertions(+), 44 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 803f358b..c08258c8 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/2, tx_cancel/1]). +-export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -152,7 +152,12 @@ %% MsgLocation is deliberately a dets table, and the mnesia table is %% set to be a disk_only_table in order to ensure that we are not RAM -%% constrained. +%% constrained. However, for performance reasons, it is possible to +%% call to_ram_disk_mode/0 which will alter the mnesia table to +%% disc_copies and convert MsgLocation to an ets table. This results +%% in a massive performance improvement, at the expense of greater RAM +%% usage. The idea is that when memory gets tight, we switch to +%% disk_only mode but otherwise try to run in ram_disk mode. %% So, with this design, messages move to the left. Eventually, they %% should end up in a contiguous block on the left and are then never @@ -176,21 +181,21 @@ %% the data the size of which is tracked by the ContiguousTop %% variable. Judicious use of a mirror is required). %% -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | X | | G | | G | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | D | | X | | F | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | X | | X | | E | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | C | | F | ===> | D | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | X | | X | | C | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | B | | X | | B | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% | A | | E | | A | -%% --------- --------- --------- +%% +-------+ +-------- --------- %% left right left %% %% From this reasoning, we do have a bound on the number of times the @@ -224,7 +229,7 @@ bool(), {msg_id(), seq_id()}}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). --spec(tx_commit/2 :: (queue_name(), [msg_id()]) -> 'ok'). +-spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -251,8 +256,8 @@ ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> tx_publish(MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}). -tx_commit(Q, MsgIds) when is_list(MsgIds) -> - gen_server:call(?SERVER, {tx_commit, Q, MsgIds}, infinity). +tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> + gen_server:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server:cast(?SERVER, {tx_cancel, MsgIds}). 
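The mode switch itself is just a server call, so the policy described in the comment above could live in whatever memory monitor eventually exists. A sketch under that assumption (memory_pressure/0 is entirely hypothetical):

    %% Hypothetical policy hook: drop to disk_only mode when memory is
    %% tight, return to the faster ram_disk mode when it is not.
    maybe_change_mode() ->
        case memory_pressure() of
            high -> rabbit_disk_queue:to_disk_only_mode();
            low  -> rabbit_disk_queue:to_ram_disk_mode()
        end.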
@@ -329,8 +334,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, State), {reply, Result, State1}; -handle_call({tx_commit, Q, MsgIds}, _From, State) -> - {ok, State1} = internal_tx_commit(Q, MsgIds, State), +handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> + {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), {reply, ok, State1}; handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate @@ -506,11 +511,12 @@ getReadHandle(File, State = ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), {FileHdl, State #dqstate {read_file_handles = {ReadHdls3, ReadHdlsAge3}}}. -internal_ack(Q, MsgIds, State) -> - remove_messages(Q, MsgIds, true, State). +internal_ack(Q, MsgSeqIds, State) -> + remove_messages(Q, MsgSeqIds, true, State). -%% Q is only needed if MnesiaDelete = true +%% Q is only needed if MnesiaDelete /= false %% called from tx_cancel with MnesiaDelete = false +%% called from internal_tx_cancel with MnesiaDelete = txn %% called from ack with MnesiaDelete = true remove_messages(Q, MsgSeqIds, MnesiaDelete, State = #dqstate { file_summary = FileSummary, @@ -539,10 +545,12 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, File, Offset, TotalSize}), Files2 end, - if MnesiaDelete -> - ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); - true -> ok - end, + ok = if MnesiaDelete -> + mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); + MnesiaDelete =:= txn -> + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); + true -> ok + end, Files3 end, sets:new(), MsgSeqIds), State2 = compact(Files, State), @@ -581,7 +589,7 @@ internal_tx_publish(MsgId, MsgBody, {ok, State} end. -internal_tx_commit(Q, MsgIds, +internal_tx_commit(Q, PubMsgIds, AckSeqIds, State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, sequences = Sequences @@ -594,18 +602,26 @@ internal_tx_commit(Q, MsgIds, {atomic, {Sync, WriteSeqId}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun (MsgId, {Acc, NextWriteSeqId}) -> - [{MsgId, _RefCount, File, _Offset, _TotalSize}] = - dets_ets_lookup(State, MsgId), - ok = mnesia:write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = - {Q, NextWriteSeqId}, - msg_id = MsgId, - is_delivered = false}, - write), - {Acc or (CurName =:= File), NextWriteSeqId + 1} - end, {false, InitWriteSeqId}, MsgIds) + %% must deal with publishes first, if we didn't + %% then we could end up acking a message before + %% it's been published, which is clearly + %% nonsense. I.e. in commit, do not do things in an + %% order which _could_not_ have happened. 
+                    {Sync2, WriteSeqId3} =
+                        lists:foldl(
+                          fun (MsgId, {Acc, NextWriteSeqId}) ->
+                                  [{MsgId, _RefCount, File, _Offset, _TotalSize}] =
+                                      dets_ets_lookup(State, MsgId),
+                                  ok = mnesia:write(rabbit_disk_queue,
+                                                    #dq_msg_loc { queue_and_seq_id =
+                                                                  {Q, NextWriteSeqId},
+                                                                  msg_id = MsgId,
+                                                                  is_delivered = false},
+                                                    write),
+                                  {Acc or (CurName =:= File), NextWriteSeqId + 1}
+                          end, {false, InitWriteSeqId}, PubMsgIds),
+                    remove_messages(Q, AckSeqIds, txn, State),
+                    {Sync2, WriteSeqId3}
          end),
     true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}),
     if Sync -> ok = file:sync(CurHdl);
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index fcd3d5f6..2640439e 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -698,6 +698,7 @@ test_disk_queue() ->
 rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) ->
     Startup = rdq_virgin(),
     rdq_start(),
+    rabbit_disk_queue:to_ram_disk_mode(),
     QCount = length(Qs),
     Msg = <<0:(8*MsgSizeBytes)>>,
     List = lists:seq(1, MsgCount),
         timer:tc(?MODULE, rdq_time_commands,
                  [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg)
                              || N <- List, _ <- Qs] end,
-                   fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List)
+                   fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List, [])
                              || Q <- Qs] end
                   ]]),
     {Deliver, ok} =
                        [begin {N, Msg, MsgSizeBytes, false, SeqId} =
                                   rabbit_disk_queue:deliver(Q), SeqId end
                         || N <- List],
-                   rabbit_disk_queue:ack(Q, SeqIds),
-                   ok = rabbit_disk_queue:tx_commit(Q, [])
+                   ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds)
               end || Q <- Qs]
      end]]),
     io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| ~14.10f~n",
@@ -735,7 +735,7 @@ rdq_stress_gc(MsgCount) ->
     Msg = <<0:(8*MsgSizeBytes)>>, % 256KB
     List = lists:seq(1, MsgCount),
     [rabbit_disk_queue:tx_publish(N, Msg) || N <- List],
-    rabbit_disk_queue:tx_commit(q, List),
+    rabbit_disk_queue:tx_commit(q, List, []),
     StartChunk = round(MsgCount / 20), % 5%
     AckList =
         lists:reverse(
@@ -759,10 +759,11 @@
                                   rabbit_disk_queue:deliver(q),
                               dict:store(MsgId, SeqId, Acc)
                       end, dict:new(), List),
-    rabbit_disk_queue:ack(q, [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict),
-                                    SeqId end
-                              || MsgId <- AckList]),
-    rabbit_disk_queue:tx_commit(q, []),
+    %% we really do want to ack each of these individually
+    [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict),
+           rabbit_disk_queue:ack(q, [SeqId]) end
+     || MsgId <- AckList],
+    rabbit_disk_queue:tx_commit(q, [], []),
     rdq_stop(),
     passed.
@@ -778,7 +779,7 @@ rdq_time_insane_startup() ->
     %% within 1GB and thus in a single file
     io:format("Publishing ~p empty messages...~n",[Count]),
     [rabbit_disk_queue:tx_publish(N, Msg) || N <- List],
-    rabbit_disk_queue:tx_commit(q, List),
+    rabbit_disk_queue:tx_commit(q, List, []),
     io:format("...done. Timing restart...~n", []),
     rdq_stop(),
     Micros = rdq_virgin(),
-- 
cgit v1.2.1


From c78df4ac0ce2519a4d4a523ecd0fc33e18f23514 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 14 May 2009 15:52:13 +0100
Subject: Some cosmetic changes (erlang style function names) and improvements
 to documentation. Also, implemented requeue, and corrected a bug in startup
 which would have led to crashes if acks had appeared non-contiguously prior
 to shutdown.
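A sketch of the requeue behaviour this commit introduces, following the shape of the tests (queue name q assumed; drain_until/3 is illustrative): a requeued message keeps its body, goes to the back of the queue, and comes round again with the delivered flag set.

    %% Sketch: deliver one message, requeue it instead of acking, and
    %% observe it reappear later, marked as previously delivered.
    requeue_roundtrip() ->
        {MsgId, Msg, Size, false, MsgSeqId} = rabbit_disk_queue:deliver(q),
        ok = rabbit_disk_queue:requeue(q, [MsgSeqId]),
        drain_until(MsgId, Msg, Size).

    drain_until(MsgId, Msg, Size) ->
        case rabbit_disk_queue:deliver(q) of
            {MsgId, Msg, Size, true, MsgSeqId} ->  %% our message, redelivered
                ok = rabbit_disk_queue:ack(q, [MsgSeqId]);
            {_OtherId, _, _, _, MsgSeqId} ->       %% something ahead of it
                ok = rabbit_disk_queue:ack(q, [MsgSeqId]),
                drain_until(MsgId, Msg, Size)
        end.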
--- src/rabbit_disk_queue.erl | 145 +++++++++++++++++++++++++++++++++++++--------- src/rabbit_tests.erl | 78 +++++++++++++++++++++++++ 2 files changed, 195 insertions(+), 28 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c08258c8..63076eb9 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1]). +-export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -181,21 +181,21 @@ %% the data the size of which is tracked by the ContiguousTop %% variable. Judicious use of a mirror is required). %% -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | X | | G | | G | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | D | | X | | F | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | X | | X | | E | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | C | | F | ===> | D | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | X | | X | | C | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | B | | X | | B | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% | A | | E | | A | -%% +-------+ +-------- --------- +%% +-------+ +-------+ +-------+ %% left right left %% %% From this reasoning, we do have a bound on the number of times the @@ -231,6 +231,7 @@ -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). +-spec(requeue/2 :: (queue_name(), [seq_id()]) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). @@ -262,6 +263,9 @@ tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSe tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server:cast(?SERVER, {tx_cancel, MsgIds}). +requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> + gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). + stop() -> gen_server:call(?SERVER, stop, infinity). @@ -305,7 +309,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {type, set} ]), MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), - true = ets:safe_fixtable(MsgLocationEts, true), State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, @@ -381,6 +384,9 @@ handle_cast({tx_publish, MsgId, MsgBody}, State) -> {noreply, State1}; handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), + {noreply, State1}; +handle_cast({requeue, Q, MsgSeqIds}, State) -> + {ok, State1} = internal_requeue(Q, MsgSeqIds, State), {noreply, State1}. handle_info(_Info, State) -> @@ -469,7 +475,7 @@ internal_deliver(Q, State = #dqstate { sequences = Sequences }) -> #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] -> [{MsgId, _RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), - {FileHdl, State1} = getReadHandle(File, State), + {FileHdl, State1} = get_read_handle(File, State), %% read the message {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), @@ -483,7 +489,7 @@ internal_deliver(Q, State = #dqstate { sequences = Sequences }) -> end end. 
-getReadHandle(File, State = +get_read_handle(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, read_file_handles_limit = ReadFileHandlesLimit }) -> Now = now(), @@ -653,6 +659,49 @@ internal_tx_cancel(MsgIds, State) -> MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). +internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> + %% We know that every seq_id in here is less than the ReadSeqId + %% you'll get if you look up this queue in Sequences (i.e. they've + %% already been delivered). We also know that the rows for these + %% messages are still in rabbit_disk_queue (i.e. they've not been + %% ack'd). + + %% Now, it would be nice if we could adjust the sequence ids in + %% rabbit_disk_queue (mnesia) to create a contiguous block and + %% then drop the ReadSeqId for the queue by the corresponding + %% amount. However, this is not safe because there may be other + %% sequence ids which have been sent out as part of deliveries + %% which are not being requeued. As such, moving things about in + %% rabbit_disk_queue _under_ the current ReadSeqId would result in + %% such sequence ids referring to the wrong messages. + + %% Therefore, the only solution is to take these messages, and to + %% reenqueue them at the top of the queue. Usefully, this only + %% affects the Sequences and rabbit_disk_queue structures - there + %% is no need to physically move the messages about on disk, so + %% MsgLocation and FileSummary stay put (which makes further sense + %% as they have no concept of sequence id anyway). + + %% the Q _must_ already exist + [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q), + {atomic, WriteSeqId2} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foldl( + fun ({MsgId, SeqId}, NextWriteSeqId) -> + [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { queue_and_seq_id = {Q, NextWriteSeqId }}, + write), + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), + NextWriteSeqId + 1 + end, WriteSeqId, MsgSeqIds) + end), + true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2}), + {ok, State}. + %% ---- ROLLING OVER THE APPEND FILE ---- maybe_roll_to_new_file(Offset, @@ -688,9 +737,9 @@ compact(FilesSet, State) -> RemainingFiles = lists:foldl(fun (File, Acc) -> delete_empty_files(File, Acc, State) end, [], Files), - lists:foldl(fun combineFile/2, State, lists:reverse(RemainingFiles)). + lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). 
-combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, +combine_file(File, State = #dqstate { file_size_limit = FileSizeLimit, file_summary = FileSummary, current_file_name = CurName }) -> @@ -711,7 +760,7 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, if FileSizeLimit >= RightSumData -> %% here, Right will be the source and so will be deleted, %% File will be the destination - State1 = combineFiles(RightObj, FileObj, + State1 = combine_files(RightObj, FileObj, State), %% this could fail if RightRight is undefined %% left is the 4th field @@ -739,7 +788,7 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, if FileSizeLimit >= LeftSumData -> %% here, File will be the source and so will be deleted, %% Left will be the destination - State1 = combineFiles(FileObj, LeftObj, State), + State1 = combine_files(FileObj, LeftObj, State), %% this could fail if Right is undefined %% left is the 4th field ets:update_element(FileSummary, Right, {4, Left}), @@ -754,7 +803,7 @@ combineFile(File, State = #dqstate { file_size_limit = FileSizeLimit, end end. -sortMsgLocationsByOffset(Asc, List) -> +sort_msg_locations_by_offset(Asc, List) -> Comp = if Asc -> fun erlang:'<'/2; true -> fun erlang:'>'/2 end, @@ -762,7 +811,7 @@ sortMsgLocationsByOffset(Asc, List) -> Comp(OffA, OffB) end, List). -truncateAndExtendFile(FileHdl, Lowpoint, Highpoint) -> +truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), ok = file:truncate(FileHdl), {ok, Highpoint} = file:position(FileHdl, {bof, Highpoint}), @@ -770,12 +819,12 @@ truncateAndExtendFile(FileHdl, Lowpoint, Highpoint) -> {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), ok. -combineFiles({Source, SourceValid, _SourceContiguousTop, +combine_files({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, State1) -> - State = closeFile(Source, closeFile(Destination, State1)), + State = close_file(Source, close_file(Destination, State1)), {ok, SourceHdl} = file:open(form_filename(Source), [read, write, raw, binary, delayed_write, read_ahead]), @@ -788,7 +837,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% then truncate, copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source if DestinationContiguousTop =:= DestinationValid -> - ok = truncateAndExtendFile(DestinationHdl, + ok = truncate_and_extend_file(DestinationHdl, DestinationValid, ExpectedSize); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, @@ -805,7 +854,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, Offset < DestinationContiguousTop %% Given expected access patterns, I suspect that the list should be %% naturally sorted as we require, however, we need to enforce it anyway - end, sortMsgLocationsByOffset(true, + end, sort_msg_locations_by_offset(true, dets_ets_match_object(State, {'_', '_', Destination, @@ -854,7 +903,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), - ok = truncateAndExtendFile(DestinationHdl, + ok = truncate_and_extend_file(DestinationHdl, DestinationContiguousTop, ExpectedSize), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), %% position in DestinationHdl should now be @@ -864,7 +913,7 
@@ combineFiles({Source, SourceValid, _SourceContiguousTop, ok = file:delete(form_filename(Tmp)) end, SourceWorkList = - sortMsgLocationsByOffset(true, + sort_msg_locations_by_offset(true, dets_ets_match_object(State, {'_', '_', Source, '_', '_'})), @@ -909,7 +958,7 @@ combineFiles({Source, SourceValid, _SourceContiguousTop, ok = file:delete(form_filename(Source)), State. -closeFile(File, State = #dqstate { read_file_handles = +close_file(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge} }) -> case dict:find(File, ReadHdls) of error -> @@ -981,8 +1030,6 @@ load_from_disk(State) -> {ok, State2}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> - %% next-seqid-to-read is the lowest seqid which has is_delivered = - %% false {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), @@ -1003,8 +1050,50 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> end end, true, rabbit_disk_queue) end), + remove_gaps_in_sequences(State), State. +remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> + %% read the comments at internal_requeue. + + %% Because we are at startup, we know that no sequence ids have + %% been issued (or at least, they were, but have been + %% forgotten). Therefore, we can nicely shuffle up and not + %% worry. Note that I'm choosing to shuffle up, but alternatively + %% we could shuffle downwards. However, I think there's greater + %% likelihood of gaps being at the bottom rather than the top of + %% the queue, so shuffling up should be the better bet. + {atomic, _} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foreach( + fun ({Q, ReadSeqId, WriteSeqId}) -> + Gap = shuffle_up(Q, WriteSeqId - 1, WriteSeqId - ReadSeqId, 0), + true = ets:insert(Sequences, {Q, ReadSeqId + Gap, WriteSeqId}) + end, ets:match_object(Sequences, '_')) + end). + +shuffle_up(_Q, _SeqId, 0, Gap) -> + Gap; +shuffle_up(Q, SeqId, N, 0) -> + %% no gaps so far so don't need to rewrite + case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of + [] -> shuffle_up(Q, SeqId - 1, N - 1, 1); + _ -> shuffle_up(Q, SeqId - 1, N - 1, 0) + end; +shuffle_up(Q, SeqId, N, Gap) -> + %% have gaps, so whenever we find something, rewrite it higher up + case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of + [] -> shuffle_up(Q, SeqId - 1, N - 1, Gap + 1); + [Obj = #dq_msg_loc { is_delivered = true }] -> + mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }}, + write), + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), + shuffle_up(Q, SeqId - 1, N - 1, Gap) + end. 
+ load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), @@ -1014,7 +1103,7 @@ load_messages(Left, [], State) -> Offset = case dets_ets_match_object(State, {'_', '_', Left, '_', '_'}) of [] -> 0; L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] = - sortMsgLocationsByOffset(false, L), + sort_msg_locations_by_offset(false, L), MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State #dqstate { current_file_num = Num, current_file_name = Left, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 2640439e..5924bb38 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -693,6 +693,8 @@ test_disk_queue() -> ], rdq_virgin(), passed = rdq_stress_gc(10000), + passed = rdq_test_startup_with_queue_gaps(), + passed = rdq_test_redeliver(), passed. rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> @@ -785,6 +787,82 @@ rdq_time_insane_startup() -> Micros = rdq_virgin(), io:format("...startup took ~w microseconds.~n", [Micros]). +rdq_test_startup_with_queue_gaps() -> + rdq_virgin(), + rdq_start(), + Msg = <<0:(8*256)>>, + Total = 1000, + Half = round(Total/2), + All = lists:seq(1,Total), + [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + rabbit_disk_queue:tx_commit(q, All, []), + io:format("Publish done~n", []), + %% deliver first half + Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(1,Half)], + io:format("Deliver first half done~n", []), + %% ack every other message we have delivered (starting at the _first_) + lists:foldl(fun (SeqId2, true) -> + rabbit_disk_queue:ack(q, [SeqId2]), + false; + (_SeqId2, false) -> + true + end, true, Seqs), + rabbit_disk_queue:tx_commit(q, [], []), + io:format("Acked every other message delivered done~n", []), + rdq_stop(), + rdq_start(), + io:format("Startup (with shuffle) done~n", []), + %% should have shuffled up. So we should now get lists:seq(2,500,2) already delivered + Seqs2 = [begin {N, Msg, 256, true, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(2,Half,2)], + rabbit_disk_queue:tx_commit(q, [], Seqs2), + io:format("Reread non-acked messages done~n", []), + %% and now fetch the rest + Seqs3 = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(1 + Half,Total)], + rabbit_disk_queue:tx_commit(q, [], Seqs3), + io:format("Read second half done~n", []), + empty = rabbit_disk_queue:deliver(q), + rdq_stop(), + passed. 
+ +rdq_test_redeliver() -> + rdq_virgin(), + rdq_start(), + Msg = <<0:(8*256)>>, + Total = 1000, + Half = round(Total/2), + All = lists:seq(1,Total), + [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + rabbit_disk_queue:tx_commit(q, All, []), + io:format("Publish done~n", []), + %% deliver first half + Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(1,Half)], + io:format("Deliver first half done~n", []), + %% now requeue every other message (starting at the _first_) + %% and ack the other ones + lists:foldl(fun (SeqId2, true) -> + rabbit_disk_queue:requeue(q, [SeqId2]), + false; + (SeqId2, false) -> + rabbit_disk_queue:ack(q, [SeqId2]), + true + end, true, Seqs), + rabbit_disk_queue:tx_commit(q, [], []), + io:format("Redeliver and acking done~n", []), + %% we should now get the 2nd half in order, followed by every-other-from-the-first-half + Seqs2 = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(1+Half, Total)], + rabbit_disk_queue:tx_commit(q, [], Seqs2), + Seqs3 = [begin {N, Msg, 256, true, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(1, Half, 2)], + rabbit_disk_queue:tx_commit(q, [], Seqs3), + empty = rabbit_disk_queue:deliver(q), + rdq_stop(), + passed. + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). -- cgit v1.2.1 From 7783d77f7ef52b1879806ba535db03b3366583d3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 14 May 2009 17:57:42 +0100 Subject: Reworking shuffle_up to sate Matthias's passion for beautiful code --- src/rabbit_disk_queue.erl | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 63076eb9..e3b47e89 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1069,30 +1069,28 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> ok = mnesia:write_lock_table(rabbit_disk_queue), lists:foreach( fun ({Q, ReadSeqId, WriteSeqId}) -> - Gap = shuffle_up(Q, WriteSeqId - 1, WriteSeqId - ReadSeqId, 0), + Gap = shuffle_up(Q, ReadSeqId - 1, WriteSeqId - 1, 0), true = ets:insert(Sequences, {Q, ReadSeqId + Gap, WriteSeqId}) end, ets:match_object(Sequences, '_')) end). -shuffle_up(_Q, _SeqId, 0, Gap) -> +shuffle_up(_Q, SeqId, SeqId, Gap) -> Gap; -shuffle_up(Q, SeqId, N, 0) -> - %% no gaps so far so don't need to rewrite - case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of - [] -> shuffle_up(Q, SeqId - 1, N - 1, 1); - _ -> shuffle_up(Q, SeqId - 1, N - 1, 0) - end; -shuffle_up(Q, SeqId, N, Gap) -> - %% have gaps, so whenever we find something, rewrite it higher up - case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of - [] -> shuffle_up(Q, SeqId - 1, N - 1, Gap + 1); - [Obj = #dq_msg_loc { is_delivered = true }] -> - mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }}, - write), - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), - shuffle_up(Q, SeqId - 1, N - 1, Gap) - end. +shuffle_up(Q, BaseSeqId, SeqId, Gap) -> + GapInc = + case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of + [] -> 1; + [Obj = #dq_msg_loc { is_delivered = IsDelivered }] when IsDelivered + orelse (Gap =:= 0) -> + if Gap =:= 0 -> ok; + true -> mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }}, + write), + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) + end, + 0 + end, + shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). 
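The recursion is easier to see on a toy model. A sketch of the same packing logic over a plain list (pure function, no mnesia), with hole standing for a slot whose message was acked, ordered from the read end to the write end:

    %% Toy model of shuffle_up/4: survivors keep their relative order
    %% but are packed against the write end; the read pointer then
    %% advances by the number of holes, as remove_gaps_in_sequences does.
    pack(Slots) ->
        Survivors = [S || S <- Slots, S =/= hole],
        Gap = length(Slots) - length(Survivors),
        {Gap, lists:duplicate(Gap, hole) ++ Survivors}.

    %% 1> pack([a, hole, b, hole, c]).
    %% {2,[hole,hole,a,b,c]}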
load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> -- cgit v1.2.1 From 7eb2831da1431d4faadd6c7b24786da687660c47 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 14 May 2009 18:00:03 +0100 Subject: An implementation of the queue using database. This will only work with postgres - it really is impossible to make this kinda thing db agnostic. Performance is terrible, but it does work, and has an identical API to the rabbit_disk_queue --- src/rabbit_db_queue.erl | 414 +++++++++++++++++++++++++++++++++++++++++ src/rabbit_db_queue_schema.sql | 22 +++ 2 files changed, 436 insertions(+) create mode 100644 src/rabbit_db_queue.erl create mode 100644 src/rabbit_db_queue_schema.sql diff --git a/src/rabbit_db_queue.erl b/src/rabbit_db_queue.erl new file mode 100644 index 00000000..bd6820d5 --- /dev/null +++ b/src/rabbit_db_queue.erl @@ -0,0 +1,414 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +%% So, assuming you're on some debian linux type system, +%% apt-get install postgresql odbc-postgresql unixodbc unixodbc-bin +%% sudo odbcinst -i -d -f /usr/share/psqlodbc/odbcinst.ini.template + +%% Now set up in postgresql a user and a database that user can +%% access. For example, the database could be called rabbit_db_queue +%% and the username could be rabbit and the password could be rabbit. + +%% sudo ODBCConfig +%% set up a system wide dsn with the above settings in it. +%% now drop into the erlang shell, and you should not get an error after: + +%% > odbc:start(). +%% < ok. +%% > odbc:connect("DSN=rabbit_db_queue", []). +%% < {ok,<0.325.0>} +%% ( replace rabbit_db_queue with the name of your DSN that you configured ) + +%% the connection string (eg "DSN=rabbit_db_queue") is what you pass +%% to start_link. Don't just pass the DSN name. + +-module(rabbit_db_queue). + +-behaviour(gen_server). + +-export([start_link/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). + +-export([stop/0, stop_and_obliterate/0]). + +-include_lib("stdlib/include/qlc.hrl"). +-include("rabbit.hrl"). + +-define(SERVER, ?MODULE). 
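Once a DSN exists, wiring it up looks just like the disk queue. A minimal smoke test sketch, assuming a DSN configured as described above (the name is whatever you chose):

    %% Minimal smoke test sketch; the API is identical in shape to
    %% rabbit_disk_queue, only the backing store differs.
    db_queue_smoke_test() ->
        {ok, _Pid} = rabbit_db_queue:start_link("DSN=rabbit_db_queue"),
        ok = rabbit_db_queue:tx_publish(msg1, <<"hello">>),
        ok = rabbit_db_queue:tx_commit(q, [msg1], []),
        {msg1, <<"hello">>, 5, false, MsgSeqId} = rabbit_db_queue:deliver(q),
        ok = rabbit_db_queue:ack(q, [MsgSeqId]),
        ok = rabbit_db_queue:stop().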
+ +%% ---- SPECS ---- + +-ifdef(use_specs). + +-type(seq_id() :: non_neg_integer()). + +-spec(start_link/1 :: (non_neg_integer()) -> + {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). +-spec(deliver/1 :: (queue_name()) -> + {'empty' | {msg_id(), binary(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}}). +-spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). +-spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). +-spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). +-spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). +-spec(requeue/2 :: (queue_name(), [seq_id()]) -> 'ok'). +-spec(stop/0 :: () -> 'ok'). +-spec(stop_and_obliterate/0 :: () -> 'ok'). + +-endif. + +%% ---- PUBLIC API ---- + +start_link(DSN) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, + [DSN], []). + +publish(Q, MsgId, Msg) when is_binary(Msg) -> + gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). + +deliver(Q) -> + gen_server:call(?SERVER, {deliver, Q}, infinity). + +ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> + gen_server:cast(?SERVER, {ack, Q, MsgSeqIds}). + +tx_publish(MsgId, Msg) when is_binary(Msg) -> + gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}). + +tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> + gen_server:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). + +tx_cancel(MsgIds) when is_list(MsgIds) -> + gen_server:cast(?SERVER, {tx_cancel, MsgIds}). + +requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> + gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). + +stop() -> + gen_server:call(?SERVER, stop, infinity). + +stop_and_obliterate() -> + gen_server:call(?SERVER, stop_vaporise, infinity). + +%% ---- GEN-SERVER INTERNAL API ---- +-record(dbstate, { db_conn }). + +init([DSN]) -> + process_flag(trap_exit, true), + odbc:start(), + {ok, Conn} = odbc:connect(DSN, [{auto_commit, off}, {tuple_row, on}, + {scrollable_cursors, off}, {trace_driver, off}]), + State = #dbstate { db_conn = Conn }, + compact_already_delivered(State), + {ok, State}. + +handle_call({deliver, Q}, _From, State) -> + {ok, Result, State1} = internal_deliver(Q, State), + {reply, Result, State1}; +handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> + {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), + {reply, ok, State1}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; %% gen_server now calls terminate +handle_call(stop_vaporise, _From, State = #dbstate { db_conn = Conn }) -> + odbc:sql_query(Conn, "delete from ledger"), + odbc:sql_query(Conn, "delete from sequence"), + odbc:sql_query(Conn, "delete from message"), + odbc:commit(Conn, commit), + {stop, normal, ok, State}. + %% gen_server now calls terminate, which then calls shutdown + +handle_cast({publish, Q, MsgId, MsgBody}, State) -> + {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), + {noreply, State1}; +handle_cast({ack, Q, MsgSeqIds}, State) -> + {ok, State1} = internal_ack(Q, MsgSeqIds, State), + {noreply, State1}; +handle_cast({tx_publish, MsgId, MsgBody}, State) -> + {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), + {noreply, State1}; +handle_cast({tx_cancel, MsgIds}, State) -> + {ok, State1} = internal_tx_cancel(MsgIds, State), + {noreply, State1}; +handle_cast({requeue, Q, MsgSeqIds}, State) -> + {ok, State1} = internal_requeue(Q, MsgSeqIds, State), + {noreply, State1}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + shutdown(State). 
+ +shutdown(State = #dbstate { db_conn = Conn }) -> + odbc:disconnect(Conn), + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% ---- UTILITY FUNCTIONS ---- + +binary_to_escaped_string(Bin) when is_binary(Bin) -> + "E'" ++ lists:flatten(lists:reverse(binary_to_escaped_string(Bin, []))) ++ "'". + +binary_to_escaped_string(<<>>, Acc) -> + Acc; +binary_to_escaped_string(<>, Acc) -> + binary_to_escaped_string(Rest, [escape_byte(Byte) | Acc]). + +escape_byte(39) -> + "\\\\047"; +escape_byte(92) -> + "\\\\134"; +escape_byte(B) when B > 31 andalso B < 127 -> + B; +escape_byte(B) -> + case io_lib:format("~.8B", [B]) of + O1 = [[_]] -> + "\\\\00" ++ O1; + O2 = [[_,_]] -> + "\\\\0" ++ O2; + O3 = [[_,_,_]] -> + "\\\\" ++ O3 + end. + +escaped_string_to_binary(Str) when is_list(Str) -> + list_to_binary(lists:reverse(escaped_string_to_binary(Str, []))). + +escaped_string_to_binary([], Acc) -> + Acc; +escaped_string_to_binary([$\\,$\\|Rest], Acc) -> + escaped_string_to_binary(Rest, [$\\ | Acc]); +escaped_string_to_binary([$\\,A,B,C|Rest], Acc) -> + escaped_string_to_binary(Rest, [(list_to_integer([A])*64) + + (list_to_integer([B])*8) + + list_to_integer([C]) + | Acc]); +escaped_string_to_binary([C|Rest], Acc) -> + escaped_string_to_binary(Rest, [C|Acc]). + +hex_string_to_binary(Str) when is_list(Str) -> + list_to_binary(lists:reverse(hex_string_to_binary(Str, []))). + +hex_string_to_binary([], Acc) -> + Acc; +hex_string_to_binary([A,B|Rest], Acc) -> + {ok, [N], []} = io_lib:fread("~16u", [A,B]), + hex_string_to_binary(Rest, [N | Acc]). + +%% ---- INTERNAL RAW FUNCTIONS ---- + +internal_deliver(Q, State = #dbstate { db_conn = Conn }) -> + QStr = binary_to_escaped_string(term_to_binary(Q)), + case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of + {selected, _, []} -> + odbc:commit(Conn, commit), + {ok, empty, State}; + {selected, _, [{ReadSeqId}]} -> + case odbc:sql_query(Conn, "select is_delivered, msg_id from ledger where queue = " ++ QStr ++ + " and seq_id = " ++ integer_to_list(ReadSeqId)) of + {selected, _, []} -> + {ok, empty, State}; + {selected, _, [{IsDeliveredStr, MsgIdStr}]} -> + IsDelivered = IsDeliveredStr /= "0", + if IsDelivered -> ok; + true -> odbc:sql_query(Conn, "update ledger set is_delivered = true where queue = " ++ + QStr ++ " and seq_id = " ++ integer_to_list(ReadSeqId)) + end, + MsgId = binary_to_term(hex_string_to_binary(MsgIdStr)), + %% yeah, this is really necessary. sigh + MsgIdStr2 = binary_to_escaped_string(term_to_binary(MsgId)), + {selected, _, [{MsgBodyStr}]} = + odbc:sql_query(Conn, "select msg from message where msg_id = " ++ MsgIdStr2), + odbc:sql_query(Conn, "update sequence set next_read = " ++ integer_to_list(ReadSeqId + 1) ++ + " where queue = " ++ QStr), + odbc:commit(Conn, commit), + MsgBody = hex_string_to_binary(MsgBodyStr), + BodySize = size(MsgBody), + {ok, {MsgId, MsgBody, BodySize, IsDelivered, {MsgId, ReadSeqId}}, State} + end + end. + +internal_ack(Q, MsgSeqIds, State) -> + remove_messages(Q, MsgSeqIds, true, State). 
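A quick sanity check on the escaping helpers above: the quote byte (39) has to survive two levels of unescaping, one in PostgreSQL's E'...' literal and one in the bytea input parser, hence the doubled backslash in the output. Written in Erlang source form each backslash is escaped again (the functions are internal, so this assumes the module is compiled with export_all or the helpers exported):

    %% The right-hand side yields the eleven characters  E'it\\047s'
    "E'it\\\\047s'" = binary_to_escaped_string(<<"it's">>).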
+ +%% Q is only needed if LedgerDelete /= false +%% called from tx_cancel with LedgerDelete = false +%% called from internal_tx_cancel with LedgerDelete = true +%% called from ack with LedgerDelete = true +remove_messages(Q, MsgSeqIds, LedgerDelete, State = #dbstate { db_conn = Conn }) -> + QStr = binary_to_escaped_string(term_to_binary(Q)), + lists:foreach( + fun ({MsgId, SeqId}) -> + MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), + {selected, _, [{RefCount}]} = + odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ + MsgIdStr), + case RefCount of + 1 -> odbc:sql_query(Conn, "delete from message where msg_id = " ++ + MsgIdStr); + _ -> odbc:sql_query(Conn, "update message set ref_count = " ++ + integer_to_list(RefCount - 1) ++ " where msg_id = " ++ + MsgIdStr) + end, + if LedgerDelete -> + odbc:sql_query(Conn, "delete from ledger where queue = " ++ + QStr ++ " and seq_id = " ++ integer_to_list(SeqId)); + true -> ok + end + end, MsgSeqIds), + odbc:commit(Conn, commit), + {ok, State}. + +internal_tx_publish(MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> + MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), + MsgStr = binary_to_escaped_string(MsgBody), + case odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ MsgIdStr) of + {selected, _, []} -> + odbc:sql_query(Conn, "insert into message (msg_id, msg, ref_count) values (" ++ + MsgIdStr ++ ", " ++ MsgStr ++ ", 1)"); + {selected, _, [{RefCount}]} -> + odbc:sql_query(Conn, "update message set ref_count = " ++ + integer_to_list(RefCount + 1) ++ " where msg_id = " ++ MsgIdStr) + end, + odbc:commit(Conn, commit), + {ok, State}. + +internal_tx_commit(Q, PubMsgIds, AckSeqIds, State = #dbstate { db_conn = Conn }) -> + QStr = binary_to_escaped_string(term_to_binary(Q)), + {InsertOrUpdate, NextWrite} = + case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of + {selected, _, []} -> {insert, 0}; + {selected, _, [{NextWrite2}]} -> {update, NextWrite2} + end, + NextWrite3 = + lists:foldl(fun (MsgId, WriteSeqInteger) -> + MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), + odbc:sql_query(Conn, + "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ + QStr ++ ", " ++ integer_to_list(WriteSeqInteger) ++ ", false, " ++ + MsgIdStr ++ ")"), + WriteSeqInteger + 1 + end, NextWrite, PubMsgIds), + case InsertOrUpdate of + update -> odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(NextWrite3) ++ + " where queue = " ++ QStr); + insert -> odbc:sql_query(Conn, "insert into sequence (queue, next_read, next_write) values (" ++ + QStr ++ ", 0, " ++ integer_to_list(NextWrite3) ++ ")") + end, + odbc:commit(Conn, commit), + remove_messages(Q, AckSeqIds, true, State), + {ok, State}. 
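A sketch of how the ref-counting above plays out when one message body is shared by two queues: the message table holds a single row with ref_count = 2, and that row only disappears with the second ack.

    shared_message_example() ->
        ok = rabbit_db_queue:tx_publish(shared, <<"body">>),
        ok = rabbit_db_queue:tx_commit(q1, [shared], []),
        ok = rabbit_db_queue:tx_publish(shared, <<"body">>),  %% ref_count -> 2
        ok = rabbit_db_queue:tx_commit(q2, [shared], []),
        {shared, <<"body">>, 4, false, Ack1} = rabbit_db_queue:deliver(q1),
        ok = rabbit_db_queue:ack(q1, [Ack1]),                 %% ref_count -> 1
        {shared, <<"body">>, 4, false, Ack2} = rabbit_db_queue:deliver(q2),
        ok = rabbit_db_queue:ack(q2, [Ack2]).                 %% row deleted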
+ +internal_publish(Q, MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> + {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), + MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), + QStr = binary_to_escaped_string(term_to_binary(Q)), + NextWrite = + case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of + {selected, _, []} -> + odbc:sql_query(Conn, + "insert into sequence (queue, next_read, next_write) values (" ++ + QStr ++ ", 0, 1)"), + 0; + {selected, _, [{NextWrite2}]} -> + odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(1 + NextWrite2) ++ + " where queue = " ++ QStr), + NextWrite2 + end, + odbc:sql_query(Conn, "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ + QStr ++ ", " ++ integer_to_list(NextWrite) ++ ", false, " ++ MsgIdStr ++ ")"), + odbc:commit(Conn, commit), + {ok, State1}. + +internal_tx_cancel(MsgIds, State) -> + MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), + remove_messages(undefined, MsgSeqIds, false, State). + +internal_requeue(Q, MsgSeqIds, State = #dbstate { db_conn = Conn }) -> + QStr = binary_to_escaped_string(term_to_binary(Q)), + {selected, _, [{WriteSeqId}]} = + odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr), + WriteSeqId2 = + lists:foldl( + fun ({_MsgId, SeqId}, NextWriteSeqId) -> + odbc:sql_query(Conn, "update ledger set seq_id = " ++ integer_to_list(NextWriteSeqId) ++ + " where seq_id = " ++ integer_to_list(SeqId) ++ " and queue = " ++ QStr), + NextWriteSeqId + 1 + end, WriteSeqId, MsgSeqIds), + odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(WriteSeqId2) ++ + " where queue = " ++ QStr), + odbc:commit(Conn, commit), + {ok, State}. + + +compact_already_delivered(#dbstate { db_conn = Conn }) -> + {selected, _, Seqs} = odbc:sql_query(Conn, "select queue, next_read from sequence"), + lists:foreach( + fun ({QHexStr, ReadSeqId}) -> + Q = binary_to_term(hex_string_to_binary(QHexStr)), + QStr = binary_to_escaped_string(term_to_binary(Q)), + case odbc:sql_query(Conn, "select min(seq_id) from ledger where queue = " + ++ QStr) of + {selected, _, []} -> ok; + {selected, _, [{null}]} -> ok; %% AGH! + {selected, _, [{Min}]} -> + Gap = shuffle_up(Conn, QStr, Min - 1, ReadSeqId - 1, 0), + odbc:sql_query(Conn, "update sequence set next_read = " ++ + integer_to_list(Min + Gap) ++ + " where queue = " ++ QStr) + end + end, Seqs), + odbc:commit(Conn, commit). + +shuffle_up(_Conn, _QStr, SeqId, SeqId, Gap) -> + Gap; +shuffle_up(Conn, QStr, BaseSeqId, SeqId, Gap) -> + GapInc = + case odbc:sql_query(Conn, "select count(1) from ledger where queue = " ++ + QStr ++ " and seq_id = " ++ integer_to_list(SeqId)) of + {selected, _, [{"0"}]} -> + 1; + {selected, _, [{"1"}]} -> + if Gap =:= 0 -> ok; + true -> odbc:sql_query(Conn, "update ledger set seq_id = " ++ + integer_to_list(SeqId + Gap) ++ " where seq_id = " ++ + integer_to_list(SeqId) ++ " and queue = " ++ QStr) + end, + 0 + end, + shuffle_up(Conn, QStr, BaseSeqId, SeqId - 1, Gap + GapInc). 
diff --git a/src/rabbit_db_queue_schema.sql b/src/rabbit_db_queue_schema.sql new file mode 100644 index 00000000..f5c49e8d --- /dev/null +++ b/src/rabbit_db_queue_schema.sql @@ -0,0 +1,22 @@ +create table message ( + msg_id bytea PRIMARY KEY, + msg bytea, + ref_count integer NOT NULL +); +create index message_msg_id_index on message (msg_id); + +create table sequence ( + queue bytea PRIMARY KEY, + next_read integer NOT NULL, + next_write integer NOT NULL +); +create index sequence_queue_index on sequence (queue); + +create table ledger ( + queue bytea NOT NULL, + seq_id integer NOT NULL, + is_delivered boolean NOT NULL, + msg_id bytea NOT NULL +); +create index ledger_queue_seq_id_index on ledger (queue, seq_id); + -- cgit v1.2.1 From e99eaba3fab844690f985f6e19aac66594e98dc4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 15 May 2009 11:39:44 +0100 Subject: added phantom_deliver. This does everything that deliver does but it doesn't actually read the message. This is useful if the same messages are being tracked in multiple different queues (eg a RAM queue and a disk-backed queue) and you want to mark the message delivered without it being retrieved. It still needs acking in the normal way. --- src/rabbit_db_queue.erl | 31 ++++++++++++++++++++++--------- src/rabbit_disk_queue.erl | 38 +++++++++++++++++++++++++------------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/rabbit_db_queue.erl b/src/rabbit_db_queue.erl index bd6820d5..495bdafb 100644 --- a/src/rabbit_db_queue.erl +++ b/src/rabbit_db_queue.erl @@ -59,7 +59,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). +-export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). -export([stop/0, stop_and_obliterate/0]). @@ -80,6 +80,8 @@ -spec(deliver/1 :: (queue_name()) -> {'empty' | {msg_id(), binary(), non_neg_integer(), bool(), {msg_id(), seq_id()}}}). +-spec(phantom_deliver/1 :: (queue_name()) -> + { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). @@ -102,6 +104,9 @@ publish(Q, MsgId, Msg) when is_binary(Msg) -> deliver(Q) -> gen_server:call(?SERVER, {deliver, Q}, infinity). +phantom_deliver(Q) -> + gen_server:call(?SERVER, {phantom_deliver, Q}). + ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server:cast(?SERVER, {ack, Q, MsgSeqIds}). @@ -136,7 +141,10 @@ init([DSN]) -> {ok, State}. 
handle_call({deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, State), + {ok, Result, State1} = internal_deliver(Q, true, State), + {reply, Result, State1}; +handle_call({phantom_deliver, Q}, _From, State) -> + {ok, Result, State1} = internal_deliver(Q, false, State), {reply, Result, State1}; handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), @@ -232,7 +240,7 @@ hex_string_to_binary([A,B|Rest], Acc) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, State = #dbstate { db_conn = Conn }) -> +internal_deliver(Q, ReadMsg, State = #dbstate { db_conn = Conn }) -> QStr = binary_to_escaped_string(term_to_binary(Q)), case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of {selected, _, []} -> @@ -252,14 +260,19 @@ internal_deliver(Q, State = #dbstate { db_conn = Conn }) -> MsgId = binary_to_term(hex_string_to_binary(MsgIdStr)), %% yeah, this is really necessary. sigh MsgIdStr2 = binary_to_escaped_string(term_to_binary(MsgId)), - {selected, _, [{MsgBodyStr}]} = - odbc:sql_query(Conn, "select msg from message where msg_id = " ++ MsgIdStr2), odbc:sql_query(Conn, "update sequence set next_read = " ++ integer_to_list(ReadSeqId + 1) ++ " where queue = " ++ QStr), - odbc:commit(Conn, commit), - MsgBody = hex_string_to_binary(MsgBodyStr), - BodySize = size(MsgBody), - {ok, {MsgId, MsgBody, BodySize, IsDelivered, {MsgId, ReadSeqId}}, State} + if ReadMsg -> + {selected, _, [{MsgBodyStr}]} = + odbc:sql_query(Conn, "select msg from message where msg_id = " ++ MsgIdStr2), + odbc:commit(Conn, commit), + MsgBody = hex_string_to_binary(MsgBodyStr), + BodySize = size(MsgBody), + {ok, {MsgId, MsgBody, BodySize, IsDelivered, {MsgId, ReadSeqId}}, State}; + true -> + odbc:commit(Conn, commit), + {ok, {MsgId, IsDelivered, {MsgId, ReadSeqId}}, State} + end end end. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e3b47e89..1a19fd6f 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). +-export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -227,6 +227,8 @@ -spec(deliver/1 :: (queue_name()) -> {'empty' | {msg_id(), binary(), non_neg_integer(), bool(), {msg_id(), seq_id()}}}). +-spec(phantom_deliver/1 :: (queue_name()) -> + { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). @@ -251,6 +253,9 @@ publish(Q, MsgId, Msg) when is_binary(Msg) -> deliver(Q) -> gen_server:call(?SERVER, {deliver, Q}, infinity). +phantom_deliver(Q) -> + gen_server:call(?SERVER, {phantom_deliver, Q}). + ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server:cast(?SERVER, {ack, Q, MsgSeqIds}). @@ -335,7 +340,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, State1 #dqstate { current_file_handle = FileHdl }}. 
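A sketch contrasting the two delivery calls (queue q assumed to hold at least two messages): deliver/1 reads the body off disk, phantom_deliver/1 returns only the bookkeeping; both mark the message delivered and both results still need acking.

    phantom_example() ->
        {_IdA, _Body, _Size, false, AckA} = rabbit_disk_queue:deliver(q),
        {_IdB, false, AckB} = rabbit_disk_queue:phantom_deliver(q),
        ok = rabbit_disk_queue:ack(q, [AckA, AckB]).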
handle_call({deliver, Q}, _From, State) ->
-    {ok, Result, State1} = internal_deliver(Q, State),
+    {ok, Result, State1} = internal_deliver(Q, true, State),
+    {reply, Result, State1};
+handle_call({phantom_deliver, Q}, _From, State) ->
+    {ok, Result, State1} = internal_deliver(Q, false, State),
     {reply, Result, State1};
 handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) ->
     {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State),
@@ -465,7 +473,7 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mo
 
 %% ---- INTERNAL RAW FUNCTIONS ----
 
-internal_deliver(Q, State = #dqstate { sequences = Sequences }) ->
+internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) ->
     case ets:lookup(Sequences, Q) of
         [] -> {ok, empty, State};
         [{Q, ReadSeqId, WriteSeqId}] ->
@@ -475,17 +483,21 @@
                  #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] ->
                 [{MsgId, _RefCount, File, Offset, TotalSize}] =
                     dets_ets_lookup(State, MsgId),
-                {FileHdl, State1} = get_read_handle(File, State),
-                %% read the message
-                {ok, {MsgBody, BodySize}} =
-                    read_message_at_offset(FileHdl, Offset, TotalSize),
-                if Delivered -> ok;
-                   true -> ok = mnesia:dirty_write(rabbit_disk_queue,
-                                                   Obj #dq_msg_loc {is_delivered = true})
-                end,
                 true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}),
-                {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}},
-                 State1}
+                ok =
+                    if Delivered -> ok;
+                       true ->
+                            mnesia:dirty_write(rabbit_disk_queue,
+                                               Obj #dq_msg_loc {is_delivered = true})
+                    end,
+                if ReadMsg ->
+                        {FileHdl, State1} = get_read_handle(File, State),
+                        {ok, {MsgBody, BodySize}} =
+                            read_message_at_offset(FileHdl, Offset, TotalSize),
+                        {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}},
+                         State1};
+                   true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State}
+                end
         end
     end.
-- 
cgit v1.2.1


From 884a78260fb0a992d5e6694feccdbc2c1da29afc Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 18 May 2009 10:43:27 +0100
Subject: Removal of unnecessary includes
---
 src/rabbit_db_queue.erl | 1 -
 src/rabbit_disk_queue.erl | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/rabbit_db_queue.erl b/src/rabbit_db_queue.erl
index 495bdafb..97f1b986 100644
--- a/src/rabbit_db_queue.erl
+++ b/src/rabbit_db_queue.erl
@@ -63,7 +63,6 @@
 -export([stop/0, stop_and_obliterate/0]).
 
--include_lib("stdlib/include/qlc.hrl").
 -include("rabbit.hrl").
 
 -define(SERVER, ?MODULE).
diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 1a19fd6f..fc04899e 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -42,7 +42,6 @@
 -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]).
 
--include_lib("stdlib/include/qlc.hrl").
 -include("rabbit.hrl").
 
 -define(WRITE_OK_SIZE_BITS, 8).
-- 
cgit v1.2.1


From da01273c9a4f5e26b8918f1941399919945aadb5 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 19 May 2009 11:36:05 +0100
Subject: Added a call to compact at the moment we start a new file. This
 closes a hole where the GC isn't being fired soon enough on new files.
 Also turned on preallocation for new files.
--- src/rabbit_disk_queue.erl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index fc04899e..0c4c2e2a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -333,8 +333,17 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = Offset } } = load_from_disk(State), Path = form_filename(CurrentName), + Exists = case file:read_file_info(Path) of + {error,enoent} -> false; + {ok, _} -> true + end, %% read is only needed so that we can seek {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), + ok = if Exists -> ok; + true -> %% new file, so preallocate + {ok, FileSizeLimit} = file:position(FileHdl, {bof, FileSizeLimit}), + file:truncate(FileHdl) + end, {ok, Offset} = file:position(FileHdl, {bof, Offset}), {ok, State1 #dqstate { current_file_handle = FileHdl }}. @@ -729,13 +738,17 @@ maybe_roll_to_new_file(Offset, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]), + {ok, FileSizeLimit} = file:position(NextHdl, {bof, FileSizeLimit}), + ok = file:truncate(NextHdl), + {ok, 0} = file:position(NextHdl, {bof, 0}), true = ets:update_element(FileSummary, CurName, {5, NextName}), %% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), - {ok, State #dqstate { current_file_name = NextName, - current_file_handle = NextHdl, - current_file_num = NextNum, - current_offset = 0 - }}; + State1 = State #dqstate { current_file_name = NextName, + current_file_handle = NextHdl, + current_file_num = NextNum, + current_offset = 0 + }, + {ok, compact(sets:from_list([CurName]), State1)}; maybe_roll_to_new_file(_, State) -> {ok, State}. -- cgit v1.2.1 From 41113a7dcb4294a47a105e735b5a32ff27052e12 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 May 2009 13:21:30 +0100 Subject: Added support for purging queues. This is done per the 0-9-1 spec so we purge everything that hasn't been delivered. --- src/rabbit_db_queue.erl | 30 +++++++++++++++++++++++++++++- src/rabbit_disk_queue.erl | 39 ++++++++++++++++++++++++++++++++++----- src/rabbit_tests.erl | 23 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/src/rabbit_db_queue.erl b/src/rabbit_db_queue.erl index 97f1b986..897a4a6f 100644 --- a/src/rabbit_db_queue.erl +++ b/src/rabbit_db_queue.erl @@ -59,7 +59,8 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). +-export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, + tx_commit/3, tx_cancel/1, requeue/2, purge/1]). -export([stop/0, stop_and_obliterate/0]). @@ -86,6 +87,7 @@ -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [seq_id()]) -> 'ok'). +-spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -121,6 +123,9 @@ tx_cancel(MsgIds) when is_list(MsgIds) -> requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). +purge(Q) -> + gen_server:call(?SERVER, {purge, Q}). + stop() -> gen_server:call(?SERVER, stop, infinity). 
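Both internal_purge implementations that follow come down to the same pointer arithmetic: the per-queue read pointer jumps to the write pointer, and everything in between, which is exactly the set of undelivered messages, is removed and counted. A toy sketch of that invariant, with assumed names:

%% Sketch only: the new {read, write} pointers after a purge, plus the
%% count of purged (undelivered) messages.
purge_pointers(ReadSeqId, WriteSeqId) when WriteSeqId >= ReadSeqId ->
    {{WriteSeqId, WriteSeqId}, WriteSeqId - ReadSeqId}.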
@@ -148,6 +153,9 @@ handle_call({phantom_deliver, Q}, _From, State) -> handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), {reply, ok, State1}; +handle_call({purge, Q}, _From, State) -> + {ok, Count, State1} = internal_purge(Q, State), + {reply, Count, State1}; handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State = #dbstate { db_conn = Conn }) -> @@ -424,3 +432,23 @@ shuffle_up(Conn, QStr, BaseSeqId, SeqId, Gap) -> 0 end, shuffle_up(Conn, QStr, BaseSeqId, SeqId - 1, Gap + GapInc). + +internal_purge(Q, State = #dbstate { db_conn = Conn }) -> + QStr = binary_to_escaped_string(term_to_binary(Q)), + case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of + {selected, _, []} -> + odbc:commit(Conn, commit), + {ok, 0, State}; + {selected, _, [{ReadSeqId}]} -> + odbc:sql_query(Conn, "update sequence set next_read = next_write where queue = " ++ QStr), + {selected, _, MsgSeqIds} = + odbc:sql_query(Conn, "select msg_id, seq_id from ledger where queue = " ++ + QStr ++ " and seq_id >= " ++ ReadSeqId), + MsgSeqIds2 = lists:map( + fun ({MsgIdStr, SeqIdStr}) -> + { binary_to_term(hex_string_to_binary(MsgIdStr)), + list_to_integer(SeqIdStr) } + end, MsgSeqIds), + {ok, State2} = remove_messages(Q, MsgSeqIds2, true, State), + {ok, length(MsgSeqIds2), State2} + end. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 0c4c2e2a..5aae2298 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -38,7 +38,8 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/1, requeue/2]). +-export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, + tx_commit/3, tx_cancel/1, requeue/2, purge/1]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -233,6 +234,7 @@ -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [seq_id()]) -> 'ok'). +-spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). @@ -270,6 +272,9 @@ tx_cancel(MsgIds) when is_list(MsgIds) -> requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). +purge(Q) -> + gen_server:call(?SERVER, {purge, Q}). + stop() -> gen_server:call(?SERVER, stop, infinity). 
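A hedged usage sketch of the new purge API (queue name and message ids here are arbitrary, and the disk queue server is assumed to be running; compare rdq_test_purge further below). Per the 0-9-1 semantics, only undelivered messages are purged, so a delivered-but-unacked message still needs its ack afterwards:

%% Sketch only: publish two messages, deliver one, then purge.
purge_demo() ->
    ok = rabbit_disk_queue:tx_publish(1, <<"one">>),
    ok = rabbit_disk_queue:tx_publish(2, <<"two">>),
    ok = rabbit_disk_queue:tx_commit(q, [1, 2], []),
    {1, <<"one">>, 3, false, AckTag} = rabbit_disk_queue:deliver(q),
    1 = rabbit_disk_queue:purge(q),      %% only message 2 was undelivered
    empty = rabbit_disk_queue:deliver(q),
    ok = rabbit_disk_queue:ack(q, [AckTag]).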
@@ -356,6 +361,9 @@ handle_call({phantom_deliver, Q}, _From, State) -> handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), {reply, ok, State1}; +handle_call({purge, Q}, _From, State) -> + {ok, Count, State1} = internal_purge(Q, State), + {reply, Count, State1}; handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State) -> @@ -544,6 +552,7 @@ internal_ack(Q, MsgSeqIds, State) -> %% called from tx_cancel with MnesiaDelete = false %% called from internal_tx_cancel with MnesiaDelete = txn %% called from ack with MnesiaDelete = true +%% called from purge with MnesiaDelete = txn remove_messages(Q, MsgSeqIds, MnesiaDelete, State = #dqstate { file_summary = FileSummary, current_file_name = CurName @@ -625,7 +634,7 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, [] -> {0,0}; [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} end, - {atomic, {Sync, WriteSeqId}} = + {atomic, {Sync, WriteSeqId, State2}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), %% must deal with publishes first, if we didn't @@ -646,14 +655,14 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, write), {Acc or (CurName =:= File), NextWriteSeqId + 1} end, {false, InitWriteSeqId}, PubMsgIds), - remove_messages(Q, AckSeqIds, txn, State), - {Sync2, WriteSeqId3} + {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), + {Sync2, WriteSeqId3, State3} end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}), if Sync -> ok = file:sync(CurHdl); true -> ok end, - {ok, State}. + {ok, State2}. internal_publish(Q, MsgId, MsgBody, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = @@ -722,6 +731,26 @@ internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2}), {ok, State}. +internal_purge(Q, State = #dqstate { sequences = Sequences }) -> + case ets:lookup(Sequences, Q) of + [] -> {ok, 0, State}; + [{Q, ReadSeqId, WriteSeqId}] -> + {atomic, {ok, State2}} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + MsgSeqIds = lists:foldl( + fun (SeqId, Acc) -> + [#dq_msg_loc { is_delivered = false, msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + [{MsgId, SeqId} | Acc] + end, [], lists:seq(ReadSeqId, WriteSeqId - 1)), + remove_messages(Q, MsgSeqIds, txn, State) + end), + true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), + {ok, WriteSeqId - ReadSeqId, State2} + end. + %% ---- ROLLING OVER THE APPEND FILE ---- maybe_roll_to_new_file(Offset, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 5924bb38..14461abb 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -695,6 +695,7 @@ test_disk_queue() -> passed = rdq_stress_gc(10000), passed = rdq_test_startup_with_queue_gaps(), passed = rdq_test_redeliver(), + passed = rdq_test_purge(), passed. rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> @@ -863,6 +864,28 @@ rdq_test_redeliver() -> rdq_stop(), passed. 
+rdq_test_purge() -> + rdq_virgin(), + rdq_start(), + Msg = <<0:(8*256)>>, + Total = 1000, + Half = round(Total/2), + All = lists:seq(1,Total), + [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + rabbit_disk_queue:tx_commit(q, All, []), + io:format("Publish done~n", []), + %% deliver first half + Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end + || N <- lists:seq(1,Half)], + io:format("Deliver first half done~n", []), + rabbit_disk_queue:purge(q), + io:format("Purge done~n", []), + rabbit_disk_queue:tx_commit(q, [], Seqs), + io:format("Ack first half done~n", []), + empty = rabbit_disk_queue:deliver(q), + rdq_stop(), + passed. + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). -- cgit v1.2.1 From 215253f1ae530fc29f6689ae273c0c0391ea9b2c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 May 2009 13:22:38 +0100 Subject: Some initial reworkings, making sure all messages have a guid, and moving the persister flag around slightly. Also various weird comments appearing in _process.erl for me! --- include/rabbit.hrl | 5 +++-- src/rabbit_amqqueue_process.erl | 44 ++++++++++++++++++++--------------------- src/rabbit_channel.erl | 7 ++----- src/rabbit_exchange.erl | 4 +++- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 44e13684..6212d4f3 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -62,7 +62,7 @@ -record(listener, {node, protocol, host, port}). --record(basic_message, {exchange_name, routing_key, content, persistent_key}). +-record(basic_message, {exchange_name, routing_key, content, guid, is_persistent}). -record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id}). @@ -134,7 +134,8 @@ #basic_message{exchange_name :: exchange_name(), routing_key :: routing_key(), content :: content(), - persistent_key :: maybe(pkey())}). + guid :: guid(), + is_persistent :: bool()}). -type(message() :: basic_message()). %% this really should be an abstract type -type(msg_id() :: non_neg_integer()). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index c390b2b7..69edb64f 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -203,15 +203,15 @@ attempt_delivery(none, Message, State) -> {offered, false, State1} -> {true, State1}; {offered, true, State1} -> - persist_message(none, qname(State), Message), - persist_delivery(qname(State), Message, false), + persist_message(none, qname(State), Message), %% DQ HERE + persist_delivery(qname(State), Message, false), %% DQ HERE {true, State1}; {not_offered, State1} -> {false, State1} end; attempt_delivery(Txn, Message, State) -> - persist_message(Txn, qname(State), Message), - record_pending_message(Txn, Message), + persist_message(Txn, qname(State), Message), %% DQ tx_commit and store msgid in txn map + record_pending_message(Txn, Message), %% DQ seems to be done here! {true, State}. deliver_or_enqueue(Txn, Message, State) -> @@ -219,8 +219,8 @@ deliver_or_enqueue(Txn, Message, State) -> {true, NewState} -> {true, NewState}; {false, NewState} -> - persist_message(Txn, qname(State), Message), - NewMB = queue:in({Message, false}, NewState#q.message_buffer), + persist_message(Txn, qname(State), Message), %% DQ Txn must be false here + NewMB = queue:in({Message, false}, NewState#q.message_buffer), %% DQ magic here {false, NewState#q{message_buffer = NewMB}} end. 
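With persistent_key gone, every message now gets a guid at creation time and persistence is a plain boolean, as the rabbit_channel hunk further below shows. A minimal construction sketch (the helper name is illustrative, not from the patch):

%% Sketch only: build a #basic_message{} under the new scheme.
%% Assumes rabbit.hrl is included for the record definition.
make_message(ExchangeName, RoutingKey, Content, IsPersistent) ->
    #basic_message{exchange_name = ExchangeName,
                   routing_key   = RoutingKey,
                   content       = Content,
                   guid          = rabbit_guid:guid(),
                   is_persistent = IsPersistent}.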
@@ -302,7 +302,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder, case check_auto_delete( deliver_or_enqueue_n( [{Message, true} || - {_Messsage_id, Message} <- dict:to_list(UAM)], + {_Messsage_id, Message} <- dict:to_list(UAM)], %% DQ alter all this stuff? State#q{ exclusive_consumer = case Holder of {ChPid, _} -> none; @@ -343,10 +343,10 @@ run_poke_burst(MessageBuffer, State) -> {{value, {Message, Delivered}}, BufferTail} -> case deliver_immediately(Message, Delivered, State) of {offered, true, NewState} -> - persist_delivery(qname(State), Message, Delivered), + persist_delivery(qname(State), Message, Delivered), %% DQ ack needed run_poke_burst(BufferTail, NewState); {offered, false, NewState} -> - persist_auto_ack(qname(State), Message), + persist_auto_ack(qname(State), Message), %% DQ record? We don't persist acks anyway now... run_poke_burst(BufferTail, NewState); {not_offered, NewState} -> NewState#q{message_buffer = MessageBuffer} @@ -371,7 +371,7 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -persist_message(_Txn, _QName, #basic_message{persistent_key = none}) -> +persist_message(_Txn, _QName, #basic_message{is_persistent = false}) -> %% DQ ok; persist_message(Txn, QName, Message) -> M = Message#basic_message{ @@ -379,29 +379,29 @@ persist_message(Txn, QName, Message) -> content = rabbit_binary_parser:clear_decoded_content( Message#basic_message.content)}, persist_work(Txn, QName, - [{publish, M, {QName, M#basic_message.persistent_key}}]). + [{publish, M, {QName, M#basic_message.guid}}]). -persist_delivery(_QName, _Message, +persist_delivery(_QName, _Message, %% DQ true) -> ok; -persist_delivery(_QName, #basic_message{persistent_key = none}, +persist_delivery(_QName, #basic_message{is_persistent = false}, %% DQ _Delivered) -> ok; -persist_delivery(QName, #basic_message{persistent_key = PKey}, +persist_delivery(QName, #basic_message{guid = MsgId}, %% DQ _Delivered) -> - persist_work(none, QName, [{deliver, {QName, PKey}}]). + persist_work(none, QName, [{deliver, {QName, MsgId}}]). -persist_acks(Txn, QName, Messages) -> +persist_acks(Txn, QName, Messages) -> %% DQ persist_work(Txn, QName, - [{ack, {QName, PKey}} || - #basic_message{persistent_key = PKey} <- Messages, - PKey =/= none]). + [{ack, {QName, MsgId}} || + #basic_message{guid = MsgId, is_persistent = P} <- Messages, + P]). -persist_auto_ack(_QName, #basic_message{persistent_key = none}) -> +persist_auto_ack(_QName, #basic_message{is_persistent = false}) -> ok; -persist_auto_ack(QName, #basic_message{persistent_key = PKey}) -> +persist_auto_ack(QName, #basic_message{is_persistent = true, guid = MsgId}) -> %% auto-acks are always non-transactional - rabbit_persister:dirty_work([{ack, {QName, PKey}}]). + rabbit_persister:dirty_work([{ack, {QName, MsgId}}]). persist_work(_Txn,_QName, []) -> ok; diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 7574cd67..aeb15bd1 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -313,15 +313,12 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, %% We decode the content's properties here because we're almost %% certain to want to look at delivery-mode and priority. 
     DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content),
-    PersistentKey = case is_message_persistent(DecodedContent) of
-                        true -> rabbit_guid:guid();
-                        false -> none
-                    end,
     {noreply, publish(Mandatory, Immediate,
                       #basic_message{exchange_name = ExchangeName,
                                      routing_key = RoutingKey,
                                      content = DecodedContent,
-                                     persistent_key = PersistentKey},
+                                     guid = rabbit_guid:guid(),
+                                     is_persistent = is_message_persistent(DecodedContent)},
                       rabbit_exchange:route(Exchange, RoutingKey, DecodedContent), State)};
 handle_method(#'basic.ack'{delivery_tag = DeliveryTag,
diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl
index a57e8076..9b3bbb18 100644
--- a/src/rabbit_exchange.erl
+++ b/src/rabbit_exchange.erl
@@ -204,7 +204,9 @@ simple_publish(Mandatory, Immediate, ExchangeName, RoutingKeyBin,
     Message = #basic_message{exchange_name = ExchangeName,
                              routing_key = RoutingKeyBin,
                              content = Content,
-                             persistent_key = none},
+                             is_persistent = false,
+                             guid = rabbit_guid:guid()
+                             },
     simple_publish(Mandatory, Immediate, Message).

 %% Usable by Erlang code that wants to publish messages.
-- cgit v1.2.1


From 1504003a4572d55699896f71d7959343cd991792 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 20 May 2009 10:10:20 +0100
Subject: Just adding a comment which was missed out some days ago.

---
 src/rabbit_disk_queue.erl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 5aae2298..90713723 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -317,6 +317,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
                          {max_no_slots, 1024*1024*1024},
                          {type, set}
                         ]),
+
+    %% it would be better to have this as private, but dets:from_ets/2
+    %% seems to blow up if it is set private
     MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]),
     State =
         #dqstate { msg_location_dets = MsgLocationDets,
-- cgit v1.2.1


From c3bd33c00ca1b26abab458a8f54271db4657998f Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 20 May 2009 12:46:57 +0100
Subject: So the mnesia table now contains next_seq_id. This means we can cope
 with gaps appearing. There are now two forms of publish, tx_commit and
 requeue. The first forms behave as before, with contiguous seq_ids. The
 second forms (_with_seq(s)) take either an extra seq arg or, in the case of
 a list, a tuple in which the desired seq_id is mentioned. This can always
 just be the atom 'next'. The invariant on these lists is that the seq_ids
 are always ascending. Thus requeue now effectively takes a mapping:
 [{{msgId,oldSeqId} -> newSeqId}]. On startup, because the sequence Ids are
 still private at that point, shuffle_up is still called as before, although
 now gaps can appear anywhere, not just in the delivered section. We collapse
 everything up simply because we don't know where the previous NextReadSeqId
 marker was. If we did know that then we'd know that beyond that point we had
 a well-formed linked list, and only had to patch up before that. This
 further requires that even though we have a linked list, the seq_ids must be
 ascending.

---
 include/rabbit.hrl | 2 +-
 src/rabbit_disk_queue.erl | 175 +++++++++++++++++++++++++++++++++------------
 2 files changed, 129 insertions(+), 48 deletions(-)

diff --git a/include/rabbit.hrl b/include/rabbit.hrl
index 6212d4f3..0096ada1 100644
--- a/include/rabbit.hrl
+++ b/include/rabbit.hrl
@@ -64,7 +64,7 @@
 -record(basic_message, {exchange_name, routing_key, content, guid, is_persistent}).

--record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id}).
+-record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id, next_seq_id}). %%---------------------------------------------------------------------------- diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 90713723..b2d086b2 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -38,8 +38,9 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, - tx_commit/3, tx_cancel/1, requeue/2, purge/1]). +-export([publish/3, publish_with_seq/4, deliver/1, phantom_deliver/1, ack/2, + tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, + requeue/2, requeue_with_seqs/2, purge/1]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -86,7 +87,8 @@ %% rabbit_disk_queue: this is an mnesia table which contains: %% #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, %% is_delivered = IsDelivered, -%% msg_id = MsgId +%% msg_id = MsgId, +%% next_seq_id = SeqId %% } %% @@ -224,6 +226,7 @@ -spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). +-spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id(), binary()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> {'empty' | {msg_id(), binary(), non_neg_integer(), bool(), {msg_id(), seq_id()}}}). @@ -232,8 +235,10 @@ -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). +-spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id()}], [seq_id()]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). --spec(requeue/2 :: (queue_name(), [seq_id()]) -> 'ok'). +-spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). +-spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id()}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -251,6 +256,9 @@ start_link(FileSizeLimit) -> publish(Q, MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). +publish_with_seq(Q, MsgId, SeqId, Msg) when is_binary(Msg) -> + gen_server:cast(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg}). + deliver(Q) -> gen_server:call(?SERVER, {deliver, Q}, infinity). @@ -266,12 +274,19 @@ tx_publish(MsgId, Msg) when is_binary(Msg) -> tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> gen_server:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). +tx_commit_with_seqs(Q, PubMsgSeqIds, AckSeqIds) + when is_list(PubMsgSeqIds) andalso is_list(AckSeqIds) -> + gen_server:call(?SERVER, {tx_commit_with_seqs, Q, PubMsgSeqIds, AckSeqIds}, infinity). + tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server:cast(?SERVER, {tx_cancel, MsgIds}). requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). +requeue_with_seqs(Q, MsgSeqSeqIds) when is_list(MsgSeqSeqIds) -> + gen_server:cast(?SERVER, {requeue_with_seqs, Q, MsgSeqSeqIds}). + purge(Q) -> gen_server:call(?SERVER, {purge, Q}). 
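The plain forms reduce to the _with_seq forms by pairing every element with the atom 'next', meaning "allocate the next contiguous seq_id"; the handle_call and handle_cast clauses in the following hunks do exactly this. A tiny sketch of the pairing (helper name illustrative, not from the patch):

%% Sketch only: with_next([a, b]) =:= [{a, next}, {b, next}]
with_next(Items) ->
    lists:zip(Items, lists:duplicate(length(Items), next)).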
@@ -362,7 +377,11 @@ handle_call({phantom_deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, false, State), {reply, Result, State1}; handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> - {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), + PubMsgSeqIds = lists:zip(PubMsgIds, lists:duplicate(length(PubMsgIds), next)), + {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State), + {reply, ok, State1}; +handle_call({tx_commit_with_seqs, Q, PubSeqMsgIds, AckSeqIds}, _From, State) -> + {ok, State1} = internal_tx_commit(Q, PubSeqMsgIds, AckSeqIds, State), {reply, ok, State1}; handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), @@ -401,7 +420,10 @@ handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_on {reply, ok, State #dqstate { operation_mode = ram_disk }}. handle_cast({publish, Q, MsgId, MsgBody}, State) -> - {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), + {ok, State1} = internal_publish(Q, MsgId, next, MsgBody, State), + {noreply, State1}; +handle_cast({publish_with_seq, Q, MsgId, SeqId, MsgBody}, State) -> + {ok, State1} = internal_publish(Q, MsgId, SeqId, MsgBody, State), {noreply, State1}; handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), @@ -413,7 +435,11 @@ handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), {noreply, State1}; handle_cast({requeue, Q, MsgSeqIds}, State) -> - {ok, State1} = internal_requeue(Q, MsgSeqIds, State), + MsgSeqSeqIds = lists:zip(MsgSeqIds, lists:duplicate(length(MsgSeqIds), next)), + {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), + {noreply, State1}; +handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> + {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), {noreply, State1}. handle_info(_Info, State) -> @@ -499,10 +525,11 @@ internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of [] -> {ok, empty, State}; [Obj = - #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] -> + #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, + next_seq_id = ReadSeqId2}] -> [{MsgId, _RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), - true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), + true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId}), ok = if Delivered -> ok; true -> @@ -627,15 +654,35 @@ internal_tx_publish(MsgId, MsgBody, {ok, State} end. -internal_tx_commit(Q, PubMsgIds, AckSeqIds, +adjust_last_msg_seq_id(_Q, ExpectedSeqId, next) -> + ExpectedSeqId; +adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId) -> + SuppliedSeqId; +adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId) -> + ExpectedSeqId; +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId) when SuppliedSeqId > ExpectedSeqId -> + [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), + ok = mnesia:dirty_write(rabbit_disk_queue, + Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), + SuppliedSeqId. 
+ +%% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next)) +internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, sequences = Sequences }) -> - {ReadSeqId, InitWriteSeqId} = - case ets:lookup(Sequences, Q) of - [] -> {0,0}; - [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} + {PubList, PubAcc, ReadSeqId} = + case PubMsgSeqIds of + [] -> {[], undefined, undefined}; + [_|PubMsgSeqIdsTail] -> + {InitReadSeqId, InitWriteSeqId} = + case ets:lookup(Sequences, Q) of + [] -> {0,0}; + [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} + end, + { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), + InitWriteSeqId, InitReadSeqId} end, {atomic, {Sync, WriteSeqId, State2}} = mnesia:transaction( @@ -647,41 +694,55 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, %% order which _could_not_ have happened. {Sync2, WriteSeqId3} = lists:foldl( - fun (MsgId, {Acc, NextWriteSeqId}) -> + fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, + {Acc, ExpectedSeqId}) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), + SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId), + NextSeqId2 = if NextSeqId =:= next -> SeqId2 + 1; + true -> NextSeqId + end, + true = NextSeqId2 > SeqId2, ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = - {Q, NextWriteSeqId}, + {Q, SeqId2}, msg_id = MsgId, - is_delivered = false}, + is_delivered = false, + next_seq_id = NextSeqId2 + }, write), - {Acc or (CurName =:= File), NextWriteSeqId + 1} - end, {false, InitWriteSeqId}, PubMsgIds), + {Acc or (CurName =:= File), NextSeqId2} + end, {false, PubAcc}, PubList), + {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), {Sync2, WriteSeqId3, State3} end), - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}), - if Sync -> ok = file:sync(CurHdl); - true -> ok - end, + true = if PubList =:= [] -> true; + true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}) + end, + ok = if Sync -> file:sync(CurHdl); + true -> ok + end, {ok, State2}. -internal_publish(Q, MsgId, MsgBody, State) -> +%% SeqId can be 'next' +internal_publish(Q, MsgId, SeqId, MsgBody, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, MsgBody, State), - WriteSeqId = case ets:lookup(Sequences, Q) of - [] -> %% previously unseen queue - true = ets:insert_new(Sequences, {Q, 0, 1}), - 0; - [{Q, ReadSeqId, WriteSeqId2}] -> - true = ets:insert(Sequences, {Q, ReadSeqId, - WriteSeqId2 +1}), - WriteSeqId2 - end, + {ReadSeqId, WriteSeqId} = + case ets:lookup(Sequences, Q) of + [] -> %% previously unseen queue + {0, 0}; + [{Q, ReadSeqId2, WriteSeqId2}] -> + {ReadSeqId2, WriteSeqId2} + end, + WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId), + WriteSeqId3Next = WriteSeqId3 + 1, + true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId3Next}), ok = mnesia:dirty_write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId}, + #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, msg_id = MsgId, + next_seq_id = WriteSeqId3Next, is_delivered = false}), {ok, State1}. @@ -691,7 +752,10 @@ internal_tx_cancel(MsgIds, State) -> MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). 
-internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> +internal_requeue(_Q, [], State) -> + {ok, State}; +internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], + State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've %% already been delivered). We also know that the rows for these @@ -716,20 +780,30 @@ internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> %% the Q _must_ already exist [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q), + MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), {atomic, WriteSeqId2} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), lists:foldl( - fun ({MsgId, SeqId}, NextWriteSeqId) -> + fun ({{{MsgId, SeqIdOrig}, SeqIdTo}, + {_NextMsgSeqId, NextSeqIdTo}}, + ExpectedSeqIdTo) -> + SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo), + NextSeqIdTo2 = if NextSeqIdTo =:= next -> SeqIdTo2 + 1; + true -> NextSeqIdTo + end, + true = NextSeqIdTo2 > SeqIdTo2, [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, NextWriteSeqId }}, + Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqIdTo2}, + next_seq_id = NextSeqIdTo2 + }, write), - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), - NextWriteSeqId + 1 - end, WriteSeqId, MsgSeqIds) + mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write), + NextSeqIdTo2 + end, WriteSeqId, MsgSeqIdsZipped) end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2}), {ok, State}. @@ -1136,11 +1210,12 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> GapInc = case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of [] -> 1; - [Obj = #dq_msg_loc { is_delivered = IsDelivered }] when IsDelivered - orelse (Gap =:= 0) -> + [Obj] -> if Gap =:= 0 -> ok; true -> mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }}, + Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }, + next_seq_id = SeqId + Gap + 1 + }, write), mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) end, @@ -1172,7 +1247,9 @@ load_messages(Left, [File|Files], (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_'}, + is_delivered = '_', + next_seq_id = '_' + }, msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> @@ -1215,7 +1292,9 @@ recover_crashed_compactions1(Files, TmpFile) -> (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_'}, + is_delivered = '_', + next_seq_id = '_' + }, msg_id)) end, MsgIdsTmp), {ok, UncorruptedMessages} = @@ -1253,7 +1332,9 @@ recover_crashed_compactions1(Files, TmpFile) -> (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_'}, + is_delivered = '_', + next_seq_id = '_' + }, msg_id)) end, MsgIds), %% The main file should be contiguous -- cgit v1.2.1 From cc804d1807a21052950ace9702b2e27252ded07f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 20 May 2009 14:30:33 +0100 Subject: Wrote the mixed_queue. This is totally untested just now. This module makes decisions about when to hand off to the disk_queue and when to hold messages in RAM. 
Both UnackedMessages and the contents of Transactions are still held externally, by the amqqueue_process as they need to be associated with channels. Currently there is no way to create the initial state, nor make transitions between the two different modes. But in theory, it should work ;) --- src/rabbit_disk_queue.erl | 5 +- src/rabbit_mixed_queue.erl | 166 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 src/rabbit_mixed_queue.erl diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b2d086b2..37c91a85 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -234,8 +234,9 @@ { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). --spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id()}], [seq_id()]) -> 'ok'). +-spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). +-spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id()}], + [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id()}]) -> 'ok'). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl new file mode 100644 index 00000000..c7c76eb2 --- /dev/null +++ b/src/rabbit_mixed_queue.erl @@ -0,0 +1,166 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_mixed_queue). + +-export([publish/4, deliver/1, ack/2, + tx_publish/4, tx_commit/3, tx_cancel/2, + requeue/2, purge/1]). + +-record(mqstate, { mode, + msg_buf, + next_write_seq, + queue + } + ). 
+ +publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk, queue = Q }) -> + ok = rabbit_disk_queue:publish(Q, MsgId, Msg), + {ok, State}; +publish(MsgId, Msg, IsPersistent, + State = #mqstate { queue = Q, mode = mixed, + next_write_seq = NextSeq, msg_buf = MsgBuf }) -> + if IsPersistent -> + ok = rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, Msg); + true -> ok + end, + {ok, State #mqstate { next_write_seq = NextSeq + 1, + msg_buf = queue:in({NextSeq, {MsgId, Msg, IsPersistent}}, + MsgBuf) + }}. + +deliver(State = #mqstate { mode = disk, queue = Q }) -> + {rabbit_disk_queue:deliver(Q), State}; +deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf }) -> + {Result, MsgBuf2} = queue:out(MsgBuf), + case Result of + empty -> + {empty, State}; + {value, {_Seq, {MsgId, Msg, IsPersistent}}} -> + {IsDelivered, Ack} = + if IsPersistent -> + {MsgId, IsDelivered2, Ack2} = rabbit_disk_queue:phantom_deliver(Q), + {IsDelivered2, Ack2}; + true -> {false, noack} + end, + {{MsgId, Msg, size(Msg), IsDelivered, Ack}, + State #mqstate { msg_buf = MsgBuf2 }} + end. + +remove_noacks(Acks) -> + lists:filter(fun (A) -> A /= noack end, Acks). + +ack(Acks, State = #mqstate { queue = Q }) -> + ok = rabbit_disk_queue:ack(Q, remove_noacks(Acks)), + {ok, State}. + +tx_publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk }) -> + ok = rabbit_disk_queue:tx_publish(MsgId, Msg), + {ok, State}; +tx_publish(MsgId, Msg, true, State = #mqstate { mode = mixed }) -> + ok = rabbit_disk_queue:tx_publish(MsgId, Msg), + {ok, State}; +tx_publish(_MsgId, _Msg, false, State = #mqstate { mode = mixed }) -> + {ok, State}. + +only_msg_ids(Pubs) -> + lists:map(fun (P) -> element(1, P) end, Pubs). + +tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> + ok = rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), Acks), + {ok, State}; +tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, + msg_buf = MsgBuf, + next_write_seq = NextSeq + }) -> + {PersistentPubs, MsgBuf2, NextSeq2} = + lists:foldl(fun ({MsgId, Msg, IsPersistent}, {Acc, MsgBuf3, NextSeq3}) -> + Acc2 = + if IsPersistent -> + [{MsgId, NextSeq3} | Acc]; + true -> Acc + end, + MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, + MsgBuf3), + {Acc2, MsgBuf4, NextSeq3 + 1} + end, {[], MsgBuf, NextSeq}, Publishes), + %% foldl reverses, so re-reverse PersistentPubs to match + %% requirements of rabbit_disk_queue (ascending SeqIds) + ok = rabbit_disk_queue:tx_commit_with_seqs(Q, lists:reverse(PersistentPubs), + remove_noacks(Acks)), + {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. + +only_persistent_msg_ids(Pubs) -> + lists:reverse(lists:foldl(fun ({MsgId, _, IsPersistent}, Acc) -> + if IsPersistent -> [MsgId | Acc]; + true -> Acc + end + end, [], Pubs)). + +tx_cancel(Publishes, State = #mqstate { mode = disk }) -> + ok = rabbit_disk_queue:tx_cancel(only_msg_ids(Publishes)), + {ok, State}; +tx_cancel(Publishes, State = #mqstate { mode = mixed }) -> + ok = rabbit_disk_queue:tx_cancel(only_persistent_msg_ids(Publishes)), + {ok, State}. + +only_ack_tags(MsgWithAcks) -> + lists:map(fun (P) -> element(2, P) end, MsgWithAcks). 
+ +requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q }) -> + rabbit_disk_queue:requeue(Q, only_ack_tags(MessagesWithAckTags)), + {ok, State}; +requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, + msg_buf = MsgBuf, + next_write_seq = NextSeq + }) -> + {PersistentPubs, MsgBuf2, NextSeq2} = + lists:foldl(fun ({{MsgId, Msg, IsPersistent}, AckTag}, {Acc, MsgBuf3, NextSeq3}) -> + Acc2 = + if IsPersistent -> + {MsgId, _OldSeqId} = AckTag, + [{AckTag, NextSeq3} | Acc]; + true -> Acc + end, + MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, + MsgBuf3), + {Acc2, MsgBuf4, NextSeq3 + 1} + end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), + ok = rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(PersistentPubs)), + {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. + +purge(State = #mqstate { queue = Q, mode = disk }) -> + Count = rabbit_disk_queue:purge(Q), + {Count, State}; +purge(State = #mqstate { queue = Q, msg_buf = MsgBuf, mode = mixed }) -> + rabbit_disk_queue:purge(Q), + Count = queue:len(MsgBuf), + {Count, State #mqstate { msg_buf = queue:new() }}. -- cgit v1.2.1 From 84dbeb9022ad5585b84fba5443e55ae7ee1a3296 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 May 2009 11:37:02 +0100 Subject: Formatting only. Only just realised emacs was using tabs. Fixed. --- src/rabbit_disk_queue.erl | 1432 ++++++++++++++++++++++---------------------- src/rabbit_mixed_queue.erl | 123 ++-- src/rabbit_tests.erl | 126 ++-- 3 files changed, 845 insertions(+), 836 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 37c91a85..8c602b53 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -196,7 +196,7 @@ %% +-------+ +-------+ +-------+ %% | B | | X | | B | %% +-------+ +-------+ +-------+ -%% | A | | E | | A | +%% | A | | E | | A | %% +-------+ +-------+ +-------+ %% left right left %% @@ -224,19 +224,19 @@ -type(seq_id() :: non_neg_integer()). -spec(start_link/1 :: (non_neg_integer()) -> - {'ok', pid()} | 'ignore' | {'error', any()}). + {'ok', pid()} | 'ignore' | {'error', any()}). -spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). -spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id(), binary()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> - {'empty' | {msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}}). + {'empty' | {msg_id(), binary(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}}). -spec(phantom_deliver/1 :: (queue_name()) -> - { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). + { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id()}], - [{msg_id(), seq_id()}]) -> 'ok'). + [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id()}]) -> 'ok'). @@ -252,7 +252,7 @@ start_link(FileSizeLimit) -> gen_server:start_link({local, ?SERVER}, ?MODULE, - [FileSizeLimit, ?MAX_READ_FILE_HANDLES], []). + [FileSizeLimit, ?MAX_READ_FILE_HANDLES], []). publish(Q, MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). 
@@ -317,57 +317,57 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), Node = node(), ok = - case mnesia:change_table_copy_type(rabbit_disk_queue, Node, disc_only_copies) of - {atomic, ok} -> ok; - {aborted, {already_exists, rabbit_disk_queue, Node, disc_only_copies}} -> ok; - E -> E - end, + case mnesia:change_table_copy_type(rabbit_disk_queue, Node, disc_only_copies) of + {atomic, ok} -> ok; + {aborted, {already_exists, rabbit_disk_queue, Node, disc_only_copies}} -> ok; + E -> E + end, ok = filelib:ensure_dir(form_filename("nothing")), InitName = "0" ++ ?FILE_EXTENSION, {ok, MsgLocationDets} = - dets:open_file(?MSG_LOC_NAME, - [{file, form_filename(atom_to_list(?MSG_LOC_NAME) ++ - ?FILE_EXTENSION_DETS)}, - {min_no_slots, 1024*1024}, - %% man says this should be <= 32M. But it works... - {max_no_slots, 1024*1024*1024}, - {type, set} - ]), + dets:open_file(?MSG_LOC_NAME, + [{file, form_filename(atom_to_list(?MSG_LOC_NAME) ++ + ?FILE_EXTENSION_DETS)}, + {min_no_slots, 1024*1024}, + %% man says this should be <= 32M. But it works... + {max_no_slots, 1024*1024*1024}, + {type, set} + ]), %% it would be better to have this as private, but dets:from_ets/2 %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), State = - #dqstate { msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - operation_mode = disk_only, - file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, - [set, private]), - sequences = ets:new(?SEQUENCE_ETS_NAME, - [set, private]), - current_file_num = 0, - current_file_name = InitName, - current_file_handle = undefined, - current_offset = 0, - file_size_limit = FileSizeLimit, - read_file_handles = {dict:new(), gb_trees:empty()}, - read_file_handles_limit = ReadFileHandlesLimit - }, + #dqstate { msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, + operation_mode = disk_only, + file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, + [set, private]), + sequences = ets:new(?SEQUENCE_ETS_NAME, + [set, private]), + current_file_num = 0, + current_file_name = InitName, + current_file_handle = undefined, + current_offset = 0, + file_size_limit = FileSizeLimit, + read_file_handles = {dict:new(), gb_trees:empty()}, + read_file_handles_limit = ReadFileHandlesLimit + }, {ok, State1 = #dqstate { current_file_name = CurrentName, - current_offset = Offset } } = - load_from_disk(State), + current_offset = Offset } } = + load_from_disk(State), Path = form_filename(CurrentName), Exists = case file:read_file_info(Path) of - {error,enoent} -> false; - {ok, _} -> true - end, + {error,enoent} -> false; + {ok, _} -> true + end, %% read is only needed so that we can seek {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), ok = if Exists -> ok; - true -> %% new file, so preallocate - {ok, FileSizeLimit} = file:position(FileHdl, {bof, FileSizeLimit}), - file:truncate(FileHdl) - end, + true -> %% new file, so preallocate + {ok, FileSizeLimit} = file:position(FileHdl, {bof, FileSizeLimit}), + file:truncate(FileHdl) + end, {ok, Offset} = file:position(FileHdl, {bof, Offset}), {ok, State1 #dqstate { current_file_handle = FileHdl }}. 
@@ -391,21 +391,21 @@ handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State) -> State1 = #dqstate { file_summary = FileSummary, - sequences = Sequences } = - shutdown(State), %% tidy up file handles early + sequences = Sequences } = + shutdown(State), %% tidy up file handles early {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), true = ets:delete(FileSummary), true = ets:delete(Sequences), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1 #dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}}; + read_file_handles = {dict:new(), gb_trees:empty()}}}; %% gen_server now calls terminate, which then calls shutdown handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = disk_only }) -> {reply, ok, State}; handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = ram_disk, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), @@ -413,8 +413,8 @@ handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = ram_di handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = ram_disk }) -> {reply, ok, State}; handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_only, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), @@ -450,24 +450,24 @@ terminate(_Reason, State) -> shutdown(State). shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge} - }) -> + msg_location_ets = MsgLocationEts, + current_file_handle = FileHdl, + read_file_handles = {ReadHdls, _ReadHdlsAge} + }) -> %% deliberately ignoring return codes here dets:close(MsgLocationDets), file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ - ?FILE_EXTENSION_DETS)), + ?FILE_EXTENSION_DETS)), true = ets:delete_all_objects(MsgLocationEts), if FileHdl =:= undefined -> ok; true -> file:sync(FileHdl), - file:close(FileHdl) + file:close(FileHdl) end, dict:fold(fun (_File, Hdl, _Acc) -> - file:close(Hdl) - end, ok, ReadHdls), + file:close(Hdl) + end, ok, ReadHdls), State #dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}. + read_file_handles = {dict:new(), gb_trees:empty()}}. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -481,97 +481,97 @@ base_directory() -> filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, - Key) -> + Key) -> dets:lookup(MsgLocationDets, Key); dets_ets_lookup(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, - Key) -> + Key) -> ets:lookup(MsgLocationEts, Key). 
dets_ets_delete(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, - Key) -> + Key) -> ok = dets:delete(MsgLocationDets, Key); dets_ets_delete(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, - Key) -> + Key) -> true = ets:delete(MsgLocationEts, Key), ok. dets_ets_insert(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, - Obj) -> + Obj) -> ok = dets:insert(MsgLocationDets, Obj); dets_ets_insert(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, - Obj) -> + Obj) -> true = ets:insert(MsgLocationEts, Obj), ok. dets_ets_insert_new(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, - Obj) -> + Obj) -> true = dets:insert_new(MsgLocationDets, Obj); dets_ets_insert_new(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, - Obj) -> + Obj) -> true = ets:insert_new(MsgLocationEts, Obj). dets_ets_match_object(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, - Obj) -> + Obj) -> dets:match_object(MsgLocationDets, Obj); dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, - Obj) -> + Obj) -> ets:match_object(MsgLocationEts, Obj). %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of - [] -> {ok, empty, State}; - [{Q, ReadSeqId, WriteSeqId}] -> - case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of - [] -> {ok, empty, State}; - [Obj = - #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, - next_seq_id = ReadSeqId2}] -> - [{MsgId, _RefCount, File, Offset, TotalSize}] = - dets_ets_lookup(State, MsgId), - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId}), - ok = - if Delivered -> ok; - true -> - mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc {is_delivered = true}) - end, - if ReadMsg -> - {FileHdl, State1} = get_read_handle(File, State), - {ok, {MsgBody, BodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize), - {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, - State1}; - true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} - end - end + [] -> {ok, empty, State}; + [{Q, ReadSeqId, WriteSeqId}] -> + case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of + [] -> {ok, empty, State}; + [Obj = + #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, + next_seq_id = ReadSeqId2}] -> + [{MsgId, _RefCount, File, Offset, TotalSize}] = + dets_ets_lookup(State, MsgId), + true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId}), + ok = + if Delivered -> ok; + true -> + mnesia:dirty_write(rabbit_disk_queue, + Obj #dq_msg_loc {is_delivered = true}) + end, + if ReadMsg -> + {FileHdl, State1} = get_read_handle(File, State), + {ok, {MsgBody, BodySize}} = + read_message_at_offset(FileHdl, Offset, TotalSize), + {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, + State1}; + true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} + end + end end. 
get_read_handle(File, State = - #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, - read_file_handles_limit = ReadFileHandlesLimit }) -> + #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, + read_file_handles_limit = ReadFileHandlesLimit }) -> Now = now(), {FileHdl, ReadHdls1, ReadHdlsAge1} = - case dict:find(File, ReadHdls) of - error -> - {ok, Hdl} = file:open(form_filename(File), - [read, raw, binary, - read_ahead]), - case dict:size(ReadHdls) < ReadFileHandlesLimit of - true -> - {Hdl, ReadHdls, ReadHdlsAge}; - _False -> - {Then, OldFile, ReadHdlsAge2} = - gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, Then}} = - dict:find(OldFile, ReadHdls), - ok = file:close(OldHdl), - {Hdl, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} - end; - {ok, {Hdl, Then}} -> - {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} - end, + case dict:find(File, ReadHdls) of + error -> + {ok, Hdl} = file:open(form_filename(File), + [read, raw, binary, + read_ahead]), + case dict:size(ReadHdls) < ReadFileHandlesLimit of + true -> + {Hdl, ReadHdls, ReadHdlsAge}; + _False -> + {Then, OldFile, ReadHdlsAge2} = + gb_trees:take_smallest(ReadHdlsAge), + {ok, {OldHdl, Then}} = + dict:find(OldFile, ReadHdls), + ok = file:close(OldHdl), + {Hdl, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} + end; + {ok, {Hdl, Then}} -> + {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} + end, ReadHdls3 = dict:store(File, {FileHdl, Now}, ReadHdls1), ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), {FileHdl, State #dqstate {read_file_handles = {ReadHdls3, ReadHdlsAge3}}}. @@ -585,74 +585,74 @@ internal_ack(Q, MsgSeqIds, State) -> %% called from ack with MnesiaDelete = true %% called from purge with MnesiaDelete = txn remove_messages(Q, MsgSeqIds, MnesiaDelete, - State = #dqstate { file_summary = FileSummary, - current_file_name = CurName - }) -> + State = #dqstate { file_summary = FileSummary, + current_file_name = CurName + }) -> Files = - lists:foldl( - fun ({MsgId, SeqId}, Files2) -> - [{MsgId, RefCount, File, Offset, TotalSize}] = - dets_ets_lookup(State, MsgId), - Files3 = - if 1 =:= RefCount -> - ok = dets_ets_delete(State, MsgId), - [{File, ValidTotalSize, ContiguousTop, Left, Right}] = - ets:lookup(FileSummary, File), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, - {File, (ValidTotalSize - TotalSize - - ?FILE_PACKING_ADJUSTMENT), - ContiguousTop1, Left, Right}), - if CurName =:= File -> Files2; - true -> sets:add_element(File, Files2) - end; - 1 < RefCount -> - ok = dets_ets_insert(State, {MsgId, RefCount - 1, - File, Offset, TotalSize}), - Files2 - end, - ok = if MnesiaDelete -> - mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); - MnesiaDelete =:= txn -> - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); - true -> ok - end, - Files3 - end, sets:new(), MsgSeqIds), + lists:foldl( + fun ({MsgId, SeqId}, Files2) -> + [{MsgId, RefCount, File, Offset, TotalSize}] = + dets_ets_lookup(State, MsgId), + Files3 = + if 1 =:= RefCount -> + ok = dets_ets_delete(State, MsgId), + [{File, ValidTotalSize, ContiguousTop, Left, Right}] = + ets:lookup(FileSummary, File), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + true = ets:insert(FileSummary, + {File, (ValidTotalSize - TotalSize + - ?FILE_PACKING_ADJUSTMENT), + ContiguousTop1, Left, Right}), + if CurName =:= File -> Files2; + true -> sets:add_element(File, Files2) + end; + 1 < RefCount -> + ok = dets_ets_insert(State, {MsgId, RefCount - 1, + File, Offset, TotalSize}), + Files2 + end, + ok = if MnesiaDelete -> + 
mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); + MnesiaDelete =:= txn -> + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); + true -> ok + end, + Files3 + end, sets:new(), MsgSeqIds), State2 = compact(Files, State), {ok, State2}. internal_tx_publish(MsgId, MsgBody, - State = #dqstate { current_file_handle = CurHdl, - current_file_name = CurName, - current_offset = CurOffset, - file_summary = FileSummary - }) -> + State = #dqstate { current_file_handle = CurHdl, + current_file_name = CurName, + current_offset = CurOffset, + file_summary = FileSummary + }) -> case dets_ets_lookup(State, MsgId) of - [] -> - %% New message, lots to do - {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), - true = dets_ets_insert_new(State, {MsgId, 1, CurName, - CurOffset, TotalSize}), - [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = - ets:lookup(FileSummary, CurName), - ValidTotalSize1 = ValidTotalSize + TotalSize + - ?FILE_PACKING_ADJUSTMENT, - ContiguousTop1 = if CurOffset =:= ContiguousTop -> - %% can't be any holes in this file - ValidTotalSize1; - true -> ContiguousTop - end, - true = ets:insert(FileSummary, {CurName, ValidTotalSize1, - ContiguousTop1, Left, undefined}), - NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, - maybe_roll_to_new_file(NextOffset, - State #dqstate {current_offset = NextOffset}); - [{MsgId, RefCount, File, Offset, TotalSize}] -> - %% We already know about it, just update counter - ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, - Offset, TotalSize}), - {ok, State} + [] -> + %% New message, lots to do + {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), + true = dets_ets_insert_new(State, {MsgId, 1, CurName, + CurOffset, TotalSize}), + [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = + ets:lookup(FileSummary, CurName), + ValidTotalSize1 = ValidTotalSize + TotalSize + + ?FILE_PACKING_ADJUSTMENT, + ContiguousTop1 = if CurOffset =:= ContiguousTop -> + %% can't be any holes in this file + ValidTotalSize1; + true -> ContiguousTop + end, + true = ets:insert(FileSummary, {CurName, ValidTotalSize1, + ContiguousTop1, Left, undefined}), + NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + maybe_roll_to_new_file(NextOffset, + State #dqstate {current_offset = NextOffset}); + [{MsgId, RefCount, File, Offset, TotalSize}] -> + %% We already know about it, just update counter + ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, + Offset, TotalSize}), + {ok, State} end. adjust_last_msg_seq_id(_Q, ExpectedSeqId, next) -> @@ -664,87 +664,87 @@ adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId) -> adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId) when SuppliedSeqId > ExpectedSeqId -> [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), ok = mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), + Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), SuppliedSeqId. 
%% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next)) internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, - State = #dqstate { current_file_handle = CurHdl, - current_file_name = CurName, - sequences = Sequences - }) -> + State = #dqstate { current_file_handle = CurHdl, + current_file_name = CurName, + sequences = Sequences + }) -> {PubList, PubAcc, ReadSeqId} = - case PubMsgSeqIds of - [] -> {[], undefined, undefined}; - [_|PubMsgSeqIdsTail] -> - {InitReadSeqId, InitWriteSeqId} = - case ets:lookup(Sequences, Q) of - [] -> {0,0}; - [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} - end, - { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), - InitWriteSeqId, InitReadSeqId} - end, + case PubMsgSeqIds of + [] -> {[], undefined, undefined}; + [_|PubMsgSeqIdsTail] -> + {InitReadSeqId, InitWriteSeqId} = + case ets:lookup(Sequences, Q) of + [] -> {0,0}; + [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} + end, + { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), + InitWriteSeqId, InitReadSeqId} + end, {atomic, {Sync, WriteSeqId, State2}} = - mnesia:transaction( - fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - %% must deal with publishes first, if we didn't - %% then we could end up acking a message before - %% it's been published, which is clearly - %% nonsense. I.e. in commit, do not do things in an - %% order which _could_not_ have happened. - {Sync2, WriteSeqId3} = - lists:foldl( - fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, - {Acc, ExpectedSeqId}) -> - [{MsgId, _RefCount, File, _Offset, _TotalSize}] = - dets_ets_lookup(State, MsgId), - SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId), - NextSeqId2 = if NextSeqId =:= next -> SeqId2 + 1; - true -> NextSeqId - end, - true = NextSeqId2 > SeqId2, - ok = mnesia:write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = - {Q, SeqId2}, - msg_id = MsgId, - is_delivered = false, - next_seq_id = NextSeqId2 - }, - write), - {Acc or (CurName =:= File), NextSeqId2} - end, {false, PubAcc}, PubList), - - {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), - {Sync2, WriteSeqId3, State3} - end), + mnesia:transaction( + fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), + %% must deal with publishes first, if we didn't + %% then we could end up acking a message before + %% it's been published, which is clearly + %% nonsense. I.e. in commit, do not do things in an + %% order which _could_not_ have happened. + {Sync2, WriteSeqId3} = + lists:foldl( + fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, + {Acc, ExpectedSeqId}) -> + [{MsgId, _RefCount, File, _Offset, _TotalSize}] = + dets_ets_lookup(State, MsgId), + SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId), + NextSeqId2 = if NextSeqId =:= next -> SeqId2 + 1; + true -> NextSeqId + end, + true = NextSeqId2 > SeqId2, + ok = mnesia:write(rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = + {Q, SeqId2}, + msg_id = MsgId, + is_delivered = false, + next_seq_id = NextSeqId2 + }, + write), + {Acc or (CurName =:= File), NextSeqId2} + end, {false, PubAcc}, PubList), + + {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), + {Sync2, WriteSeqId3, State3} + end), true = if PubList =:= [] -> true; - true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}) - end, + true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}) + end, ok = if Sync -> file:sync(CurHdl); - true -> ok - end, + true -> ok + end, {ok, State2}. 
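Aside: the lists:zip(PubMsgSeqIds, PubMsgSeqIdsTail ++ [{next, next}]) expression above is the pair-each-element-with-its-successor idiom: a list is zipped against its own tail padded with a sentinel, so each publish is written to mnesia already knowing its next_seq_id. The idiom in isolation, as a sketch with a hypothetical name:

    %% with_successors([a, b, c], eol) -> [{a, b}, {b, c}, {c, eol}]
    with_successors([], _Sentinel) -> [];
    with_successors(List = [_ | Tail], Sentinel) ->
        lists:zip(List, Tail ++ [Sentinel]).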
%% SeqId can be 'next' internal_publish(Q, MsgId, SeqId, MsgBody, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = - internal_tx_publish(MsgId, MsgBody, State), + internal_tx_publish(MsgId, MsgBody, State), {ReadSeqId, WriteSeqId} = - case ets:lookup(Sequences, Q) of - [] -> %% previously unseen queue - {0, 0}; - [{Q, ReadSeqId2, WriteSeqId2}] -> - {ReadSeqId2, WriteSeqId2} - end, + case ets:lookup(Sequences, Q) of + [] -> %% previously unseen queue + {0, 0}; + [{Q, ReadSeqId2, WriteSeqId2}] -> + {ReadSeqId2, WriteSeqId2} + end, WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId), WriteSeqId3Next = WriteSeqId3 + 1, true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId3Next}), ok = mnesia:dirty_write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, - msg_id = MsgId, - next_seq_id = WriteSeqId3Next, - is_delivered = false}), + #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, + msg_id = MsgId, + next_seq_id = WriteSeqId3Next, + is_delivered = false}), {ok, State1}. internal_tx_cancel(MsgIds, State) -> @@ -756,7 +756,7 @@ internal_tx_cancel(MsgIds, State) -> internal_requeue(_Q, [], State) -> {ok, State}; internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], - State = #dqstate { sequences = Sequences }) -> + State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've %% already been delivered). We also know that the rows for these @@ -783,78 +783,78 @@ internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q), MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), {atomic, WriteSeqId2} = - mnesia:transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun ({{{MsgId, SeqIdOrig}, SeqIdTo}, - {_NextMsgSeqId, NextSeqIdTo}}, - ExpectedSeqIdTo) -> - SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo), - NextSeqIdTo2 = if NextSeqIdTo =:= next -> SeqIdTo2 + 1; - true -> NextSeqIdTo - end, - true = NextSeqIdTo2 > SeqIdTo2, - [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), - mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqIdTo2}, - next_seq_id = NextSeqIdTo2 - }, - write), - mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write), - NextSeqIdTo2 - end, WriteSeqId, MsgSeqIdsZipped) - end), + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foldl( + fun ({{{MsgId, SeqIdOrig}, SeqIdTo}, + {_NextMsgSeqId, NextSeqIdTo}}, + ExpectedSeqIdTo) -> + SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo), + NextSeqIdTo2 = if NextSeqIdTo =:= next -> SeqIdTo2 + 1; + true -> NextSeqIdTo + end, + true = NextSeqIdTo2 > SeqIdTo2, + [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), + mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqIdTo2}, + next_seq_id = NextSeqIdTo2 + }, + write), + mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write), + NextSeqIdTo2 + end, WriteSeqId, MsgSeqIdsZipped) + end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2}), {ok, State}. 
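Aside: internal_requeue above re-keys every delivered row to a fresh seq id at the back of the queue within one transaction: read the row at {Q, SeqIdOrig}, write it back under {Q, SeqIdTo2} with a fresh next_seq_id, then delete the old key. The same re-keying over a plain map, as a toy sketch that ignores mnesia, locking and the next/supplied seq id cases:

    %% Rows maps SeqId -> Msg; requeued rows are appended from WriteSeqId
    %% upwards. Returns {NewWriteSeqId, NewRows}.
    requeue_sketch(OldSeqIds, WriteSeqId, Rows) ->
        lists:foldl(fun (OldSeqId, {NextSeqId, RowsAcc}) ->
                            {Msg, RowsAcc1} = maps:take(OldSeqId, RowsAcc),
                            {NextSeqId + 1, RowsAcc1#{NextSeqId => Msg}}
                    end, {WriteSeqId, Rows}, OldSeqIds).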
internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of - [] -> {ok, 0, State}; - [{Q, ReadSeqId, WriteSeqId}] -> - {atomic, {ok, State2}} = - mnesia:transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - MsgSeqIds = lists:foldl( - fun (SeqId, Acc) -> - [#dq_msg_loc { is_delivered = false, msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), - [{MsgId, SeqId} | Acc] - end, [], lists:seq(ReadSeqId, WriteSeqId - 1)), - remove_messages(Q, MsgSeqIds, txn, State) - end), - true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), - {ok, WriteSeqId - ReadSeqId, State2} + [] -> {ok, 0, State}; + [{Q, ReadSeqId, WriteSeqId}] -> + {atomic, {ok, State2}} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + MsgSeqIds = lists:foldl( + fun (SeqId, Acc) -> + [#dq_msg_loc { is_delivered = false, msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + [{MsgId, SeqId} | Acc] + end, [], lists:seq(ReadSeqId, WriteSeqId - 1)), + remove_messages(Q, MsgSeqIds, txn, State) + end), + true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), + {ok, WriteSeqId - ReadSeqId, State2} end. %% ---- ROLLING OVER THE APPEND FILE ---- maybe_roll_to_new_file(Offset, - State = #dqstate { file_size_limit = FileSizeLimit, - current_file_name = CurName, - current_file_handle = CurHdl, - current_file_num = CurNum, - file_summary = FileSummary - } - ) when Offset >= FileSizeLimit -> + State = #dqstate { file_size_limit = FileSizeLimit, + current_file_name = CurName, + current_file_handle = CurHdl, + current_file_num = CurNum, + file_summary = FileSummary + } + ) when Offset >= FileSizeLimit -> ok = file:sync(CurHdl), ok = file:close(CurHdl), NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = file:open(form_filename(NextName), - [write, raw, binary, delayed_write]), + [write, raw, binary, delayed_write]), {ok, FileSizeLimit} = file:position(NextHdl, {bof, FileSizeLimit}), ok = file:truncate(NextHdl), {ok, 0} = file:position(NextHdl, {bof, 0}), true = ets:update_element(FileSummary, CurName, {5, NextName}), %% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), State1 = State #dqstate { current_file_name = NextName, - current_file_handle = NextHdl, - current_file_num = NextNum, - current_offset = 0 - }, + current_file_handle = NextHdl, + current_file_num = NextNum, + current_offset = 0 + }, {ok, compact(sets:from_list([CurName]), State1)}; maybe_roll_to_new_file(_, State) -> {ok, State}. @@ -866,81 +866,81 @@ compact(FilesSet, State) -> Files = lists:sort(sets:to_list(FilesSet)), %% foldl reverses, so now youngest/right-most first RemainingFiles = lists:foldl(fun (File, Acc) -> - delete_empty_files(File, Acc, State) - end, [], Files), + delete_empty_files(File, Acc, State) + end, [], Files), lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). 
combine_file(File, State = #dqstate { file_size_limit = FileSizeLimit, - file_summary = FileSummary, - current_file_name = CurName - }) -> + file_summary = FileSummary, + current_file_name = CurName + }) -> %% the file we're looking at may no longer exist as it may have %% been deleted within the current GC run case ets:lookup(FileSummary, File) of - [] -> State; - [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> - GoRight = - fun() -> - case Right of - undefined -> State; - _ when not(CurName =:= Right) -> - [RightObj = {Right, RightValidData, - _RightContiguousTop, File, RightRight}] = - ets:lookup(FileSummary, Right), - RightSumData = ValidData + RightValidData, - if FileSizeLimit >= RightSumData -> - %% here, Right will be the source and so will be deleted, - %% File will be the destination - State1 = combine_files(RightObj, FileObj, - State), - %% this could fail if RightRight is undefined - %% left is the 4th field - ets:update_element(FileSummary, - RightRight, {4, File}), - true = ets:insert(FileSummary, {File, - RightSumData, - RightSumData, - Left, - RightRight}), - true = ets:delete(FileSummary, Right), - State1; - true -> State - end; - _ -> State - end - end, - case Left of - undefined -> - GoRight(); - _ -> [LeftObj = - {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] = - ets:lookup(FileSummary, Left), - LeftSumData = ValidData + LeftValidData, - if FileSizeLimit >= LeftSumData -> - %% here, File will be the source and so will be deleted, - %% Left will be the destination - State1 = combine_files(FileObj, LeftObj, State), - %% this could fail if Right is undefined - %% left is the 4th field - ets:update_element(FileSummary, Right, {4, Left}), - true = ets:insert(FileSummary, {Left, LeftSumData, - LeftSumData, - LeftLeft, Right}), - true = ets:delete(FileSummary, File), - State1; - true -> - GoRight() - end - end + [] -> State; + [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> + GoRight = + fun() -> + case Right of + undefined -> State; + _ when not(CurName =:= Right) -> + [RightObj = {Right, RightValidData, + _RightContiguousTop, File, RightRight}] = + ets:lookup(FileSummary, Right), + RightSumData = ValidData + RightValidData, + if FileSizeLimit >= RightSumData -> + %% here, Right will be the source and so will be deleted, + %% File will be the destination + State1 = combine_files(RightObj, FileObj, + State), + %% this could fail if RightRight is undefined + %% left is the 4th field + ets:update_element(FileSummary, + RightRight, {4, File}), + true = ets:insert(FileSummary, {File, + RightSumData, + RightSumData, + Left, + RightRight}), + true = ets:delete(FileSummary, Right), + State1; + true -> State + end; + _ -> State + end + end, + case Left of + undefined -> + GoRight(); + _ -> [LeftObj = + {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] = + ets:lookup(FileSummary, Left), + LeftSumData = ValidData + LeftValidData, + if FileSizeLimit >= LeftSumData -> + %% here, File will be the source and so will be deleted, + %% Left will be the destination + State1 = combine_files(FileObj, LeftObj, State), + %% this could fail if Right is undefined + %% left is the 4th field + ets:update_element(FileSummary, Right, {4, Left}), + true = ets:insert(FileSummary, {Left, LeftSumData, + LeftSumData, + LeftLeft, Right}), + true = ets:delete(FileSummary, File), + State1; + true -> + GoRight() + end + end end. 
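Aside: combine_file above picks a merge direction: for preference File is copied into its older (left) neighbour, failing that the younger (right) neighbour is pulled into File, in either case only if the combined live data fits within file_size_limit, and never with the current append file as a partner. The policy reduced to a pure function, a sketch only, with hypothetical names:

    %% LeftValid/RightValid are live byte counts, or undefined when that
    %% neighbour does not exist; arithmetic on undefined fails the guard,
    %% which silently skips the clause.
    choose_merge(Valid, LeftValid, RightValid, Limit, RightIsCurFile) ->
        if Valid + LeftValid =< Limit ->
                merge_into_left;
           not RightIsCurFile andalso Valid + RightValid =< Limit ->
                merge_right_into_this;
           true ->
                no_merge
        end.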
sort_msg_locations_by_offset(Asc, List) -> Comp = if Asc -> fun erlang:'<'/2; - true -> fun erlang:'>'/2 - end, + true -> fun erlang:'>'/2 + end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> - Comp(OffA, OffB) - end, List). + Comp(OffA, OffB) + end, List). truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), @@ -951,133 +951,133 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> ok. combine_files({Source, SourceValid, _SourceContiguousTop, - _SourceLeft, _SourceRight}, - {Destination, DestinationValid, DestinationContiguousTop, - _DestinationLeft, _DestinationRight}, - State1) -> + _SourceLeft, _SourceRight}, + {Destination, DestinationValid, DestinationContiguousTop, + _DestinationLeft, _DestinationRight}, + State1) -> State = close_file(Source, close_file(Destination, State1)), {ok, SourceHdl} = - file:open(form_filename(Source), - [read, write, raw, binary, delayed_write, read_ahead]), + file:open(form_filename(Source), + [read, write, raw, binary, delayed_write, read_ahead]), {ok, DestinationHdl} = - file:open(form_filename(Destination), - [read, write, raw, binary, delayed_write, read_ahead]), + file:open(form_filename(Destination), + [read, write, raw, binary, delayed_write, read_ahead]), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file %% if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file %% then truncate, copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source if DestinationContiguousTop =:= DestinationValid -> - ok = truncate_and_extend_file(DestinationHdl, - DestinationValid, ExpectedSize); + ok = truncate_and_extend_file(DestinationHdl, + DestinationValid, ExpectedSize); true -> - Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = - file:open(form_filename(Tmp), - [read, write, raw, binary, delayed_write, read_ahead]), - Worklist = - lists:dropwhile( - fun ({_, _, _, Offset, _}) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == DestinationContiguousTop - %% because if it was then DestinationContiguousTop would have been - %% extended by TotalSize - Offset < DestinationContiguousTop - %% Given expected access patterns, I suspect that the list should be - %% naturally sorted as we require, however, we need to enforce it anyway - end, sort_msg_locations_by_offset(true, - dets_ets_match_object(State, - {'_', '_', - Destination, - '_', '_'}))), - TmpSize = DestinationValid - DestinationContiguousTop, - {TmpSize, BlockStart1, BlockEnd1} = - lists:foldl( - fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the TmpFile. - %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) 
- Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% this message is going to end up back in - %% Destination, at DestinationContiguousTop - %% + CurOffset - FinalOffset = DestinationContiguousTop + CurOffset, - ok = dets_ets_insert(State, {MsgId, RefCount, Destination, - FinalOffset, TotalSize}), - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - %% base case, called only for the - %% first list elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - %% extend the current block because - %% the next msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - %% found a gap, so actually do the - %% work for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file:position(DestinationHdl, - {bof, BlockStart}), - {ok, BSize} = file:copy(DestinationHdl, - TmpHdl, BSize), - {NextOffset, Offset, Offset + Size} - end - end, {0, undefined, undefined}, Worklist), - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), - {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), - %% so now Tmp contains everything we need to salvage from - %% Destination, and MsgLocationDets has been updated to - %% reflect compaction of Destination so truncate - %% Destination and copy from Tmp back to the end - {ok, 0} = file:position(TmpHdl, {bof, 0}), - ok = truncate_and_extend_file(DestinationHdl, - DestinationContiguousTop, ExpectedSize), - {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be - %% DestinationValid - ok = file:sync(DestinationHdl), - ok = file:close(TmpHdl), - ok = file:delete(form_filename(Tmp)) + Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = + file:open(form_filename(Tmp), + [read, write, raw, binary, delayed_write, read_ahead]), + Worklist = + lists:dropwhile( + fun ({_, _, _, Offset, _}) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == DestinationContiguousTop + %% because if it was then DestinationContiguousTop would have been + %% extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I suspect that the list should be + %% naturally sorted as we require, however, we need to enforce it anyway + end, sort_msg_locations_by_offset(true, + dets_ets_match_object(State, + {'_', '_', + Destination, + '_', '_'}))), + TmpSize = DestinationValid - DestinationContiguousTop, + {TmpSize, BlockStart1, BlockEnd1} = + lists:foldl( + fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the TmpFile. + %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) 
+ Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + %% this message is going to end up back in + %% Destination, at DestinationContiguousTop + %% + CurOffset + FinalOffset = DestinationContiguousTop + CurOffset, + ok = dets_ets_insert(State, {MsgId, RefCount, Destination, + FinalOffset, TotalSize}), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + %% base case, called only for the + %% first list elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + %% extend the current block because + %% the next msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + %% found a gap, so actually do the + %% work for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file:position(DestinationHdl, + {bof, BlockStart}), + {ok, BSize} = file:copy(DestinationHdl, + TmpHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {0, undefined, undefined}, Worklist), + %% do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), + {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), + %% so now Tmp contains everything we need to salvage from + %% Destination, and MsgLocationDets has been updated to + %% reflect compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file:position(TmpHdl, {bof, 0}), + ok = truncate_and_extend_file(DestinationHdl, + DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be + %% DestinationValid + ok = file:sync(DestinationHdl), + ok = file:close(TmpHdl), + ok = file:delete(form_filename(Tmp)) end, SourceWorkList = - sort_msg_locations_by_offset(true, - dets_ets_match_object(State, - {'_', '_', Source, - '_', '_'})), + sort_msg_locations_by_offset(true, + dets_ets_match_object(State, + {'_', '_', Source, + '_', '_'})), {ExpectedSize, BlockStart2, BlockEnd2} = - lists:foldl( - fun ({MsgId, RefCount, _Source, Offset, TotalSize}, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the DestinationFile. - %% Offset, BlockStart and BlockEnd are in the SourceFile - Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert(State, {MsgId, RefCount, Destination, - CurOffset, TotalSize}), - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - %% base case, called only for the first list - %% elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - %% extend the current block because the next - %% msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - %% found a gap, so actually do the work for - %% the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file:position(SourceHdl, {bof, BlockStart}), - {ok, BSize} = - file:copy(SourceHdl, DestinationHdl, BSize), - {NextOffset, Offset, Offset + Size} - end - end, {DestinationValid, undefined, undefined}, SourceWorkList), + lists:foldl( + fun ({MsgId, RefCount, _Source, Offset, TotalSize}, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the DestinationFile. 
+ %% Offset, BlockStart and BlockEnd are in the SourceFile + Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, + %% update MsgLocationDets to reflect change of file and offset + ok = dets_ets_insert(State, {MsgId, RefCount, Destination, + CurOffset, TotalSize}), + NextOffset = CurOffset + Size, + if BlockStart =:= undefined -> + %% base case, called only for the first list + %% elem + {NextOffset, Offset, Offset + Size}; + Offset =:= BlockEnd -> + %% extend the current block because the next + %% msg follows straight on + {NextOffset, BlockStart, BlockEnd + Size}; + true -> + %% found a gap, so actually do the work for + %% the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file:position(SourceHdl, {bof, BlockStart}), + {ok, BSize} = + file:copy(SourceHdl, DestinationHdl, BSize), + {NextOffset, Offset, Offset + Size} + end + end, {DestinationValid, undefined, undefined}, SourceWorkList), %% do the last remaining block BSize2 = BlockEnd2 - BlockStart2, {ok, BlockStart2} = file:position(SourceHdl, {bof, BlockStart2}), @@ -1090,38 +1090,38 @@ combine_files({Source, SourceValid, _SourceContiguousTop, State. close_file(File, State = #dqstate { read_file_handles = - {ReadHdls, ReadHdlsAge} }) -> + {ReadHdls, ReadHdlsAge} }) -> case dict:find(File, ReadHdls) of - error -> - State; - {ok, {Hdl, Then}} -> - ok = file:close(Hdl), - State #dqstate { read_file_handles = - { dict:erase(File, ReadHdls), - gb_trees:delete(Then, ReadHdlsAge) } } + error -> + State; + {ok, {Hdl, Then}} -> + ok = file:close(Hdl), + State #dqstate { read_file_handles = + { dict:erase(File, ReadHdls), + gb_trees:delete(Then, ReadHdlsAge) } } end. delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> [{File, ValidData, _ContiguousTop, Left, Right}] = - ets:lookup(FileSummary, File), + ets:lookup(FileSummary, File), case ValidData of - %% we should NEVER find the current file in here hence right + %% we should NEVER find the current file in here hence right %% should always be a file, not undefined - 0 -> case {Left, Right} of - {undefined, _} when not(is_atom(Right)) -> - %% the eldest file is empty. YAY! - %% left is the 4th field - true = ets:update_element(FileSummary, Right, {4, undefined}); - {_, _} when not(is_atom(Right)) -> - %% left is the 4th field - true = ets:update_element(FileSummary, Right, {4, Left}), - %% right is the 5th field - true = ets:update_element(FileSummary, Left, {5, Right}) - end, - true = ets:delete(FileSummary, File), - ok = file:delete(form_filename(File)), - Acc; - _ -> [File|Acc] + 0 -> case {Left, Right} of + {undefined, _} when not(is_atom(Right)) -> + %% the eldest file is empty. YAY! + %% left is the 4th field + true = ets:update_element(FileSummary, Right, {4, undefined}); + {_, _} when not(is_atom(Right)) -> + %% left is the 4th field + true = ets:update_element(FileSummary, Right, {4, Left}), + %% right is the 5th field + true = ets:update_element(FileSummary, Left, {5, Right}) + end, + true = ets:delete(FileSummary, File), + ok = file:delete(form_filename(File)), + Acc; + _ -> [File|Acc] end. %% ---- DISK RECOVERY ---- @@ -1130,10 +1130,10 @@ load_from_disk(State) -> %% sorted so that smallest number is first. 
which also means %% eldest file (left-most) first ok = case mnesia:add_table_index(rabbit_disk_queue, msg_id) of - {atomic, ok} -> ok; - {aborted,{already_exists,rabbit_disk_queue,_}} -> ok; - E -> E - end, + {atomic, ok} -> ok; + {aborted,{already_exists,rabbit_disk_queue,_}} -> ok; + E -> E + end, {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), %% There should be no more tmp files now, so go ahead and load the @@ -1142,44 +1142,44 @@ load_from_disk(State) -> %% Finally, check there is nothing in mnesia which we haven't %% loaded {atomic, true} = mnesia:transaction( - fun() -> - ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) -> - true = 1 =:= - length(dets_ets_lookup(State1, MsgId)) - end, - true, rabbit_disk_queue) - end), + fun() -> + ok = mnesia:read_lock_table(rabbit_disk_queue), + mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) -> + true = 1 =:= + length(dets_ets_lookup(State1, MsgId)) + end, + true, rabbit_disk_queue) + end), State2 = extract_sequence_numbers(State1), ok = case mnesia:del_table_index(rabbit_disk_queue, msg_id) of - {atomic, ok} -> ok; - %% hmm, something weird must be going on, but it's - %% probably not the end of the world - {aborted,{no_exists,rabbit_disk_queue,_}} -> ok; - E2 -> E2 - end, + {atomic, ok} -> ok; + %% hmm, something weird must be going on, but it's + %% probably not the end of the world + {aborted,{no_exists,rabbit_disk_queue,_}} -> ok; + E2 -> E2 + end, {ok, State2}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> {atomic, true} = mnesia:transaction( fun() -> - ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl( - fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> - NextWrite = SeqId + 1, - case ets:lookup(Sequences, Q) of - [] -> - true = ets:insert_new(Sequences, - {Q, SeqId, NextWrite}); - [Orig = {Q, Read, Write}] -> - Repl = {Q, lists:min([Read, SeqId]), - lists:max([Write, NextWrite])}, - if Orig /= Repl -> - true = ets:insert(Sequences, Repl); - true -> true - end - end - end, true, rabbit_disk_queue) + ok = mnesia:read_lock_table(rabbit_disk_queue), + mnesia:foldl( + fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> + NextWrite = SeqId + 1, + case ets:lookup(Sequences, Q) of + [] -> + true = ets:insert_new(Sequences, + {Q, SeqId, NextWrite}); + [Orig = {Q, Read, Write}] -> + Repl = {Q, lists:min([Read, SeqId]), + lists:max([Write, NextWrite])}, + if Orig /= Repl -> + true = ets:insert(Sequences, Repl); + true -> true + end + end + end, true, rabbit_disk_queue) end), remove_gaps_in_sequences(State), State. @@ -1195,79 +1195,79 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> %% likelihood of gaps being at the bottom rather than the top of %% the queue, so shuffling up should be the better bet. {atomic, _} = - mnesia:transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foreach( - fun ({Q, ReadSeqId, WriteSeqId}) -> - Gap = shuffle_up(Q, ReadSeqId - 1, WriteSeqId - 1, 0), - true = ets:insert(Sequences, {Q, ReadSeqId + Gap, WriteSeqId}) - end, ets:match_object(Sequences, '_')) - end). + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foreach( + fun ({Q, ReadSeqId, WriteSeqId}) -> + Gap = shuffle_up(Q, ReadSeqId - 1, WriteSeqId - 1, 0), + true = ets:insert(Sequences, {Q, ReadSeqId + Gap, WriteSeqId}) + end, ets:match_object(Sequences, '_')) + end). 
shuffle_up(_Q, SeqId, SeqId, Gap) -> Gap; shuffle_up(Q, BaseSeqId, SeqId, Gap) -> GapInc = - case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of - [] -> 1; - [Obj] -> - if Gap =:= 0 -> ok; - true -> mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }, - next_seq_id = SeqId + Gap + 1 - }, - write), - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) - end, - 0 - end, + case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of + [] -> 1; + [Obj] -> + if Gap =:= 0 -> ok; + true -> mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }, + next_seq_id = SeqId + Gap + 1 + }, + write), + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) + end, + 0 + end, shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, - current_file_name = CurName }) -> + current_file_name = CurName }) -> true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), State; load_messages(Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), Offset = case dets_ets_match_object(State, {'_', '_', Left, '_', '_'}) of - [] -> 0; - L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] = - sort_msg_locations_by_offset(false, L), - MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT - end, + [] -> 0; + L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] = + sort_msg_locations_by_offset(false, L), + MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT + end, State #dqstate { current_file_num = Num, current_file_name = Left, - current_offset = Offset }; + current_offset = Offset }; load_messages(Left, [File|Files], - State = #dqstate { file_summary = FileSummary }) -> + State = #dqstate { file_summary = FileSummary }) -> %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( - fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' - }, - msg_id)) of - 0 -> {VMAcc, VTSAcc}; - RefCount -> - true = dets_ets_insert_new(State, {MsgId, RefCount, File, - Offset, TotalSize}), - {[{MsgId, TotalSize, Offset}|VMAcc], - VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT - } - end - end, {[], 0}, Messages), + fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_', + next_seq_id = '_' + }, + msg_id)) of + 0 -> {VMAcc, VTSAcc}; + RefCount -> + true = dets_ets_insert_new(State, {MsgId, RefCount, File, + Offset, TotalSize}), + {[{MsgId, TotalSize, Offset}|VMAcc], + VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT + } + end + end, {[], 0}, Messages), %% foldl reverses lists and find_contiguous_block_prefix needs %% elems in the same order as from scan_file_for_valid_messages {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), Right = case Files of - [] -> undefined; - [F|_] -> F - end, + [] -> undefined; + [F|_] -> F + end, true = ets:insert_new(FileSummary, {File, ValidTotalSize, ContiguousTop, Left, Right}), load_messages(File, Files, State). 
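Aside: shuffle_up above walks seq ids from the top of the queue down to its base, counting missing rows and sliding each surviving row up by the gaps seen so far, so the span from the (adjusted) read position to the write position is contiguous again. The same renumbering over an ordered list of rows, as a toy sketch with hypothetical names:

    %% Rows is an ascending list of surviving {SeqId, Msg} pairs below
    %% WriteSeqId; missing seq ids are gaps. Rows get repacked so that the
    %% last one lands at WriteSeqId - 1, mirroring the upwards shuffle.
    shuffle_up_sketch(Rows, WriteSeqId) ->
        {Shuffled, _} =
            lists:foldr(fun ({_SeqId, Msg}, {Acc, NextSeqId}) ->
                                {[{NextSeqId, Msg} | Acc], NextSeqId - 1}
                        end, {[], WriteSeqId - 1}, Rows),
        Shuffled.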
@@ -1275,7 +1275,7 @@ load_messages(Left, [File|Files], recover_crashed_compactions(Files, TmpFiles) -> lists:foreach(fun (TmpFile) -> ok = recover_crashed_compactions1(Files, TmpFile) end, - TmpFiles), + TmpFiles), ok. recover_crashed_compactions1(Files, TmpFile) -> @@ -1284,22 +1284,22 @@ recover_crashed_compactions1(Files, TmpFile) -> true = lists:member(NonTmpRelatedFile, Files), %% [{MsgId, TotalSize, FileOffset}] {ok, UncorruptedMessagesTmp} = - scan_file_for_valid_messages(form_filename(TmpFile)), + scan_file_for_valid_messages(form_filename(TmpFile)), MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), %% all of these messages should appear in the mnesia table, %% otherwise they wouldn't have been copied out lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' - }, - msg_id)) - end, MsgIdsTmp), + true = 0 < length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_', + next_seq_id = '_' + }, + msg_id)) + end, MsgIdsTmp), {ok, UncorruptedMessages} = - scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), %% 1) It's possible that everything in the tmp file is also in the main file %% such that the main file is (prefix ++ tmpfile). This means that compaction @@ -1321,62 +1321,62 @@ recover_crashed_compactions1(Files, TmpFile) -> %% Plan: Truncate the main file back to before any of the files in the tmp file and copy %% them over again case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of - true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file - %% note this also catches the case when the tmp file - %% is empty - ok = file:delete(TmpFile); - _False -> - %% we're in case 4 above. - %% check that everything in the main file is a valid message in mnesia - lists:foreach(fun (MsgId) -> - true = 0 < length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' - }, - msg_id)) - end, MsgIds), - %% The main file should be contiguous - {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), - %% we should have that none of the messages in the prefix - %% are in the tmp file - true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, - MsgIds), - - {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), - [write, raw, binary, delayed_write]), - {ok, Top} = file:position(MainHdl, Top), - ok = file:truncate(MainHdl), %% wipe out any rubbish at the end of the file - %% there really could be rubbish at the end of the file - - %% we could have failed after the extending truncate. 
- %% Remember the head of the list will be the highest entry - %% in the file - [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, - TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT, - ExpectedAbsPos = Top + TmpSize, - {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), - %% and now extend the main file as big as necessary in a - %% single move if we run out of disk space, this truncate - %% could fail, but we still aren't risking losing data - ok = file:truncate(MainHdl), - {ok, TmpHdl} = file:open(form_filename(TmpFile), - [read, raw, binary, read_ahead]), - {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), - ok = file:close(MainHdl), - ok = file:close(TmpHdl), - ok = file:delete(TmpFile), - - {ok, MainMessages} = - scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), - MsgIdsMain = lists:map(GrabMsgId, MainMessages), - %% check that everything in MsgIds is in MsgIdsMain - true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, - MsgIds), - %% check that everything in MsgIdsTmp is in MsgIdsMain - true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, - MsgIdsTmp) + true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file + %% note this also catches the case when the tmp file + %% is empty + ok = file:delete(TmpFile); + _False -> + %% we're in case 4 above. + %% check that everything in the main file is a valid message in mnesia + lists:foreach(fun (MsgId) -> + true = 0 < length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_', + next_seq_id = '_' + }, + msg_id)) + end, MsgIds), + %% The main file should be contiguous + {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), + %% we should have that none of the messages in the prefix + %% are in the tmp file + true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, + MsgIds), + + {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), + [write, raw, binary, delayed_write]), + {ok, Top} = file:position(MainHdl, Top), + ok = file:truncate(MainHdl), %% wipe out any rubbish at the end of the file + %% there really could be rubbish at the end of the file - + %% we could have failed after the extending truncate. + %% Remember the head of the list will be the highest entry + %% in the file + [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, + TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT, + ExpectedAbsPos = Top + TmpSize, + {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), + %% and now extend the main file as big as necessary in a + %% single move if we run out of disk space, this truncate + %% could fail, but we still aren't risking losing data + ok = file:truncate(MainHdl), + {ok, TmpHdl} = file:open(form_filename(TmpFile), + [read, raw, binary, read_ahead]), + {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), + ok = file:close(MainHdl), + ok = file:close(TmpHdl), + ok = file:delete(TmpFile), + + {ok, MainMessages} = + scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), + MsgIdsMain = lists:map(GrabMsgId, MainMessages), + %% check that everything in MsgIds is in MsgIdsMain + true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, + MsgIds), + %% check that everything in MsgIdsTmp is in MsgIdsMain + true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, + MsgIdsTmp) end, ok. 
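Aside: after its integrity checks, recover_crashed_compactions1 above collapses the four crash cases it enumerates into one test: if every message in the tmp file also survives in the main file (cases 1-3), the tmp file is redundant and is simply deleted; otherwise (case 4) the main file is truncated back to its contiguous prefix and the tmp file's contents are copied on again. The decision condensed into a sketch with hypothetical names:

    recovery_plan(MsgIdsMain, MsgIdsTmp) ->
        case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end,
                       MsgIdsTmp) of
            true  -> delete_tmp_file;                  %% cases 1, 2 and 3
            false -> truncate_main_then_copy_tmp_back  %% case 4
        end.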
@@ -1386,16 +1386,16 @@ recover_crashed_compactions1(Files, TmpFile) ->
find_contiguous_block_prefix([]) -> {0, []};
find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) ->
    case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of
-    {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT,
-                  lists:reverse(Acc)};
-    Res -> Res
+        {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT,
+                      lists:reverse(Acc)};
+        Res -> Res
    end.
find_contiguous_block_prefix([], 0, Acc) -> {ok, Acc};
find_contiguous_block_prefix([], _N, _Acc) -> {0, []};
find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail],
-                            ExpectedOffset, Acc)
+                             ExpectedOffset, Acc)
    when ExpectedOffset =:= Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT ->
    find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]);
find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) ->
@@ -1421,29 +1421,29 @@ append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) ->
    MsgIdBinSize = size(MsgIdBin),
    TotalSize = BodySize + MsgIdBinSize,
    case file:write(FileHdl, <<TotalSize:?INTEGER_SIZE_BITS,
-                              MsgIdBinSize:?INTEGER_SIZE_BITS,
-                              MsgIdBin:MsgIdBinSize/binary,
-                              MsgBody:BodySize/binary,
-                              ?WRITE_OK:?WRITE_OK_SIZE_BITS>>) of
-    ok -> {ok, TotalSize};
-    KO -> KO
+                               MsgIdBinSize:?INTEGER_SIZE_BITS,
+                               MsgIdBin:MsgIdBinSize/binary,
+                               MsgBody:BodySize/binary,
+                               ?WRITE_OK:?WRITE_OK_SIZE_BITS>>) of
+        ok -> {ok, TotalSize};
+        KO -> KO
    end.

read_message_at_offset(FileHdl, Offset, TotalSize) ->
    TotalSizeWriteOkBytes = TotalSize + 1,
    case file:position(FileHdl, {bof, Offset}) of
-    {ok, Offset} ->
-        case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of
-            {ok, <<TotalSize:?INTEGER_SIZE_BITS,
-                   MsgIdBinSize:?INTEGER_SIZE_BITS,
-                   Rest:TotalSizeWriteOkBytes/binary>>} ->
-                BodySize = TotalSize - MsgIdBinSize,
-                <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary,
-                  ?WRITE_OK:?WRITE_OK_SIZE_BITS>> = Rest,
-                {ok, {MsgBody, BodySize}};
-            KO -> KO
-        end;
-    KO -> KO
+        {ok, Offset} ->
+            case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of
+                {ok, <<TotalSize:?INTEGER_SIZE_BITS,
+                       MsgIdBinSize:?INTEGER_SIZE_BITS,
+                       Rest:TotalSizeWriteOkBytes/binary>>} ->
+                    BodySize = TotalSize - MsgIdBinSize,
+                    <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary,
+                      ?WRITE_OK:?WRITE_OK_SIZE_BITS>> = Rest,
+                    {ok, {MsgBody, BodySize}};
+                KO -> KO
+            end;
+        KO -> KO
    end.

scan_file_for_valid_messages(File) ->
@@ -1454,53 +1454,53 @@ scan_file_for_valid_messages(File) ->

scan_file_for_valid_messages(FileHdl, Offset, Acc) ->
    case read_next_file_entry(FileHdl, Offset) of
-    {ok, eof} -> {ok, Acc};
-    {ok, {corrupted, NextOffset}} ->
-        scan_file_for_valid_messages(FileHdl, NextOffset, Acc);
-    {ok, {ok, MsgId, TotalSize, NextOffset}} ->
-        scan_file_for_valid_messages(FileHdl, NextOffset,
-                                     [{MsgId, TotalSize, Offset}|Acc]);
-    _KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages
+        {ok, eof} -> {ok, Acc};
+        {ok, {corrupted, NextOffset}} ->
+            scan_file_for_valid_messages(FileHdl, NextOffset, Acc);
+        {ok, {ok, MsgId, TotalSize, NextOffset}} ->
+            scan_file_for_valid_messages(FileHdl, NextOffset,
+                                         [{MsgId, TotalSize, Offset}|Acc]);
+        _KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages
    end.
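Aside: the entry layout written by append_message above is two 8-byte size integers, then the msg id binary, the body, and a single WRITE_OK status byte; that framing is why TotalSize is repeatedly adjusted by ?FILE_PACKING_ADJUSTMENT, assumed here to be 1 + 2 * ?INTEGER_SIZE_BYTES = 17 bytes, consistent with the reads and writes above. A worked size example under that assumption:

    %% For a 16-byte msg id binary and a 256-byte body:
    %%   TotalSize     = 16 + 256 = 272
    %%   bytes on disk = 272 + 17 = 289  (TotalSize + packing adjustment)
    %%   NextOffset    = Offset + 289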
-
+
read_next_file_entry(FileHdl, Offset) ->
    TwoIntegers = 2 * ?INTEGER_SIZE_BYTES,
    case file:read(FileHdl, TwoIntegers) of
-    {ok, <<TotalSize:?INTEGER_SIZE_BITS,
-           MsgIdBinSize:?INTEGER_SIZE_BITS>>} ->
-    case {TotalSize =:= 0, MsgIdBinSize =:= 0} of
-    {true, _} -> {ok, eof}; %% Nothing we can do other than stop
-    {false, true} -> %% current message corrupted, try skipping past it
-    ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize,
-    case file:position(FileHdl, {cur, TotalSize + 1}) of
-    {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}};
-    {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up
-    KO -> KO
-    end;
-    {false, false} -> %% all good, let's continue
-    case file:read(FileHdl, MsgIdBinSize) of
-    {ok, <<MsgId:MsgIdBinSize/binary>>} ->
-    ExpectedAbsPos = Offset + TwoIntegers + TotalSize,
-    case file:position(FileHdl,
-    {cur, TotalSize - MsgIdBinSize}) of
-    {ok, ExpectedAbsPos} ->
-    NextOffset = Offset + TotalSize +
-    ?FILE_PACKING_ADJUSTMENT,
-    case file:read(FileHdl, 1) of
-    {ok, <<?WRITE_OK:?WRITE_OK_SIZE_BITS>>} ->
-    {ok, {ok, binary_to_term(MsgId),
-    TotalSize, NextOffset}};
-    {ok, _SomeOtherData} ->
-    {ok, {corrupted, NextOffset}};
-    KO -> KO
-    end;
-    {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up
-    KO -> KO
-    end;
-    eof -> {ok, eof};
-    KO -> KO
-    end
-    end;
-    eof -> {ok, eof};
-    KO -> KO
+        {ok, <<TotalSize:?INTEGER_SIZE_BITS,
+               MsgIdBinSize:?INTEGER_SIZE_BITS>>} ->
+            case {TotalSize =:= 0, MsgIdBinSize =:= 0} of
+                {true, _} -> {ok, eof}; %% Nothing we can do other than stop
+                {false, true} -> %% current message corrupted, try skipping past it
+                    ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize,
+                    case file:position(FileHdl, {cur, TotalSize + 1}) of
+                        {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}};
+                        {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up
+                        KO -> KO
+                    end;
+                {false, false} -> %% all good, let's continue
+                    case file:read(FileHdl, MsgIdBinSize) of
+                        {ok, <<MsgId:MsgIdBinSize/binary>>} ->
+                            ExpectedAbsPos = Offset + TwoIntegers + TotalSize,
+                            case file:position(FileHdl,
+                                               {cur, TotalSize - MsgIdBinSize}) of
+                                {ok, ExpectedAbsPos} ->
+                                    NextOffset = Offset + TotalSize +
+                                        ?FILE_PACKING_ADJUSTMENT,
+                                    case file:read(FileHdl, 1) of
+                                        {ok, <<?WRITE_OK:?WRITE_OK_SIZE_BITS>>} ->
+                                            {ok, {ok, binary_to_term(MsgId),
+                                                  TotalSize, NextOffset}};
+                                        {ok, _SomeOtherData} ->
+                                            {ok, {corrupted, NextOffset}};
+                                        KO -> KO
+                                    end;
+                                {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up
+                                KO -> KO
+                            end;
+                        eof -> {ok, eof};
+                        KO -> KO
+                    end
+            end;
+        eof -> {ok, eof};
+        KO -> KO
    end.
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index c7c76eb2..4749e1da 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -31,57 +31,66 @@
-module(rabbit_mixed_queue).

+-export([start_link/2]).
+
-export([publish/4, deliver/1, ack/2,
-        tx_publish/4, tx_commit/3, tx_cancel/2,
-        requeue/2, purge/1]).
+         tx_publish/4, tx_commit/3, tx_cancel/2,
+         requeue/2, purge/1]).

-record(mqstate, { mode,
-                  msg_buf,
-                  next_write_seq,
-                  queue
-                 }
+                   msg_buf,
+                   next_write_seq,
+                   queue
+                 }
       ).

+-define(FILE_SIZE_LIMIT, (100*1024*1024)).
+
+start_link(Queue, Mode) when Mode =:= disk orelse Mode =:= mixed ->
+    rabbit_disk_queue:start_link(?FILE_SIZE_LIMIT),
+    rabbit_disk_queue:to_ram_disk_mode(), %% TODO, CHANGE ME
+    {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 0, queue = Queue }}.
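Aside: rabbit_mixed_queue puts one mode flag in front of rabbit_disk_queue: in disk mode every operation is forwarded, while in mixed mode messages live in the in-memory msg_buf and only persistent ones are written through. A hypothetical call sequence under that reading, with return shapes taken from publish/4 and deliver/1 below:

    {ok, MQ0} = rabbit_mixed_queue:start_link(my_queue, mixed),
    {ok, MQ1} = rabbit_mixed_queue:publish(msg1, <<"payload">>, true, MQ0),
    {ok, MQ2} = rabbit_mixed_queue:publish(msg2, <<"payload">>, false, MQ1),
    %% msg1 (persistent) also reached the disk queue; msg2 only msg_buf
    {{msg1, <<"payload">>, 7, false, _Ack}, _MQ3} =
        rabbit_mixed_queue:deliver(MQ2).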
+ publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk, queue = Q }) -> ok = rabbit_disk_queue:publish(Q, MsgId, Msg), {ok, State}; publish(MsgId, Msg, IsPersistent, - State = #mqstate { queue = Q, mode = mixed, - next_write_seq = NextSeq, msg_buf = MsgBuf }) -> + State = #mqstate { queue = Q, mode = mixed, + next_write_seq = NextSeq, msg_buf = MsgBuf }) -> if IsPersistent -> - ok = rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, Msg); + ok = rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, Msg); true -> ok end, {ok, State #mqstate { next_write_seq = NextSeq + 1, - msg_buf = queue:in({NextSeq, {MsgId, Msg, IsPersistent}}, - MsgBuf) - }}. + msg_buf = queue:in({NextSeq, {MsgId, Msg, IsPersistent}}, + MsgBuf) + }}. deliver(State = #mqstate { mode = disk, queue = Q }) -> {rabbit_disk_queue:deliver(Q), State}; deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf }) -> {Result, MsgBuf2} = queue:out(MsgBuf), case Result of - empty -> - {empty, State}; - {value, {_Seq, {MsgId, Msg, IsPersistent}}} -> - {IsDelivered, Ack} = - if IsPersistent -> - {MsgId, IsDelivered2, Ack2} = rabbit_disk_queue:phantom_deliver(Q), - {IsDelivered2, Ack2}; - true -> {false, noack} - end, - {{MsgId, Msg, size(Msg), IsDelivered, Ack}, - State #mqstate { msg_buf = MsgBuf2 }} + empty -> + {empty, State}; + {value, {_Seq, {MsgId, Msg, IsPersistent}}} -> + {IsDelivered, Ack} = + if IsPersistent -> + {MsgId, IsDelivered2, Ack2} = rabbit_disk_queue:phantom_deliver(Q), + {IsDelivered2, Ack2}; + true -> {false, noack} + end, + {{MsgId, Msg, size(Msg), IsDelivered, Ack}, + State #mqstate { msg_buf = MsgBuf2 }} end. remove_noacks(Acks) -> lists:filter(fun (A) -> A /= noack end, Acks). -ack(Acks, State = #mqstate { queue = Q }) -> +ack(Acks, State = #mqstate { queue = Q }) -> ok = rabbit_disk_queue:ack(Q, remove_noacks(Acks)), {ok, State}. - + tx_publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_publish(MsgId, Msg), {ok, State}; @@ -98,32 +107,32 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> ok = rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), Acks), {ok, State}; tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, - msg_buf = MsgBuf, - next_write_seq = NextSeq - }) -> + msg_buf = MsgBuf, + next_write_seq = NextSeq + }) -> {PersistentPubs, MsgBuf2, NextSeq2} = - lists:foldl(fun ({MsgId, Msg, IsPersistent}, {Acc, MsgBuf3, NextSeq3}) -> - Acc2 = - if IsPersistent -> - [{MsgId, NextSeq3} | Acc]; - true -> Acc - end, - MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, - MsgBuf3), - {Acc2, MsgBuf4, NextSeq3 + 1} - end, {[], MsgBuf, NextSeq}, Publishes), + lists:foldl(fun ({MsgId, Msg, IsPersistent}, {Acc, MsgBuf3, NextSeq3}) -> + Acc2 = + if IsPersistent -> + [{MsgId, NextSeq3} | Acc]; + true -> Acc + end, + MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, + MsgBuf3), + {Acc2, MsgBuf4, NextSeq3 + 1} + end, {[], MsgBuf, NextSeq}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match %% requirements of rabbit_disk_queue (ascending SeqIds) ok = rabbit_disk_queue:tx_commit_with_seqs(Q, lists:reverse(PersistentPubs), - remove_noacks(Acks)), + remove_noacks(Acks)), {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. only_persistent_msg_ids(Pubs) -> lists:reverse(lists:foldl(fun ({MsgId, _, IsPersistent}, Acc) -> - if IsPersistent -> [MsgId | Acc]; - true -> Acc - end - end, [], Pubs)). 
+ if IsPersistent -> [MsgId | Acc]; + true -> Acc + end + end, [], Pubs)). tx_cancel(Publishes, State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_cancel(only_msg_ids(Publishes)), @@ -139,21 +148,21 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q }) -> rabbit_disk_queue:requeue(Q, only_ack_tags(MessagesWithAckTags)), {ok, State}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, - msg_buf = MsgBuf, - next_write_seq = NextSeq - }) -> + msg_buf = MsgBuf, + next_write_seq = NextSeq + }) -> {PersistentPubs, MsgBuf2, NextSeq2} = - lists:foldl(fun ({{MsgId, Msg, IsPersistent}, AckTag}, {Acc, MsgBuf3, NextSeq3}) -> - Acc2 = - if IsPersistent -> - {MsgId, _OldSeqId} = AckTag, - [{AckTag, NextSeq3} | Acc]; - true -> Acc - end, - MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, - MsgBuf3), - {Acc2, MsgBuf4, NextSeq3 + 1} - end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), + lists:foldl(fun ({{MsgId, Msg, IsPersistent}, AckTag}, {Acc, MsgBuf3, NextSeq3}) -> + Acc2 = + if IsPersistent -> + {MsgId, _OldSeqId} = AckTag, + [{AckTag, NextSeq3} | Acc]; + true -> Acc + end, + MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, + MsgBuf3), + {Acc2, MsgBuf4, NextSeq3 + 1} + end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), ok = rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(PersistentPubs)), {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 14461abb..552e4ed9 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -686,10 +686,10 @@ test_disk_queue() -> % unicode chars are supported properly from r13 onwards io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []), [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), - timer:sleep(1000) end || % 1000 milliseconds - MsgSize <- [512, 8192, 32768, 131072], - Qs <- [[1], lists:seq(1,10)], %, lists:seq(1,100), lists:seq(1,1000)], - MsgCount <- [1024, 4096, 16384] + timer:sleep(1000) end || % 1000 milliseconds + MsgSize <- [512, 8192, 32768, 131072], + Qs <- [[1], lists:seq(1,10)], %, lists:seq(1,100), lists:seq(1,1000)], + MsgCount <- [1024, 4096, 16384] ], rdq_virgin(), passed = rdq_stress_gc(10000), @@ -706,27 +706,27 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), {Publish, ok} = - timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) - || N <- List, _ <- Qs] end, - fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List, []) - || Q <- Qs] end - ]]), + timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) + || N <- List, _ <- Qs] end, + fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List, []) + || Q <- Qs] end + ]]), {Deliver, ok} = - timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [begin SeqIds = - [begin {N, Msg, MsgSizeBytes, false, SeqId} = - rabbit_disk_queue:deliver(Q), SeqId end - || N <- List], - ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) - end || Q <- Qs] - end]]), + timer:tc(?MODULE, rdq_time_commands, + [[fun() -> [begin SeqIds = + [begin {N, Msg, MsgSizeBytes, false, SeqId} = + rabbit_disk_queue:deliver(Q), SeqId end + || N <- List], + ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) + end || Q <- Qs] + end]]), io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| 
~14.10f~n", - [MsgCount, MsgSizeBytes, QCount, float(Startup), - float(Publish), (Publish / (MsgCount * QCount)), - (Publish / (MsgCount * QCount * MsgSizeBytes)), - float(Deliver), (Deliver / (MsgCount * QCount)), - (Deliver / (MsgCount * QCount * MsgSizeBytes))]), + [MsgCount, MsgSizeBytes, QCount, float(Startup), + float(Publish), (Publish / (MsgCount * QCount)), + (Publish / (MsgCount * QCount * MsgSizeBytes)), + float(Deliver), (Deliver / (MsgCount * QCount)), + (Deliver / (MsgCount * QCount * MsgSizeBytes))]), rdq_stop(). % we know each file is going to be 1024*1024*10 bytes in size (10MB), so make sure we have @@ -741,30 +741,30 @@ rdq_stress_gc(MsgCount) -> rabbit_disk_queue:tx_commit(q, List, []), StartChunk = round(MsgCount / 20), % 5% AckList = - lists:reverse( - lists:foldl( - fun (E, Acc) -> - case Acc of - [] -> [E]; - [F|_Fs] -> - case E rem F of - 0 -> Acc; - _ -> [E|Acc] - end - end - end, [], lists:flatten([lists:seq(N,MsgCount,N) - || N <- lists:seq(StartChunk,MsgCount)]))) ++ - lists:seq(1, (StartChunk - 1)), + lists:reverse( + lists:foldl( + fun (E, Acc) -> + case Acc of + [] -> [E]; + [F|_Fs] -> + case E rem F of + 0 -> Acc; + _ -> [E|Acc] + end + end + end, [], lists:flatten([lists:seq(N,MsgCount,N) + || N <- lists:seq(StartChunk,MsgCount)]))) ++ + lists:seq(1, (StartChunk - 1)), MsgIdToSeqDict = - lists:foldl( - fun (_, Acc) -> - {MsgId, Msg, MsgSizeBytes, false, SeqId} = - rabbit_disk_queue:deliver(q), - dict:store(MsgId, SeqId, Acc) - end, dict:new(), List), + lists:foldl( + fun (_, Acc) -> + {MsgId, Msg, MsgSizeBytes, false, SeqId} = + rabbit_disk_queue:deliver(q), + dict:store(MsgId, SeqId, Acc) + end, dict:new(), List), %% we really do want to ack each of this individually [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict), - rabbit_disk_queue:ack(q, [SeqId]) end + rabbit_disk_queue:ack(q, [SeqId]) end || MsgId <- AckList], rabbit_disk_queue:tx_commit(q, [], []), rdq_stop(), @@ -800,15 +800,15 @@ rdq_test_startup_with_queue_gaps() -> io:format("Publish done~n", []), %% deliver first half Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1,Half)], + || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), %% ack every other message we have delivered (starting at the _first_) lists:foldl(fun (SeqId2, true) -> - rabbit_disk_queue:ack(q, [SeqId2]), - false; - (_SeqId2, false) -> - true - end, true, Seqs), + rabbit_disk_queue:ack(q, [SeqId2]), + false; + (_SeqId2, false) -> + true + end, true, Seqs), rabbit_disk_queue:tx_commit(q, [], []), io:format("Acked every other message delivered done~n", []), rdq_stop(), @@ -816,12 +816,12 @@ rdq_test_startup_with_queue_gaps() -> io:format("Startup (with shuffle) done~n", []), %% should have shuffled up. 
So we should now get lists:seq(2,500,2) already delivered Seqs2 = [begin {N, Msg, 256, true, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(2,Half,2)], + || N <- lists:seq(2,Half,2)], rabbit_disk_queue:tx_commit(q, [], Seqs2), io:format("Reread non-acked messages done~n", []), %% and now fetch the rest Seqs3 = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1 + Half,Total)], + || N <- lists:seq(1 + Half,Total)], rabbit_disk_queue:tx_commit(q, [], Seqs3), io:format("Read second half done~n", []), empty = rabbit_disk_queue:deliver(q), @@ -840,25 +840,25 @@ rdq_test_redeliver() -> io:format("Publish done~n", []), %% deliver first half Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1,Half)], + || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), %% now requeue every other message (starting at the _first_) %% and ack the other ones lists:foldl(fun (SeqId2, true) -> - rabbit_disk_queue:requeue(q, [SeqId2]), - false; - (SeqId2, false) -> - rabbit_disk_queue:ack(q, [SeqId2]), - true - end, true, Seqs), + rabbit_disk_queue:requeue(q, [SeqId2]), + false; + (SeqId2, false) -> + rabbit_disk_queue:ack(q, [SeqId2]), + true + end, true, Seqs), rabbit_disk_queue:tx_commit(q, [], []), io:format("Redeliver and acking done~n", []), %% we should now get the 2nd half in order, followed by every-other-from-the-first-half Seqs2 = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1+Half, Total)], + || N <- lists:seq(1+Half, Total)], rabbit_disk_queue:tx_commit(q, [], Seqs2), Seqs3 = [begin {N, Msg, 256, true, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1, Half, 2)], + || N <- lists:seq(1, Half, 2)], rabbit_disk_queue:tx_commit(q, [], Seqs3), empty = rabbit_disk_queue:deliver(q), rdq_stop(), @@ -876,7 +876,7 @@ rdq_test_purge() -> io:format("Publish done~n", []), %% deliver first half Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1,Half)], + || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), rabbit_disk_queue:purge(q), io:format("Purge done~n", []), @@ -891,7 +891,7 @@ rdq_time_commands(Funcs) -> rdq_virgin() -> {Micros, {ok, _}} = - timer:tc(rabbit_disk_queue, start_link, [1024*1024]), + timer:tc(rabbit_disk_queue, start_link, [1024*1024]), ok = rabbit_disk_queue:stop_and_obliterate(), timer:sleep(1000), Micros. -- cgit v1.2.1 From 4db040ac638b25e1d3034fcffd75755c7a55a384 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 May 2009 13:38:20 +0100 Subject: Added is_empty and length functions. --- src/rabbit_disk_queue.erl | 127 +++++++++++++++++++++++++++------------------ src/rabbit_mixed_queue.erl | 13 ++++- 2 files changed, 87 insertions(+), 53 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 8c602b53..2bc40123 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -42,6 +42,8 @@ tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, requeue/2, requeue_with_seqs/2, purge/1]). +-export([length/1, is_empty/1]). + -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). -include("rabbit.hrl"). 
@@ -83,7 +85,7 @@ %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} %% Sequences: this is an ets table which contains: -%% {Q, ReadSeqId, WriteSeqId} +%% {Q, ReadSeqId, WriteSeqId, QueueLength} %% rabbit_disk_queue: this is an mnesia table which contains: %% #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, %% is_delivered = IsDelivered, @@ -245,6 +247,8 @@ -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). +-spec(length/1 :: (queue_name()) -> non_neg_integer()). +-spec(is_empty/1 :: (queue_name()) -> bool()). -endif. @@ -303,6 +307,13 @@ to_disk_only_mode() -> to_ram_disk_mode() -> gen_server:call(?SERVER, to_ram_disk_mode, infinity). +length(Q) -> + gen_server:call(?SERVER, {length, Q}, infinity). + +is_empty(Q) -> + Length = rabbit_disk_queue:length(Q), + Length == 0. + %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -378,7 +389,7 @@ handle_call({phantom_deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, false, State), {reply, Result, State1}; handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> - PubMsgSeqIds = lists:zip(PubMsgIds, lists:duplicate(length(PubMsgIds), next)), + PubMsgSeqIds = lists:zip(PubMsgIds, lists:duplicate(erlang:length(PubMsgIds), next)), {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State), {reply, ok, State1}; handle_call({tx_commit_with_seqs, Q, PubSeqMsgIds, AckSeqIds}, _From, State) -> @@ -418,7 +429,12 @@ handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_on {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), - {reply, ok, State #dqstate { operation_mode = ram_disk }}. + {reply, ok, State #dqstate { operation_mode = ram_disk }}; +handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> + case ets:lookup(Sequences, Q) of + [] -> {reply, 0, State}; + [{Q, _ReadSeqId, _WriteSeqId, Length}] -> {reply, Length, State} + end. 
handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, State1} = internal_publish(Q, MsgId, next, MsgBody, State), @@ -436,7 +452,7 @@ handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), {noreply, State1}; handle_cast({requeue, Q, MsgSeqIds}, State) -> - MsgSeqSeqIds = lists:zip(MsgSeqIds, lists:duplicate(length(MsgSeqIds), next)), + MsgSeqSeqIds = lists:zip(MsgSeqIds, lists:duplicate(erlang:length(MsgSeqIds), next)), {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), {noreply, State1}; handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> @@ -522,29 +538,28 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mo internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; - [{Q, ReadSeqId, WriteSeqId}] -> - case mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}) of - [] -> {ok, empty, State}; - [Obj = - #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, - next_seq_id = ReadSeqId2}] -> - [{MsgId, _RefCount, File, Offset, TotalSize}] = - dets_ets_lookup(State, MsgId), - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId}), - ok = - if Delivered -> ok; - true -> - mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc {is_delivered = true}) - end, - if ReadMsg -> - {FileHdl, State1} = get_read_handle(File, State), - {ok, {MsgBody, BodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize), - {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, - State1}; - true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} - end + [{Q, _ReadSeqId, _WriteSeqId, 0}] -> {ok, empty, State}; + [{Q, ReadSeqId, WriteSeqId, Length}] -> + [Obj = + #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, + next_seq_id = ReadSeqId2}] = + mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), + [{MsgId, _RefCount, File, Offset, TotalSize}] = + dets_ets_lookup(State, MsgId), + true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId, Length - 1}), + ok = + if Delivered -> ok; + true -> + mnesia:dirty_write(rabbit_disk_queue, + Obj #dq_msg_loc {is_delivered = true}) + end, + if ReadMsg -> + {FileHdl, State1} = get_read_handle(File, State), + {ok, {MsgBody, BodySize}} = + read_message_at_offset(FileHdl, Offset, TotalSize), + {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, + State1}; + true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} end end. 
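The {Q, ReadSeqId, WriteSeqId, Length} row is the heart of this patch: the queue length rides along with the read and write pointers, so both the {length, Q} call above and the empty check in internal_deliver cost a single ets lookup rather than a read against mnesia. A standalone sketch of that bookkeeping, assuming the same row layout (module and function names are illustrative, not part of the patch):

    -module(seq_len_sketch).
    -export([new/0, publish/2, deliver/2, len/2]).

    new() -> ets:new(sequences_sketch, [set, private]).

    %% enqueue: bump the write pointer and the cached length together
    publish(Sequences, Q) ->
        case ets:lookup(Sequences, Q) of
            [] ->
                true = ets:insert(Sequences, {Q, 0, 1, 1});
            [{Q, ReadSeqId, WriteSeqId, Length}] ->
                true = ets:insert(Sequences,
                                  {Q, ReadSeqId, WriteSeqId + 1, Length + 1})
        end.

    %% dequeue: advance the read pointer, decrement the cached length
    deliver(Sequences, Q) ->
        case ets:lookup(Sequences, Q) of
            [] -> empty;
            [{Q, _ReadSeqId, _WriteSeqId, 0}] -> empty;
            [{Q, ReadSeqId, WriteSeqId, Length}] ->
                true = ets:insert(Sequences,
                                  {Q, ReadSeqId + 1, WriteSeqId, Length - 1}),
                {delivered, ReadSeqId}
        end.

    %% length is now a constant-time lookup, never a traversal
    len(Sequences, Q) ->
        case ets:lookup(Sequences, Q) of
            [] -> 0;
            [{Q, _ReadSeqId, _WriteSeqId, Length}] -> Length
        end.
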
@@ -673,17 +688,18 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, current_file_name = CurName, sequences = Sequences }) -> - {PubList, PubAcc, ReadSeqId} = + {PubList, PubAcc, ReadSeqId, Length} = case PubMsgSeqIds of [] -> {[], undefined, undefined}; [_|PubMsgSeqIdsTail] -> - {InitReadSeqId, InitWriteSeqId} = + {InitReadSeqId, InitWriteSeqId, InitLength} = case ets:lookup(Sequences, Q) of - [] -> {0,0}; - [{Q, ReadSeqId2, WriteSeqId2}] -> {ReadSeqId2, WriteSeqId2} + [] -> {0,0,0}; + [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> + {ReadSeqId2, WriteSeqId2, Length2} end, { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), - InitWriteSeqId, InitReadSeqId} + InitWriteSeqId, InitReadSeqId, InitLength} end, {atomic, {Sync, WriteSeqId, State2}} = mnesia:transaction( @@ -719,7 +735,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, {Sync2, WriteSeqId3, State3} end), true = if PubList =:= [] -> true; - true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId}) + true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, Length + erlang:length(PubList)}) end, ok = if Sync -> file:sync(CurHdl); true -> ok @@ -730,16 +746,16 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, internal_publish(Q, MsgId, SeqId, MsgBody, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, MsgBody, State), - {ReadSeqId, WriteSeqId} = + {ReadSeqId, WriteSeqId, Length} = case ets:lookup(Sequences, Q) of [] -> %% previously unseen queue - {0, 0}; - [{Q, ReadSeqId2, WriteSeqId2}] -> - {ReadSeqId2, WriteSeqId2} + {0, 0, 0}; + [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> + {ReadSeqId2, WriteSeqId2, Length2} end, WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId), WriteSeqId3Next = WriteSeqId3 + 1, - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId3Next}), + true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId3Next, Length + 1}), ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, msg_id = MsgId, @@ -750,7 +766,7 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned - MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), + MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). internal_requeue(_Q, [], State) -> @@ -780,7 +796,7 @@ internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], %% as they have no concept of sequence id anyway). %% the Q _must_ already exist - [{Q, ReadSeqId, WriteSeqId}] = ets:lookup(Sequences, Q), + [{Q, ReadSeqId, WriteSeqId, Length}] = ets:lookup(Sequences, Q), MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), {atomic, WriteSeqId2} = mnesia:transaction( @@ -806,13 +822,13 @@ internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], NextSeqIdTo2 end, WriteSeqId, MsgSeqIdsZipped) end), - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2}), + true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2, Length + erlang:length(MsgSeqIds)}), {ok, State}. 
internal_purge(Q, State = #dqstate { sequences = Sequences }) ->
     case ets:lookup(Sequences, Q) of
         [] -> {ok, 0, State};
-        [{Q, ReadSeqId, WriteSeqId}] ->
+        [{Q, ReadSeqId, WriteSeqId, _Length}] ->
             {atomic, {ok, State2}} =
                 mnesia:transaction(
                   fun() ->
@@ -825,7 +841,7 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) ->
                           end, [], lists:seq(ReadSeqId, WriteSeqId - 1)),
                       remove_messages(Q, MsgSeqIds, txn, State)
               end),
-            true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}),
+            true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId, 0}),
             {ok, WriteSeqId - ReadSeqId, State2}
     end.

@@ -1146,7 +1162,7 @@ load_from_disk(State) ->
               ok = mnesia:read_lock_table(rabbit_disk_queue),
               mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) ->
                                    true = 1 =:=
-                                       length(dets_ets_lookup(State1, MsgId))
+                                       erlang:length(dets_ets_lookup(State1, MsgId))
                            end,
                            true, rabbit_disk_queue)
      end),
@@ -1171,9 +1187,16 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) ->
                            [] -> true =
                                      ets:insert_new(Sequences,
                                                     {Q, SeqId, NextWrite});
-                           [Orig = {Q, Read, Write}] ->
+                           [Orig = {Q, Read, Write, Length}] ->
                                Repl = {Q, lists:min([Read, SeqId]),
-                                       lists:max([Write, NextWrite])},
+                                       %% Length is wrong here, but
+                                       %% it doesn't matter because
+                                       %% we'll pull out the gaps in
+                                       %% remove_gaps_in_sequences and
+                                       %% then do a straight
+                                       %% subtraction to get the
+                                       %% right length
+                                       lists:max([Write, NextWrite]), Length},
                                if Orig /= Repl -> true = ets:insert(Sequences, Repl);
                                   true -> true
@@ -1199,9 +1222,11 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) ->
       fun() ->
               ok = mnesia:write_lock_table(rabbit_disk_queue),
               lists:foreach(
-                fun ({Q, ReadSeqId, WriteSeqId}) ->
+                fun ({Q, ReadSeqId, WriteSeqId, _Length}) ->
                         Gap = shuffle_up(Q, ReadSeqId - 1, WriteSeqId - 1, 0),
-                        true = ets:insert(Sequences, {Q, ReadSeqId + Gap, WriteSeqId})
+                        ReadSeqId2 = ReadSeqId + Gap,
+                        Length = WriteSeqId - ReadSeqId2,
+                        true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId, Length})
                 end, ets:match_object(Sequences, '_'))
       end).
@@ -1244,7 +1269,7 @@ load_messages(Left, [File|Files],
     {ok, Messages} = scan_file_for_valid_messages(form_filename(File)),
     {ValidMessagesRev, ValidTotalSize} = lists:foldl(
         fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
-                case length(mnesia:dirty_index_match_object
+                case erlang:length(mnesia:dirty_index_match_object
                             (rabbit_disk_queue,
                              #dq_msg_loc { msg_id = MsgId,
                                            queue_and_seq_id = '_',
@@ -1289,7 +1314,7 @@ recover_crashed_compactions1(Files, TmpFile) ->
     %% all of these messages should appear in the mnesia table,
     %% otherwise they wouldn't have been copied out
     lists:foreach(fun (MsgId) ->
                          true = 0 <
-                             length(mnesia:dirty_index_match_object
+                             erlang:length(mnesia:dirty_index_match_object
                                             (rabbit_disk_queue,
                                              #dq_msg_loc { msg_id = MsgId,
                                                            queue_and_seq_id = '_',
@@ -1329,7 +1354,7 @@ recover_crashed_compactions1(Files, TmpFile) ->
     %% we're in case 4 above.
     %% check that everything in the main file is a valid message in mnesia
     lists:foreach(fun (MsgId) ->
                          true = 0 <
-                             length(mnesia:dirty_index_match_object
+                             erlang:length(mnesia:dirty_index_match_object
                                             (rabbit_disk_queue,
                                              #dq_msg_loc { msg_id = MsgId,
                                                            queue_and_seq_id = '_',
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 4749e1da..c909e2a5 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -35,7 +35,7 @@
 -export([publish/4, deliver/1, ack/2,
          tx_publish/4, tx_commit/3, tx_cancel/2,
-         requeue/2, purge/1]).
+         requeue/2, purge/1, length/1, is_empty/1]).
-record(mqstate, { mode, msg_buf, @@ -49,7 +49,7 @@ start_link(Queue, Mode) when Mode =:= disk orelse Mode =:= mixed -> rabbit_disk_queue:start_link(?FILE_SIZE_LIMIT), rabbit_disk_queue:to_ram_disk_mode(), %% TODO, CHANGE ME - {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 0, queue = Queue }}. + {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 1, queue = Queue }}. publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk, queue = Q }) -> ok = rabbit_disk_queue:publish(Q, MsgId, Msg), @@ -173,3 +173,12 @@ purge(State = #mqstate { queue = Q, msg_buf = MsgBuf, mode = mixed }) -> rabbit_disk_queue:purge(Q), Count = queue:len(MsgBuf), {Count, State #mqstate { msg_buf = queue:new() }}. + +length(State = #mqstate { queue = Q, mode = disk }) -> + Length = rabbit_disk_queue:length(Q), + {Length, State}; +length(State = #mqstate { mode = mixed, msg_buf = MsgBuf }) -> + {queue:length(MsgBuf), State}. + +is_empty(State) -> + 0 == rabbit_mixed_queue:length(State). -- cgit v1.2.1 From ad91c9f499acedb627278b61f9b09a4930fcdd89 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 May 2009 13:57:21 +0100 Subject: Made deliver (in its various guises) also return the number of remaining messages --- src/rabbit_disk_queue.erl | 11 ++++++----- src/rabbit_mixed_queue.erl | 8 ++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2bc40123..71d812f6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -231,9 +231,9 @@ -spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id(), binary()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> {'empty' | {msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}}). + bool(), {msg_id(), seq_id()}, non_neg_integer()}}). -spec(phantom_deliver/1 :: (queue_name()) -> - { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). + { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, non_neg_integer()}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). @@ -546,7 +546,8 @@ internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), [{MsgId, _RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId, Length - 1}), + Remaining = Length - 1, + true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId, Remaining}), ok = if Delivered -> ok; true -> @@ -557,9 +558,9 @@ internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> {FileHdl, State1} = get_read_handle(File, State), {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), - {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, + {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}, Remaining}, State1}; - true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} + true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}, State} end end. 
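With deliver/1 (and phantom_deliver/1) now reporting how many messages remain, a caller can drain a queue without interleaving calls to length/1. A rough sketch of such a consumer against the API above (drain/2 is illustrative and not part of the patch):

    %% Deliver-and-ack until the queue reports nothing remaining;
    %% returns the drained message ids in delivery order.
    drain(Q, Acc) ->
        case rabbit_disk_queue:deliver(Q) of
            empty ->
                lists:reverse(Acc);
            {MsgId, _MsgBody, _BodySize, _IsDelivered, AckTag, Remaining} ->
                ok = rabbit_disk_queue:ack(Q, [AckTag]),
                case Remaining of
                    0 -> lists:reverse([MsgId | Acc]);
                    _ -> drain(Q, [MsgId | Acc])
                end
        end.
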
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index c909e2a5..811d140a 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -68,19 +68,19 @@ publish(MsgId, Msg, IsPersistent, deliver(State = #mqstate { mode = disk, queue = Q }) -> {rabbit_disk_queue:deliver(Q), State}; -deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf }) -> +deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextWrite }) -> {Result, MsgBuf2} = queue:out(MsgBuf), case Result of empty -> {empty, State}; - {value, {_Seq, {MsgId, Msg, IsPersistent}}} -> + {value, {Seq, {MsgId, Msg, IsPersistent}}} -> {IsDelivered, Ack} = if IsPersistent -> - {MsgId, IsDelivered2, Ack2} = rabbit_disk_queue:phantom_deliver(Q), + {MsgId, IsDelivered2, Ack2, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), {IsDelivered2, Ack2}; true -> {false, noack} end, - {{MsgId, Msg, size(Msg), IsDelivered, Ack}, + {{MsgId, Msg, size(Msg), IsDelivered, Ack, (NextWrite - 1 - Seq)}, State #mqstate { msg_buf = MsgBuf2 }} end. -- cgit v1.2.1 From e5f7a7da72c5949e52da695c9ea0726c8931add0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 May 2009 17:34:23 +0100 Subject: WIP, DNC. However, I think the basic control flow is right, although there are a lot of bugs. One of the bigger remaining issues is the correct routing of AckTags around, and no doubt there will be assumptions I've made which are wrong, but I think it's getting there... --- src/rabbit_amqqueue_process.erl | 349 ++++++++++++++++++---------------------- src/rabbit_misc.erl | 10 ++ src/rabbit_mixed_queue.erl | 9 +- 3 files changed, 174 insertions(+), 194 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 69edb64f..417c3f02 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -51,8 +51,8 @@ owner, exclusive_consumer, has_had_consumers, + mixed_state, next_msg_id, - message_buffer, round_robin}). -record(consumer, {tag, ack_required}). @@ -96,16 +96,18 @@ init(Q) -> owner = none, exclusive_consumer = none, has_had_consumers = false, + mixed_state = rabbit_mixed_queue:start_link(qname(Q), mixed), %% TODO, CHANGE ME next_msg_id = 1, - message_buffer = queue:new(), round_robin = queue:new()}, ?HIBERNATE_AFTER}. terminate(_Reason, State) -> %% FIXME: How do we cancel active subscriptions? QName = qname(State), - lists:foreach(fun (Txn) -> ok = rollback_work(Txn, QName) end, - all_tx()), - ok = purge_message_buffer(QName, State#q.message_buffer), + NewState = + lists:foldl(fun (Txn, State1) -> + rollback_transaction(Txn, State1) + end, State, all_tx()), + rabbit_mixed_queue:purge(NewState #q.mixed_state), ok = rabbit_amqqueue:internal_delete(QName). code_change(_OldVsn, State, _Extra) -> @@ -156,11 +158,10 @@ ch_record_state_transition(OldCR, NewCR) -> true -> ok end. 
-deliver_immediately(Message, Delivered, - State = #q{q = #amqqueue{name = QName}, - round_robin = RoundRobin, - next_msg_id = NextId}) -> - ?LOGDEBUG("AMQQUEUE ~p DELIVERY:~n~p~n", [QName, Message]), +deliver_queue(Fun, + State = #q{q = #amqqueue{name = QName}, + round_robin = RoundRobin, + next_msg_id = NextId}) -> case queue:out(RoundRobin) of {{value, QEntry = {ChPid, #consumer{tag = ConsumerTag, ack_required = AckRequired}}}, @@ -171,62 +172,103 @@ deliver_immediately(Message, Delivered, case not(AckRequired) orelse rabbit_limiter:can_send( LimiterPid, self()) of true -> - rabbit_channel:deliver( - ChPid, ConsumerTag, AckRequired, - {QName, self(), NextId, Delivered, Message}), - NewUAM = case AckRequired of - true -> dict:store(NextId, Message, UAM); - false -> UAM - end, - NewC = C#cr{unsent_message_count = Count + 1, - unacked_messages = NewUAM}, - store_ch_record(NewC), - NewConsumers = - case ch_record_state_transition(C, NewC) of - ok -> queue:in(QEntry, RoundRobinTail); - block -> block_consumers(ChPid, RoundRobinTail) - end, - {offered, AckRequired, State#q{round_robin = NewConsumers, - next_msg_id = NextId + 1}}; + case Fun(State) of + {empty, State2} -> + {empty, State2}; + {{MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, State2} -> + rabbit_channel:deliver( + ChPid, ConsumerTag, AckRequired, + {QName, self(), NextId, Delivered, Message}), %% TODO FIXME + NewUAM = case AckRequired of + true -> dict:store(NextId, Message, UAM); + false -> UAM + end, + NewC = C#cr{unsent_message_count = Count + 1, + unacked_messages = NewUAM}, + store_ch_record(NewC), + NewConsumers = + case ch_record_state_transition(C, NewC) of + ok -> queue:in(QEntry, RoundRobinTail); + block -> block_consumers(ChPid, RoundRobinTail) + end, + State3 = State2 #q { round_robin = NewConsumers, + next_msg_id = NextId + 1 + }, + if Remaining == 0 -> {offered, AckRequired, State3}; + true -> deliver_queue(Fun, State3) + end + end; false -> store_ch_record(C#cr{is_limit_active = true}), NewConsumers = block_consumers(ChPid, RoundRobinTail), - deliver_immediately(Message, Delivered, - State#q{round_robin = NewConsumers}) + deliver_queue(Fun, State#q{round_robin = NewConsumers}) end; {empty, _} -> {not_offered, State} end. +deliver_from_queue(State = #q { mixed_state = MS }) -> + {Res, MS2} = rabbit_mixed_queue:deliver(MS), + {Res, State #q { mixed_state = MS2 }}. + +run_message_queue(State) -> + case deliver_queue(deliver_from_queue/1, State) of + {not_offered, State2} -> + State2; + {empty, State2} -> + State2; + {offered, _AckRequired, State2} -> + State2 + end. + attempt_delivery(none, Message, State) -> - case deliver_immediately(Message, false, State) of + Fun = fun (State2) -> {{MsgId, Message, MsgSize, false, AckTag, 0}, State2} end, %% TODO FIX ME + case deliver_queue(Fun, State) of {offered, false, State1} -> {true, State1}; {offered, true, State1} -> - persist_message(none, qname(State), Message), %% DQ HERE - persist_delivery(qname(State), Message, false), %% DQ HERE - {true, State1}; + MS = rabbit_mixed_queue:publish_delivered(Message, State1 #q.mixed_state), %% TODO API CHANGE + {true, State1 #q { mixed_state = MS }}; {not_offered, State1} -> {false, State1} end; attempt_delivery(Txn, Message, State) -> - persist_message(Txn, qname(State), Message), %% DQ tx_commit and store msgid in txn map - record_pending_message(Txn, Message), %% DQ seems to be done here! - {true, State}. 
+ MS = rabbit_mixed_queue:tx_publish(Message, State #q.mixed_state), %% TODO API CHANGE + record_pending_message(Txn, Message), + {true, State #q { mixed_state = MS }}. deliver_or_enqueue(Txn, Message, State) -> case attempt_delivery(Txn, Message, State) of {true, NewState} -> {true, NewState}; {false, NewState} -> - persist_message(Txn, qname(State), Message), %% DQ Txn must be false here - NewMB = queue:in({Message, false}, NewState#q.message_buffer), %% DQ magic here - {false, NewState#q{message_buffer = NewMB}} + %% Txn is none + MS = rabbit_mixed_queue:publish(Message, State #q.mixed_state), %% TODO API CHANGE + {false, NewState #q { mixed_state = MS }} + end. + +%% all these messages have already been delivered at least once and +%% not ack'd, but need to be either redelivered or requeued +deliver_or_requeue_n(Messages, State) -> + {AutoAcks, Remaining} = + dropwhilefoldl(deliver_or_requeue_msg/2, {[], State}, Messages), + {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), State #q.mixed_state), %% TODO FIXME + case Remaining of + [] -> run_message_queue(State #q { mixed_state = MS }); + _ -> {ok, MS2} = rabbit_mixed_queue:requeue(Remaining, MS), %% TODO FIXME + State #q { mixed_state = MS2 } end. -deliver_or_enqueue_n(Messages, State = #q{message_buffer = MessageBuffer}) -> - run_poke_burst(queue:join(MessageBuffer, queue:from_list(Messages)), - State). +deliver_or_requeue_msg(Message, {AcksAcc, State}) -> + Fun = fun (State2) -> {{MsgId, Message, MsgSize, true, AckTag, 0}, State2} end, %% TODO FIX ME + case deliver_queue(Fun, State) of + {offered, true, State1} -> + {true, {AcksAcc, State1}}; + {offered, false, State1} -> + {true, {[AckTag|AcksAcc], State1}}; %% TODO FIXME where does AckTag come from?! + {not_offered, State1} -> + {false, {AcksAcc, State1}} + end. block_consumers(ChPid, RoundRobin) -> %%?LOGDEBUG("~p Blocking ~p from ~p~n", [self(), ChPid, queue:to_list(RoundRobin)]), @@ -257,7 +299,7 @@ possibly_unblock(State, ChPid, Update) -> unblock -> NewRR = unblock_consumers(ChPid, NewC#cr.consumers, State#q.round_robin), - run_poke_burst(State#q{round_robin = NewRR}) + run_message_queue(State#q{round_robin = NewRR}) end end. @@ -300,9 +342,9 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder, erlang:demonitor(MonitorRef), erase({ch, ChPid}), case check_auto_delete( - deliver_or_enqueue_n( - [{Message, true} || - {_Messsage_id, Message} <- dict:to_list(UAM)], %% DQ alter all this stuff? + deliver_or_requeue_n( + [Message || + {_Messsage_id, Message} <- dict:to_list(UAM)], State#q{ exclusive_consumer = case Holder of {ChPid, _} -> none; @@ -335,26 +377,6 @@ check_exclusive_access(none, true) -> false -> in_use end. -run_poke_burst(State = #q{message_buffer = MessageBuffer}) -> - run_poke_burst(MessageBuffer, State). - -run_poke_burst(MessageBuffer, State) -> - case queue:out(MessageBuffer) of - {{value, {Message, Delivered}}, BufferTail} -> - case deliver_immediately(Message, Delivered, State) of - {offered, true, NewState} -> - persist_delivery(qname(State), Message, Delivered), %% DQ ack needed - run_poke_burst(BufferTail, NewState); - {offered, false, NewState} -> - persist_auto_ack(qname(State), Message), %% DQ record? We don't persist acks anyway now... - run_poke_burst(BufferTail, NewState); - {not_offered, NewState} -> - NewState#q{message_buffer = MessageBuffer} - end; - {empty, _} -> - State#q{message_buffer = MessageBuffer} - end. - is_unused() -> is_unused1(get()). 
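The deliver_queue/2 introduced above separates finding a ready consumer from producing a message: the producer callback takes the queue state and yields either empty or one deliverable {MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining} tuple, with Remaining = 0 ending the loop. Besides deliver_from_queue/1, any one-off source fits the same shape; a sketch (assuming Msg is a binary payload; field values illustrative):

    %% A one-shot producer: offers exactly one message, not previously
    %% delivered, with nothing to ack and nothing further remaining, so
    %% deliver_queue stops after a single consumer has taken it.
    one_shot(MsgId, Msg) ->
        fun (State) ->
                {{MsgId, Msg, size(Msg), false, noack, 0}, State}
        end.
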
@@ -371,62 +393,6 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -persist_message(_Txn, _QName, #basic_message{is_persistent = false}) -> %% DQ - ok; -persist_message(Txn, QName, Message) -> - M = Message#basic_message{ - %% don't persist any recoverable decoded properties, rebuild from properties_bin on restore - content = rabbit_binary_parser:clear_decoded_content( - Message#basic_message.content)}, - persist_work(Txn, QName, - [{publish, M, {QName, M#basic_message.guid}}]). - -persist_delivery(_QName, _Message, %% DQ - true) -> - ok; -persist_delivery(_QName, #basic_message{is_persistent = false}, %% DQ - _Delivered) -> - ok; -persist_delivery(QName, #basic_message{guid = MsgId}, %% DQ - _Delivered) -> - persist_work(none, QName, [{deliver, {QName, MsgId}}]). - -persist_acks(Txn, QName, Messages) -> %% DQ - persist_work(Txn, QName, - [{ack, {QName, MsgId}} || - #basic_message{guid = MsgId, is_persistent = P} <- Messages, - P]). - -persist_auto_ack(_QName, #basic_message{is_persistent = false}) -> - ok; -persist_auto_ack(QName, #basic_message{is_persistent = true, guid = MsgId}) -> - %% auto-acks are always non-transactional - rabbit_persister:dirty_work([{ack, {QName, MsgId}}]). - -persist_work(_Txn,_QName, []) -> - ok; -persist_work(none, _QName, WorkList) -> - rabbit_persister:dirty_work(WorkList); -persist_work(Txn, QName, WorkList) -> - mark_tx_persistent(Txn), - rabbit_persister:extend_transaction({Txn, QName}, WorkList). - -commit_work(Txn, QName) -> - do_if_persistent(fun rabbit_persister:commit_transaction/1, - Txn, QName). - -rollback_work(Txn, QName) -> - do_if_persistent(fun rabbit_persister:rollback_transaction/1, - Txn, QName). - -%% optimisation: don't do unnecessary work -%% it would be nice if this was handled by the persister -do_if_persistent(F, Txn, QName) -> - case is_tx_persistent(Txn) of - false -> ok; - true -> ok = F({Txn, QName}) - end. - lookup_tx(Txn) -> case get({txn, Txn}) of undefined -> #tx{ch_pid = none, @@ -448,54 +414,52 @@ all_tx_record() -> all_tx() -> [Txn || {{txn, Txn}, _} <- get()]. -mark_tx_persistent(Txn) -> - Tx = lookup_tx(Txn), - store_tx(Txn, Tx#tx{is_persistent = true}). - is_tx_persistent(Txn) -> #tx{is_persistent = Res} = lookup_tx(Txn), Res. -record_pending_message(Txn, Message) -> - Tx = #tx{pending_messages = Pending} = lookup_tx(Txn), - store_tx(Txn, Tx#tx{pending_messages = [{Message, false} | Pending]}). +record_pending_message(Txn, Message = #basic_message { is_persistent = IsPersistent }) -> + Tx = #tx{pending_messages = Pending, is_persistent = IsPersistentTxn } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [{Message, false} | Pending], + is_persistent = IsPersistentTxn orelse IsPersistent + }). record_pending_acks(Txn, ChPid, MsgIds) -> Tx = #tx{pending_acks = Pending} = lookup_tx(Txn), store_tx(Txn, Tx#tx{pending_acks = [MsgIds | Pending], ch_pid = ChPid}). 
-process_pending(Txn, State) -> - #tx{ch_pid = ChPid, - pending_messages = PendingMessages, - pending_acks = PendingAcks} = lookup_tx(Txn), +commit_transaction(Txn, State) -> + #tx { ch_pid = ChPid, + pending_messages = PendingMessages, + pending_acks = PendingAcks + } = lookup_tx(Txn), + PendingMessagesOrdered = lists:reverse(PendingMessages), + PendingAcksOrdered = lists:append(lists:reverse(PendingAcks)), case lookup_ch(ChPid) of - not_found -> ok; - C = #cr{unacked_messages = UAM} -> - {_Acked, Remaining} = - collect_messages(lists:append(PendingAcks), UAM), - store_ch_record(C#cr{unacked_messages = Remaining}) - end, - deliver_or_enqueue_n(lists:reverse(PendingMessages), State). + not_found -> State; + C = #cr { unacked_messages = UAM } -> + {Acked, Remaining} = + collect_messages(PendingAcksAppended, UAM), + store_ch_record(C#cr{unacked_messages = Remaining}), + MS = rabbit_mixed_queue:tx_commit(PendingMessagesOrdered, + Acked, + State #q.mixed_state), + State #q { mixed_state = MS } + end. +rollback_transaction(Txn, State) -> + #tx { pending_messages = PendingMessages + } = lookup_tx(Txn), + MS = rabbit_mixed_queue:tx_cancel(lists:reverse(PendingMessages), State #q.mixed_state), + State #q { mixed_state = MS }. + +%% {A, B} = collect_messages(C, D) %% A = C `intersect` D; B = D \\ C +%% err, A = C `intersect` D , via projection through the dict that is A collect_messages(MsgIds, UAM) -> lists:mapfoldl( fun (MsgId, D) -> {dict:fetch(MsgId, D), dict:erase(MsgId, D)} end, UAM, MsgIds). -purge_message_buffer(QName, MessageBuffer) -> - Messages = - [[Message || {Message, _Delivered} <- - queue:to_list(MessageBuffer)] | - lists:map( - fun (#cr{unacked_messages = UAM}) -> - [Message || {_MessageId, Message} <- dict:to_list(UAM)] - end, - all_ch_record())], - %% the simplest, though certainly not the most obvious or - %% efficient, way to purge messages from the persister is to - %% artifically ack them. - persist_acks(none, QName, lists:append(Messages)). - infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. 
i(name, #q{q = #amqqueue{name = Name}}) -> Name; @@ -564,12 +528,11 @@ handle_call({deliver, Txn, Message}, _From, State) -> reply(Delivered, NewState); handle_call({commit, Txn}, From, State) -> - ok = commit_work(Txn, qname(State)), + NewState = commit_transaction(Txn, State), %% optimisation: we reply straight away so the sender can continue gen_server2:reply(From, ok), - NewState = process_pending(Txn, State), erase_tx(Txn), - noreply(NewState); + noreply(run_message_queue(NewState)); handle_call({notify_down, ChPid}, From, State) -> %% optimisation: we reply straight away so the sender can continue @@ -579,23 +542,25 @@ handle_call({notify_down, ChPid}, From, State) -> handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, next_msg_id = NextId, - message_buffer = MessageBuffer}) -> - case queue:out(MessageBuffer) of - {{value, {Message, Delivered}}, BufferTail} -> + mixed_state = MS + }) -> + case rabbit_mixed_queue:deliver(MS) of + {empty, MS2} -> reply(empty, State #q { mixed_state = MS2 }); + {{MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, MS2} -> AckRequired = not(NoAck), - case AckRequired of - true -> - persist_delivery(QName, Message, Delivered), - C = #cr{unacked_messages = UAM} = ch_record(ChPid), - NewUAM = dict:store(NextId, Message, UAM), - store_ch_record(C#cr{unacked_messages = NewUAM}); - false -> - persist_auto_ack(QName, Message) - end, - Msg = {QName, self(), NextId, Delivered, Message}, - reply({ok, queue:len(BufferTail), Msg}, - State#q{message_buffer = BufferTail, - next_msg_id = NextId + 1}); + MS3 = + case AckRequired of + true -> + C = #cr{unacked_messages = UAM} = ch_record(ChPid), + NewUAM = dict:store(NextId, Message, UAM), + store_ch_record(C#cr{unacked_messages = NewUAM}), + MS2; + false -> + rabbit_mixed_queue:ack([AckTag], MS2) + end, + Message = {QName, self(), NextId, IsDelivered, Msg}, %% TODO, FIX UP + reply({ok, Remaining, Message}, + State#q{next_msg_id = NextId + 1}); {empty, _} -> reply(empty, State) end; @@ -630,7 +595,7 @@ handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, end, round_robin = queue:in({ChPid, Consumer}, RoundRobin)}, ok = maybe_send_reply(ChPid, OkMsg), - reply(ok, run_poke_burst(State1)) + reply(ok, run_message_queue(State1)) end end; @@ -667,27 +632,29 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, end; handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - message_buffer = MessageBuffer, + mixed_state = MS, round_robin = RoundRobin}) -> - reply({ok, Name, queue:len(MessageBuffer), queue:len(RoundRobin)}, State); + {Length, MS2} = rabbit_mixed_queue:length(MS), + reply({ok, Name, Length, queue:len(RoundRobin)}, State #q { mixed_state = MS2 }); handle_call({delete, IfUnused, IfEmpty}, _From, - State = #q{message_buffer = MessageBuffer}) -> + State = #q{message_buffer = MessageBuffer, mixed_state = MS}) -> IsEmpty = queue:is_empty(MessageBuffer), IsUnused = is_unused(), + {Length, MS2} = rabbit_mixed_queue:length(MS), if IfEmpty and not(IsEmpty) -> reply({error, not_empty}, State); IfUnused and not(IsUnused) -> reply({error, in_use}, State); true -> - {stop, normal, {ok, queue:len(MessageBuffer)}, State} + {stop, normal, {ok, Length}, State #q { mixed_state = MS2 }} end; -handle_call(purge, _From, State = #q{message_buffer = MessageBuffer}) -> - ok = purge_message_buffer(qname(State), MessageBuffer), - reply({ok, queue:len(MessageBuffer)}, - State#q{message_buffer = queue:new()}); +handle_call(purge, _From, State) -> + {Count, MS} = 
rabbit_mixed_queue:purge(State #q.mixed_state),
+    reply({ok, Count},
+          State #q { mixed_state = MS });

 handle_call({claim_queue, ReaderPid}, _From,
             State = #q{owner = Owner, exclusive_consumer = Holder}) ->
@@ -722,23 +689,24 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) ->
             noreply(State);
         C = #cr{unacked_messages = UAM} ->
             {Acked, Remaining} = collect_messages(MsgIds, UAM),
-            persist_acks(Txn, qname(State), Acked),
             case Txn of
                 none ->
-                    store_ch_record(C#cr{unacked_messages = Remaining});
+                    MS = rabbit_mixed_queue:ack(Acked, State #q.mixed_state), %% TODO API
+                    store_ch_record(C#cr{unacked_messages = Remaining}),
+                    noreply(State #q { mixed_state = MS });
                 _ ->
-                    record_pending_acks(Txn, ChPid, MsgIds)
+                    record_pending_acks(Txn, ChPid, MsgIds),
+                    noreply(State)
             end,
-            noreply(State)
     end;

 handle_cast({rollback, Txn}, State) ->
-    ok = rollback_work(Txn, qname(State)),
+    NewState = rollback_transaction(Txn, State),
     erase_tx(Txn),
-    noreply(State);
+    noreply(State2);

 handle_cast({redeliver, Messages}, State) ->
-    noreply(deliver_or_enqueue_n(Messages, State));
+    noreply(ok); %% TODO - probably remove - only used by the old persister

 handle_cast({requeue, MsgIds, ChPid}, State) ->
     case lookup_ch(ChPid) of
@@ -749,8 +717,7 @@ handle_cast({requeue, MsgIds, ChPid}, State) ->
                       [ChPid]),
             noreply(State);
         C = #cr{unacked_messages = UAM} ->
             {Messages, NewUAM} = collect_messages(MsgIds, UAM),
             store_ch_record(C#cr{unacked_messages = NewUAM}),
-            noreply(deliver_or_enqueue_n(
-                      [{Message, true} || Message <- Messages], State))
+            noreply(deliver_or_requeue_n(Messages, State))
         end;

 handle_cast({unblock, ChPid}, State) ->
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index de7bc010..f90abe3f 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -51,6 +51,7 @@
 -export([append_file/2, ensure_parent_dirs_exist/1]).
 -export([format_stderr/2]).
 -export([start_applications/1, stop_applications/1]).
+-export([dropwhilefoldl/3]).

 -import(mnesia).
 -import(lists).
@@ -407,3 +408,12 @@ stop_applications(Apps) ->
                        cannot_stop_application,
                        Apps).

+dropwhilefoldl(_PredFun, Acc0, []) ->
+    {Acc0, []};
+dropwhilefoldl(PredFun, Acc0, [E|List]) ->
+    case PredFun(E, Acc0) of
+        {true, Acc1} ->
+            dropwhilefoldl(PredFun, Acc1, List);
+        {false, Acc1} ->
+            {Acc1, List}
+    end.
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 811d140a..790f4b75 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -87,9 +87,12 @@ deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write
 remove_noacks(Acks) ->
     lists:filter(fun (A) -> A /= noack end, Acks).

-ack(Acks, State = #mqstate { queue = Q }) ->
-    ok = rabbit_disk_queue:ack(Q, remove_noacks(Acks)),
-    {ok, State}.
+ack(Acks, State = #mqstate { queue = Q }) ->
+    case remove_noacks(Acks) of
+        [] -> {ok, State};
+        AckTags -> ok = rabbit_disk_queue:ack(Q, AckTags),
+                   {ok, State}
+    end.

 tx_publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk }) ->
     ok = rabbit_disk_queue:tx_publish(MsgId, Msg),
-- cgit v1.2.1


From d7ac123035531dce255a552fff40222a76197781 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 21 May 2009 18:13:16 +0100
Subject: Still WIP + DNC. However, deliver_queue is now rather funky, pretty much incorporating a HO fold. This does rather simplify matters as it means we get told in advance that we need to produce a message only when we have a consumer for it, and at the same time we get told about whether this message is going to get an explicit ack, and we get an accumulator to play with as well.
Pretty nifty - has simplified code elsewhere. --- src/rabbit_amqqueue_process.erl | 87 ++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 417c3f02..5e941346 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -158,7 +158,7 @@ ch_record_state_transition(OldCR, NewCR) -> true -> ok end. -deliver_queue(Fun, +deliver_queue(Fun, FunAcc0, State = #q{q = #amqqueue{name = QName}, round_robin = RoundRobin, next_msg_id = NextId}) -> @@ -172,10 +172,10 @@ deliver_queue(Fun, case not(AckRequired) orelse rabbit_limiter:can_send( LimiterPid, self()) of true -> - case Fun(State) of - {empty, State2} -> - {empty, State2}; - {{MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, State2} -> + case Fun(AckRequired, FunAcc0, State) of + {empty, FunAcc1, State2} -> + {FunAcc1, State2}; + {{MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, FunAcc1, State2} -> rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, {QName, self(), NextId, Delivered, Message}), %% TODO FIXME @@ -194,46 +194,51 @@ deliver_queue(Fun, State3 = State2 #q { round_robin = NewConsumers, next_msg_id = NextId + 1 }, - if Remaining == 0 -> {offered, AckRequired, State3}; - true -> deliver_queue(Fun, State3) + if Remaining == 0 -> {FunAcc1, State3}; + true -> deliver_queue(Fun, FunAcc1, State3) end end; false -> store_ch_record(C#cr{is_limit_active = true}), NewConsumers = block_consumers(ChPid, RoundRobinTail), - deliver_queue(Fun, State#q{round_robin = NewConsumers}) + deliver_queue(Fun, FunAcc0, State#q{round_robin = NewConsumers}) end; {empty, _} -> - {not_offered, State} + {FunAcc0, State} end. -deliver_from_queue(State = #q { mixed_state = MS }) -> +deliver_from_queue(AckRequired, Acc, State = #q { mixed_state = MS }) -> {Res, MS2} = rabbit_mixed_queue:deliver(MS), - {Res, State #q { mixed_state = MS2 }}. + MS3 = case {Res, AckRequired} of + {empty, _} -> MS2; + {_, true} -> MS2; + {{_MsgId, _Msg, _MsgSize, _IsDelivered, AckTag, _Remaining}, false} -> + {ok, MS4} = rabbit_mixed_queue:ack([AckTag], MS2), + MS3 + end, + {Res, Acc, State #q { mixed_state = MS3 }}. run_message_queue(State) -> - case deliver_queue(deliver_from_queue/1, State) of - {not_offered, State2} -> - State2; - {empty, State2} -> - State2; - {offered, _AckRequired, State2} -> - State2 - end. + {undefined, State2} = deliver_queue(deliver_from_queue/1, undefined, State), + State2. 
attempt_delivery(none, Message, State) -> - Fun = fun (State2) -> {{MsgId, Message, MsgSize, false, AckTag, 0}, State2} end, %% TODO FIX ME - case deliver_queue(Fun, State) of - {offered, false, State1} -> - {true, State1}; - {offered, true, State1} -> - MS = rabbit_mixed_queue:publish_delivered(Message, State1 #q.mixed_state), %% TODO API CHANGE - {true, State1 #q { mixed_state = MS }}; - {not_offered, State1} -> - {false, State1} - end; + Fun = + fun (AckRequired, false, State2) -> + {AckTag, State3} = + if AckRequired -> + %% TODO API CHANGE + {ok, MS, AckTag2} = rabbit_mixed_queue:publish_delivered(Message, + State2 #q.mixed_state), + {AckTag2, State2 #q { mixed_state = MS }}; + true -> + {noack, State2} + end, + {{MsgId, Message, MsgSize, false, AckTag, 0}, true, State3} %% TODO FIX ME + end, + deliver_queue(Fun, false, State); attempt_delivery(Txn, Message, State) -> - MS = rabbit_mixed_queue:tx_publish(Message, State #q.mixed_state), %% TODO API CHANGE + {ok, MS} = rabbit_mixed_queue:tx_publish(Message, State #q.mixed_state), %% TODO API CHANGE record_pending_message(Txn, Message), {true, State #q { mixed_state = MS }}. @@ -242,8 +247,8 @@ deliver_or_enqueue(Txn, Message, State) -> {true, NewState} -> {true, NewState}; {false, NewState} -> - %% Txn is none - MS = rabbit_mixed_queue:publish(Message, State #q.mixed_state), %% TODO API CHANGE + %% Txn is none and no unblocked channels with consumers + {ok, MS} = rabbit_mixed_queue:publish(Message, State #q.mixed_state), %% TODO API CHANGE {false, NewState #q { mixed_state = MS }} end. @@ -259,14 +264,16 @@ deliver_or_requeue_n(Messages, State) -> State #q { mixed_state = MS2 } end. -deliver_or_requeue_msg(Message, {AcksAcc, State}) -> - Fun = fun (State2) -> {{MsgId, Message, MsgSize, true, AckTag, 0}, State2} end, %% TODO FIX ME - case deliver_queue(Fun, State) of - {offered, true, State1} -> - {true, {AcksAcc, State1}}; - {offered, false, State1} -> - {true, {[AckTag|AcksAcc], State1}}; %% TODO FIXME where does AckTag come from?! - {not_offered, State1} -> +deliver_or_requeue_msg(Message, {AcksAcc, State}) -> %% TODO the acktag really should be within the msg here + Fun = fun (AckRequired, {false, AcksAcc}, State2) -> + AcksAcc2 = if AckRequired -> AcksAcc; + true -> [AckTag|AcksAcc] + end, + {{MsgId, Message, MsgSize, true, AckTag, 0}, {true, AcksAcc2}, State2} end, %% TODO FIX ME + case deliver_queue(Fun, {false, AcksAcc}, State) of + {{true, AcksAcc3}, State1} -> + {true, {AcksAcc3, State1}}; + {{false, AcksAcc}, State1} -> {false, {AcksAcc, State1}} end. -- cgit v1.2.1 From 0cabd4bfdfd131fee7aa4470830ce11d98cbbdfd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 May 2009 18:15:21 +0100 Subject: just a couple more comments / TODO items. 
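The preceding patch's fold settles the producer callback into threading two extra pieces of state: deliver_queue(Fun, FunAcc0, State) calls Fun(AckRequired, FunAcc, State) and expects {empty | {MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, FunAcc1, State1} back, so a source learns up front whether the delivery will be acked and carries its own accumulator through the loop. A sketch of a source written against that contract (next_message/1 and make_ack_tag/1 are hypothetical helpers, not part of the patch; Msg is assumed to be a binary payload):

    source(AckRequired, Acc, State) ->
        case next_message(State) of            %% hypothetical accessor
            {none, State1} ->
                {empty, Acc, State1};
            {{MsgId, Msg}, State1} ->
                %% only mint an ack tag when the consumer will ack
                AckTag = case AckRequired of
                             true  -> make_ack_tag(MsgId); %% hypothetical
                             false -> noack
                         end,
                {{MsgId, Msg, size(Msg), false, AckTag, 0}, Acc, State1}
        end.
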
--- src/rabbit_amqqueue_process.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5e941346..bcf4dae4 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -350,7 +350,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder, erase({ch, ChPid}), case check_auto_delete( deliver_or_requeue_n( - [Message || + [Message || %% TODO NEED TO GRAB ACKTAGS OUT OF HERE AND PASS THEM THROUGH {_Messsage_id, Message} <- dict:to_list(UAM)], State#q{ exclusive_consumer = case Holder of @@ -722,7 +722,7 @@ handle_cast({requeue, MsgIds, ChPid}, State) -> [ChPid]), noreply(State); C = #cr{unacked_messages = UAM} -> - {Messages, NewUAM} = collect_messages(MsgIds, UAM), + {Messages, NewUAM} = collect_messages(MsgIds, UAM), %% TODO Messages must contain AckTags too store_ch_record(C#cr{unacked_messages = NewUAM}), noreply(deliver_or_requeue_n(Messages, State)) end; -- cgit v1.2.1 From 3e873765e85ee6b4f4138d7b17e394e6d4214796 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 22 May 2009 14:03:47 +0100 Subject: Tiny changes to amqqueue_process, but mainly getting the mixed_queue api into proper shape. --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_mixed_queue.erl | 94 ++++++++++++++++++++++++++--------------- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index bcf4dae4..15b3a036 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -228,7 +228,7 @@ attempt_delivery(none, Message, State) -> {AckTag, State3} = if AckRequired -> %% TODO API CHANGE - {ok, MS, AckTag2} = rabbit_mixed_queue:publish_delivered(Message, + {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered(Message, State2 #q.mixed_state), {AckTag2, State2 #q { mixed_state = MS }}; true -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 790f4b75..e56e667d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -31,10 +31,12 @@ -module(rabbit_mixed_queue). +-include("rabbit.hrl"). + -export([start_link/2]). --export([publish/4, deliver/1, ack/2, - tx_publish/4, tx_commit/3, tx_cancel/2, +-export([publish/2, publish_delivered/2, deliver/1, ack/2, + tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1]). -record(mqstate, { mode, @@ -51,36 +53,56 @@ start_link(Queue, Mode) when Mode =:= disk orelse Mode =:= mixed -> rabbit_disk_queue:to_ram_disk_mode(), %% TODO, CHANGE ME {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 1, queue = Queue }}. -publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk, queue = Q }) -> - ok = rabbit_disk_queue:publish(Q, MsgId, Msg), +msg_to_bin(Msg = #basic_message { content = Content }) -> + ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), + term_to_binary(Msg #basic_message { content = ClearedContent }). + +bin_to_msg(MsgBin) -> + binary_to_term(MsgBin). 
+ +publish(Msg = #basic_message { guid = MsgId }, + State = #mqstate { mode = disk, queue = Q }) -> + ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), {ok, State}; -publish(MsgId, Msg, IsPersistent, +publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, next_write_seq = NextSeq, msg_buf = MsgBuf }) -> - if IsPersistent -> - ok = rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, Msg); - true -> ok - end, + ok = if IsPersistent -> + rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, msg_to_bin(Msg)); + true -> ok + end, {ok, State #mqstate { next_write_seq = NextSeq + 1, - msg_buf = queue:in({NextSeq, {MsgId, Msg, IsPersistent}}, - MsgBuf) + msg_buf = queue:in({NextSeq, Msg, false}, MsgBuf) }}. +%% assumption here is that the queue is empty already (only called via publish immediate) +publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, + State = #mqstate { mode = Mode, queue = Q }) + when Mode =:= disk orelse IsPersistent -> + ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), + {MsgId, false, Ack, 0} = rabbit_disk_queue:phantom_deliver(Q), + {ok, Ack, State}; +publish_delivered(#basic_message { is_persistent = false }, + State = #mqstate { mode = mixed }) -> + {ok, noack, State}. + deliver(State = #mqstate { mode = disk, queue = Q }) -> - {rabbit_disk_queue:deliver(Q), State}; + {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), + Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), + {{Msg, IsDelivered, AckTag, Remaining}, State}; deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextWrite }) -> {Result, MsgBuf2} = queue:out(MsgBuf), case Result of empty -> {empty, State}; - {value, {Seq, {MsgId, Msg, IsPersistent}}} -> - {IsDelivered, Ack} = + {value, {Seq, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered}} -> + AckTag = if IsPersistent -> - {MsgId, IsDelivered2, Ack2, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), - {IsDelivered2, Ack2}; - true -> {false, noack} + {MsgId, IsDelivered, AckTag2, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), + AckTag2; + true -> noack end, - {{MsgId, Msg, size(Msg), IsDelivered, Ack, (NextWrite - 1 - Seq)}, + {{Msg, IsDelivered, AckTag, (NextWrite - 1 - Seq)}, State #mqstate { msg_buf = MsgBuf2 }} end. @@ -94,17 +116,19 @@ ack(Acks, State = #mqstate { queue = Q }) -> {ok, State} end. -tx_publish(MsgId, Msg, _IsPersistent, State = #mqstate { mode = disk }) -> - ok = rabbit_disk_queue:tx_publish(MsgId, Msg), +tx_publish(Msg = #basic_message { guid = MsgId }, State = #mqstate { mode = disk }) -> + ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; -tx_publish(MsgId, Msg, true, State = #mqstate { mode = mixed }) -> - ok = rabbit_disk_queue:tx_publish(MsgId, Msg), +tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = true }, + State = #mqstate { mode = mixed }) -> + ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; -tx_publish(_MsgId, _Msg, false, State = #mqstate { mode = mixed }) -> +tx_publish(#basic_message { is_persistent = false }, + State = #mqstate { mode = mixed }) -> {ok, State}. only_msg_ids(Pubs) -> - lists:map(fun (P) -> element(1, P) end, Pubs). + lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). 
tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> ok = rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), Acks), @@ -112,16 +136,16 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextSeq - }) -> + }) -> {PersistentPubs, MsgBuf2, NextSeq2} = - lists:foldl(fun ({MsgId, Msg, IsPersistent}, {Acc, MsgBuf3, NextSeq3}) -> + lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, + {Acc, MsgBuf3, NextSeq3}) -> Acc2 = if IsPersistent -> - [{MsgId, NextSeq3} | Acc]; + [{Msg #basic_message.guid, NextSeq3} | Acc]; true -> Acc end, - MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, - MsgBuf3), + MsgBuf4 = queue:in({NextSeq3, Msg, false}, MsgBuf3), {Acc2, MsgBuf4, NextSeq3 + 1} end, {[], MsgBuf, NextSeq}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match @@ -131,8 +155,9 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. only_persistent_msg_ids(Pubs) -> - lists:reverse(lists:foldl(fun ({MsgId, _, IsPersistent}, Acc) -> - if IsPersistent -> [MsgId | Acc]; + lists:reverse(lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, + Acc) -> + if IsPersistent -> [Msg #basic_message.guid | Acc]; true -> Acc end end, [], Pubs)). @@ -147,6 +172,7 @@ tx_cancel(Publishes, State = #mqstate { mode = mixed }) -> only_ack_tags(MsgWithAcks) -> lists:map(fun (P) -> element(2, P) end, MsgWithAcks). +%% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q }) -> rabbit_disk_queue:requeue(Q, only_ack_tags(MessagesWithAckTags)), {ok, State}; @@ -155,15 +181,15 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, next_write_seq = NextSeq }) -> {PersistentPubs, MsgBuf2, NextSeq2} = - lists:foldl(fun ({{MsgId, Msg, IsPersistent}, AckTag}, {Acc, MsgBuf3, NextSeq3}) -> + lists:foldl(fun ({Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, AckTag}, + {Acc, MsgBuf3, NextSeq3}) -> Acc2 = if IsPersistent -> {MsgId, _OldSeqId} = AckTag, [{AckTag, NextSeq3} | Acc]; true -> Acc end, - MsgBuf4 = queue:in({NextSeq3, {MsgId, Msg, IsPersistent}}, - MsgBuf3), + MsgBuf4 = queue:in({NextSeq3, Msg, true}, MsgBuf3), {Acc2, MsgBuf4, NextSeq3 + 1} end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), ok = rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(PersistentPubs)), -- cgit v1.2.1 From ea6c42f39a77e1bead99c098ccda7bd8e4e332d3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 22 May 2009 15:32:53 +0100 Subject: It compiles. 
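The msg_to_bin/1 and bin_to_msg/1 helpers from the preceding patch persist only what cannot be rebuilt: the decoded content is cleared before term_to_binary/1, and the whole #basic_message {} record comes back via binary_to_term/1 on read, with the properties re-decodable from properties_bin later. A sketch of the round trip these helpers assume (needs the record definitions from rabbit.hrl; roundtrip/1 is illustrative):

    roundtrip(Msg = #basic_message { content = Content }) ->
        ClearedContent = rabbit_binary_parser:clear_decoded_content(Content),
        Bin = term_to_binary(Msg #basic_message { content = ClearedContent }),
        %% the stripped record round-trips without loss of meaning
        #basic_message {} = binary_to_term(Bin).
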
--- src/rabbit_amqqueue_process.erl | 136 ++++++++++++++++++---------------------- src/rabbit_misc.erl | 11 ---- src/rabbit_mixed_queue.erl | 4 +- 3 files changed, 64 insertions(+), 87 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 15b3a036..8fe0d623 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -175,12 +175,12 @@ deliver_queue(Fun, FunAcc0, case Fun(AckRequired, FunAcc0, State) of {empty, FunAcc1, State2} -> {FunAcc1, State2}; - {{MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, FunAcc1, State2} -> + {{Msg, IsDelivered, AckTag, Remaining}, FunAcc1, State2} -> rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, - {QName, self(), NextId, Delivered, Message}), %% TODO FIXME + {QName, self(), NextId, IsDelivered, Msg}), NewUAM = case AckRequired of - true -> dict:store(NextId, Message, UAM); + true -> dict:store(NextId, {Msg, AckTag}, UAM); false -> UAM end, NewC = C#cr{unsent_message_count = Count + 1, @@ -201,81 +201,74 @@ deliver_queue(Fun, FunAcc0, false -> store_ch_record(C#cr{is_limit_active = true}), NewConsumers = block_consumers(ChPid, RoundRobinTail), - deliver_queue(Fun, FunAcc0, State#q{round_robin = NewConsumers}) + deliver_queue(Fun, FunAcc0, State #q { round_robin = NewConsumers }) end; {empty, _} -> {FunAcc0, State} end. -deliver_from_queue(AckRequired, Acc, State = #q { mixed_state = MS }) -> +deliver_from_queue(AckRequired, Acc = undefined, State = #q { mixed_state = MS }) -> {Res, MS2} = rabbit_mixed_queue:deliver(MS), MS3 = case {Res, AckRequired} of - {empty, _} -> MS2; {_, true} -> MS2; - {{_MsgId, _Msg, _MsgSize, _IsDelivered, AckTag, _Remaining}, false} -> + {empty, _} -> MS2; + {{_Msg, _IsDelivered, AckTag, _Remaining}, false} -> {ok, MS4} = rabbit_mixed_queue:ack([AckTag], MS2), - MS3 + MS4 end, {Res, Acc, State #q { mixed_state = MS3 }}. run_message_queue(State) -> - {undefined, State2} = deliver_queue(deliver_from_queue/1, undefined, State), + {undefined, State2} = deliver_queue(fun deliver_from_queue/3, undefined, State), State2. -attempt_delivery(none, Message, State) -> +attempt_immediate_delivery(none, Msg, State) -> Fun = fun (AckRequired, false, State2) -> {AckTag, State3} = if AckRequired -> - %% TODO API CHANGE - {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered(Message, + {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered(Msg, State2 #q.mixed_state), {AckTag2, State2 #q { mixed_state = MS }}; true -> {noack, State2} end, - {{MsgId, Message, MsgSize, false, AckTag, 0}, true, State3} %% TODO FIX ME + {{Msg, false, AckTag, 0}, true, State3} end, deliver_queue(Fun, false, State); -attempt_delivery(Txn, Message, State) -> - {ok, MS} = rabbit_mixed_queue:tx_publish(Message, State #q.mixed_state), %% TODO API CHANGE - record_pending_message(Txn, Message), +attempt_immediate_delivery(Txn, Msg, State) -> + {ok, MS} = rabbit_mixed_queue:tx_publish(Msg, State #q.mixed_state), + record_pending_message(Txn, Msg), {true, State #q { mixed_state = MS }}. -deliver_or_enqueue(Txn, Message, State) -> - case attempt_delivery(Txn, Message, State) of +deliver_or_enqueue(Txn, Msg, State) -> + case attempt_immediate_delivery(Txn, Msg, State) of {true, NewState} -> {true, NewState}; {false, NewState} -> %% Txn is none and no unblocked channels with consumers - {ok, MS} = rabbit_mixed_queue:publish(Message, State #q.mixed_state), %% TODO API CHANGE + {ok, MS} = rabbit_mixed_queue:publish(Msg, State #q.mixed_state), {false, NewState #q { mixed_state = MS }} end. 
%% all these messages have already been delivered at least once and %% not ack'd, but need to be either redelivered or requeued -deliver_or_requeue_n(Messages, State) -> - {AutoAcks, Remaining} = - dropwhilefoldl(deliver_or_requeue_msg/2, {[], State}, Messages), - {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), State #q.mixed_state), %% TODO FIXME - case Remaining of - [] -> run_message_queue(State #q { mixed_state = MS }); - _ -> {ok, MS2} = rabbit_mixed_queue:requeue(Remaining, MS), %% TODO FIXME - State #q { mixed_state = MS2 } +deliver_or_requeue_n([], State) -> + State; +deliver_or_requeue_n(MsgsWithAcks, State) -> + {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = + deliver_queue(fun deliver_or_requeue_msgs/3, {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, State), + {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), NewState #q.mixed_state), + case OutstandingMsgs of + [] -> run_message_queue(NewState #q { mixed_state = MS }); + _ -> {ok, MS2} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), + NewState #q { mixed_state = MS2 } end. -deliver_or_requeue_msg(Message, {AcksAcc, State}) -> %% TODO the acktag really should be within the msg here - Fun = fun (AckRequired, {false, AcksAcc}, State2) -> - AcksAcc2 = if AckRequired -> AcksAcc; - true -> [AckTag|AcksAcc] - end, - {{MsgId, Message, MsgSize, true, AckTag, 0}, {true, AcksAcc2}, State2} end, %% TODO FIX ME - case deliver_queue(Fun, {false, AcksAcc}, State) of - {{true, AcksAcc3}, State1} -> - {true, {AcksAcc3, State1}}; - {{false, AcksAcc}, State1} -> - {false, {AcksAcc, State1}} - end. +deliver_or_requeue_msgs(false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> + {{Msg, true, noack, Len}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; +deliver_or_requeue_msgs(true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> + {{Msg, true, AckTag, Len}, {Len - 1, [AcksAcc], MsgsWithAcks}, State}. block_consumers(ChPid, RoundRobin) -> %%?LOGDEBUG("~p Blocking ~p from ~p~n", [self(), ChPid, queue:to_list(RoundRobin)]), @@ -350,8 +343,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder, erase({ch, ChPid}), case check_auto_delete( deliver_or_requeue_n( - [Message || %% TODO NEED TO GRAB ACKTAGS OUT OF HERE AND PASS THEM THROUGH - {_Messsage_id, Message} <- dict:to_list(UAM)], + [MsgWithAck || + {_MsgId, MsgWithAck} <- dict:to_list(UAM)], State#q{ exclusive_consumer = case Holder of {ChPid, _} -> none; @@ -421,10 +414,6 @@ all_tx_record() -> all_tx() -> [Txn || {{txn, Txn}, _} <- get()]. -is_tx_persistent(Txn) -> - #tx{is_persistent = Res} = lookup_tx(Txn), - Res. - record_pending_message(Txn, Message = #basic_message { is_persistent = IsPersistent }) -> Tx = #tx{pending_messages = Pending, is_persistent = IsPersistentTxn } = lookup_tx(Txn), store_tx(Txn, Tx #tx { pending_messages = [{Message, false} | Pending], @@ -445,12 +434,13 @@ commit_transaction(Txn, State) -> case lookup_ch(ChPid) of not_found -> State; C = #cr { unacked_messages = UAM } -> - {Acked, Remaining} = - collect_messages(PendingAcksAppended, UAM), + {MsgWithAcks, Remaining} = + collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), - MS = rabbit_mixed_queue:tx_commit(PendingMessagesOrdered, - Acked, - State #q.mixed_state), + MS = rabbit_mixed_queue:tx_commit( + PendingMessagesOrdered, + lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks), + State #q.mixed_state), State #q { mixed_state = MS } end. 
@@ -461,7 +451,7 @@ rollback_transaction(Txn, State) -> State #q { mixed_state = MS }. %% {A, B} = collect_messages(C, D) %% A = C `intersect` D; B = D \\ C -%% err, A = C `intersect` D , via projection through the dict that is A +%% err, A = C `intersect` D , via projection through the dict that is C collect_messages(MsgIds, UAM) -> lists:mapfoldl( fun (MsgId, D) -> {dict:fetch(MsgId, D), dict:erase(MsgId, D)} end, @@ -475,8 +465,8 @@ i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete; i(arguments, #q{q = #amqqueue{arguments = Arguments}}) -> Arguments; i(pid, _) -> self(); -i(messages_ready, #q{message_buffer = MessageBuffer}) -> - queue:len(MessageBuffer); +i(messages_ready, #q { mixed_state = MS }) -> + rabbit_mixed_queue:length(MS); i(messages_unacknowledged, _) -> lists:sum([dict:size(UAM) || #cr{unacked_messages = UAM} <- all_ch_record()]); @@ -526,7 +516,7 @@ handle_call({deliver_immediately, Txn, Message}, _From, State) -> %% just all ready-to-consume queues get the message, with unready %% queues discarding the message? %% - {Delivered, NewState} = attempt_delivery(Txn, Message, State), + {Delivered, NewState} = attempt_immediate_delivery(Txn, Message, State), reply(Delivered, NewState); handle_call({deliver, Txn, Message}, _From, State) -> @@ -553,23 +543,23 @@ handle_call({basic_get, ChPid, NoAck}, _From, }) -> case rabbit_mixed_queue:deliver(MS) of {empty, MS2} -> reply(empty, State #q { mixed_state = MS2 }); - {{MsgId, Msg, MsgSize, IsDelivered, AckTag, Remaining}, MS2} -> + {{Msg, IsDelivered, AckTag, Remaining}, MS2} -> AckRequired = not(NoAck), - MS3 = + {ok, MS3} = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), - NewUAM = dict:store(NextId, Message, UAM), + NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - MS2; + {ok, MS2}; false -> rabbit_mixed_queue:ack([AckTag], MS2) end, - Message = {QName, self(), NextId, IsDelivered, Msg}, %% TODO, FIX UP + Message = {QName, self(), NextId, IsDelivered, Msg}, reply({ok, Remaining, Message}, - State#q{next_msg_id = NextId + 1}); - {empty, _} -> - reply(empty, State) + State #q { next_msg_id = NextId + 1, + mixed_state = MS3 + }) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -645,10 +635,10 @@ handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, reply({ok, Name, Length, queue:len(RoundRobin)}, State #q { mixed_state = MS2 }); handle_call({delete, IfUnused, IfEmpty}, _From, - State = #q{message_buffer = MessageBuffer, mixed_state = MS}) -> - IsEmpty = queue:is_empty(MessageBuffer), - IsUnused = is_unused(), + State = #q { mixed_state = MS }) -> {Length, MS2} = rabbit_mixed_queue:length(MS), + IsEmpty = Length == 0, + IsUnused = is_unused(), if IfEmpty and not(IsEmpty) -> reply({error, not_empty}, State); @@ -695,25 +685,23 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> not_found -> noreply(State); C = #cr{unacked_messages = UAM} -> - {Acked, Remaining} = collect_messages(MsgIds, UAM), + {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), case Txn of none -> - MS = rabbit_mixed_queue:ack(Acked, State #q.mixed_state), %% TODO API + Acks = lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks), + {ok, MS} = rabbit_mixed_queue:ack(Acks, State #q.mixed_state), store_ch_record(C#cr{unacked_messages = Remaining}), noreply(State #q { mixed_state = MS }); _ -> record_pending_acks(Txn, ChPid, MsgIds), noreply(State) - end, + end end; handle_cast({rollback, Txn}, State) -> NewState = 
rollback_transaction(Txn, State), erase_tx(Txn), - noreply(State2); - -handle_cast({redeliver, Messages}, State) -> - noreply(ok); %% TODO - probably remove - only used by the old persister + noreply(NewState); handle_cast({requeue, MsgIds, ChPid}, State) -> case lookup_ch(ChPid) of @@ -722,9 +710,9 @@ handle_cast({requeue, MsgIds, ChPid}, State) -> [ChPid]), noreply(State); C = #cr{unacked_messages = UAM} -> - {Messages, NewUAM} = collect_messages(MsgIds, UAM), %% TODO Messages must contain AckTags too + {MsgWithAcks, NewUAM} = collect_messages(MsgIds, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - noreply(deliver_or_requeue_n(Messages, State)) + noreply(deliver_or_requeue_n(MsgWithAcks, State)) end; handle_cast({unblock, ChPid}, State) -> diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index f90abe3f..f207038e 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -51,7 +51,6 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). --export([dropwhilefoldl/3]). -import(mnesia). -import(lists). @@ -407,13 +406,3 @@ stop_applications(Apps) -> not_started, cannot_stop_application, Apps). - -dropwhilefoldl(_PredFun, Acc0, []) -> - {Acc0, []}; -dropwhilefoldl(PredFun, Acc0, [E|List]) -> - case PredFun(E, Acc0) of - {true, Acc1} -> - dropwhilefoldl(PredFun, Acc1, List); - {false, Acc1} -> - {Acc1, List} - end. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index e56e667d..24d0de8d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -80,8 +80,8 @@ publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersist State = #mqstate { mode = Mode, queue = Q }) when Mode =:= disk orelse IsPersistent -> ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), - {MsgId, false, Ack, 0} = rabbit_disk_queue:phantom_deliver(Q), - {ok, Ack, State}; + {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), + {ok, AckTag, State}; publish_delivered(#basic_message { is_persistent = false }, State = #mqstate { mode = mixed }) -> {ok, noack, State}. -- cgit v1.2.1 From ebe539387c634c5c9532aa25b7bb0c3fa8d5fc9c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 22 May 2009 16:01:06 +0100 Subject: IT WORKS! (for SendString and simpleconsumer) --- src/rabbit_amqqueue_process.erl | 23 ++++++++++++----------- src/rabbit_mixed_queue.erl | 5 ++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 8fe0d623..80051149 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -90,13 +90,14 @@ start_link(Q) -> %%---------------------------------------------------------------------------- -init(Q) -> +init(Q = #amqqueue { name = QName }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), + {ok, MS} = rabbit_mixed_queue:start_link(QName, mixed), %% TODO, CHANGE ME {ok, #q{q = Q, owner = none, exclusive_consumer = none, has_had_consumers = false, - mixed_state = rabbit_mixed_queue:start_link(qname(Q), mixed), %% TODO, CHANGE ME + mixed_state = MS, next_msg_id = 1, round_robin = queue:new()}, ?HIBERNATE_AFTER}. 
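The third element of init/1's return tuple, ?HIBERNATE_AFTER, sits in the standard gen_server timeout slot: if no request reaches the queue process within that many milliseconds, it receives a timeout message, which gives an idle queue a chance to shrink itself. A minimal sketch of the idiom (the value and the helper name are illustrative):

    init(Args) ->
        {ok, make_state(Args), 10000}.   %% timeout in milliseconds

    handle_info(timeout, State) ->
        %% no traffic for 10s; an idle queue could hibernate here
        {noreply, State}.
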
@@ -437,17 +438,17 @@ commit_transaction(Txn, State) -> {MsgWithAcks, Remaining} = collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), - MS = rabbit_mixed_queue:tx_commit( - PendingMessagesOrdered, - lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks), - State #q.mixed_state), + {ok, MS} = rabbit_mixed_queue:tx_commit( + PendingMessagesOrdered, + lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks), + State #q.mixed_state), State #q { mixed_state = MS } end. rollback_transaction(Txn, State) -> #tx { pending_messages = PendingMessages } = lookup_tx(Txn), - MS = rabbit_mixed_queue:tx_cancel(lists:reverse(PendingMessages), State #q.mixed_state), + {ok, MS} = rabbit_mixed_queue:tx_cancel(lists:reverse(PendingMessages), State #q.mixed_state), State #q { mixed_state = MS }. %% {A, B} = collect_messages(C, D) %% A = C `intersect` D; B = D \\ C @@ -631,12 +632,12 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, mixed_state = MS, round_robin = RoundRobin}) -> - {Length, MS2} = rabbit_mixed_queue:length(MS), - reply({ok, Name, Length, queue:len(RoundRobin)}, State #q { mixed_state = MS2 }); + Length = rabbit_mixed_queue:length(MS), + reply({ok, Name, Length, queue:len(RoundRobin)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, State = #q { mixed_state = MS }) -> - {Length, MS2} = rabbit_mixed_queue:length(MS), + Length = rabbit_mixed_queue:length(MS), IsEmpty = Length == 0, IsUnused = is_unused(), if @@ -645,7 +646,7 @@ handle_call({delete, IfUnused, IfEmpty}, _From, IfUnused and not(IsUnused) -> reply({error, in_use}, State); true -> - {stop, normal, {ok, Length}, State #q { mixed_state = MS2 }} + {stop, normal, {ok, Length}, State} end; handle_call(purge, _From, State) -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 24d0de8d..037aeebf 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -204,10 +204,9 @@ purge(State = #mqstate { queue = Q, msg_buf = MsgBuf, mode = mixed }) -> {Count, State #mqstate { msg_buf = queue:new() }}. length(State = #mqstate { queue = Q, mode = disk }) -> - Length = rabbit_disk_queue:length(Q), - {Length, State}; + rabbit_disk_queue:length(Q); length(State = #mqstate { mode = mixed, msg_buf = MsgBuf }) -> - {queue:length(MsgBuf), State}. + queue:len(MsgBuf). is_empty(State) -> 0 == rabbit_mixed_queue:length(State). -- cgit v1.2.1 From 489d3b4ad526fcb8cac7bb7b99094c954b2323cc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 22 May 2009 18:08:45 +0100 Subject: Well, it /tends/ to work, but sometimes falls over, apparently trying to read a message which has been erased from mnesia. Mysterious! 
--- src/rabbit_disk_queue.erl | 47 +++++++++++++++++++++++++++++----------------- src/rabbit_misc.erl | 10 ++++++++++ src/rabbit_mixed_queue.erl | 11 +++++++---- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 71d812f6..2e3ff89a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -538,8 +538,8 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mo internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; - [{Q, _ReadSeqId, _WriteSeqId, 0}] -> {ok, empty, State}; - [{Q, ReadSeqId, WriteSeqId, Length}] -> + [{Q, SeqId, SeqId, 0}] -> {ok, empty, State}; + [{Q, ReadSeqId, WriteSeqId, Length}] when Length > 0 -> [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, next_seq_id = ReadSeqId2}] = @@ -671,16 +671,22 @@ internal_tx_publish(MsgId, MsgBody, {ok, State} end. -adjust_last_msg_seq_id(_Q, ExpectedSeqId, next) -> +adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> ExpectedSeqId; -adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId) -> +adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId, _Mode) -> SuppliedSeqId; -adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId) -> +adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId, _Mode) -> ExpectedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId) when SuppliedSeqId > ExpectedSeqId -> +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, dirty) when SuppliedSeqId > ExpectedSeqId -> [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), + SuppliedSeqId; +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, Lock) when SuppliedSeqId > ExpectedSeqId -> + [Obj] = mnesia:read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}, Lock), + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }, + Lock), SuppliedSeqId. %% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next)) @@ -716,7 +722,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, {Acc, ExpectedSeqId}) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), - SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId), + SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId, write), NextSeqId2 = if NextSeqId =:= next -> SeqId2 + 1; true -> NextSeqId end, @@ -754,7 +760,7 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> {ReadSeqId2, WriteSeqId2, Length2} end, - WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId), + WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId, dirty), WriteSeqId3Next = WriteSeqId3 + 1, true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId3Next, Length + 1}), ok = mnesia:dirty_write(rabbit_disk_queue, @@ -772,7 +778,7 @@ internal_tx_cancel(MsgIds, State) -> internal_requeue(_Q, [], State) -> {ok, State}; -internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], +internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. 
they've @@ -798,6 +804,10 @@ internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], %% the Q _must_ already exist [{Q, ReadSeqId, WriteSeqId, Length}] = ets:lookup(Sequences, Q), + ReadSeqId2 = + if ReadSeqId == WriteSeqId andalso FirstSeqIdTo > WriteSeqId -> FirstSeqIdTo; + true -> ReadSeqId + end, MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), {atomic, WriteSeqId2} = mnesia:transaction( @@ -807,7 +817,7 @@ internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], fun ({{{MsgId, SeqIdOrig}, SeqIdTo}, {_NextMsgSeqId, NextSeqIdTo}}, ExpectedSeqIdTo) -> - SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo), + SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), NextSeqIdTo2 = if NextSeqIdTo =:= next -> SeqIdTo2 + 1; true -> NextSeqIdTo end, @@ -823,7 +833,7 @@ internal_requeue(Q, MsgSeqIds = [_|MsgSeqIdsTail], NextSeqIdTo2 end, WriteSeqId, MsgSeqIdsZipped) end), - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId2, Length + erlang:length(MsgSeqIds)}), + true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId2, Length + erlang:length(MsgSeqIds)}), {ok, State}. internal_purge(Q, State = #dqstate { sequences = Sequences }) -> @@ -834,12 +844,15 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - MsgSeqIds = lists:foldl( - fun (SeqId, Acc) -> - [#dq_msg_loc { is_delivered = false, msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), - [{MsgId, SeqId} | Acc] - end, [], lists:seq(ReadSeqId, WriteSeqId - 1)), + MsgSeqIds = + rabbit_misc:unfold( + fun (SeqId) when SeqId == WriteSeqId -> false; + (SeqId) -> + [#dq_msg_loc { msg_id = MsgId, + next_seq_id = NextSeqId } + ] = mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + {true, {MsgId, SeqId}, NextSeqId} + end, ReadSeqId), remove_messages(Q, MsgSeqIds, txn, State) end), true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId, 0}), diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index f207038e..153a8a7c 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -51,6 +51,7 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). +-export([unfold/2]). -import(mnesia). -import(lists). @@ -406,3 +407,12 @@ stop_applications(Apps) -> not_started, cannot_stop_application, Apps). + +unfold(Fun, Init) -> + unfold(Fun, [], Init). + +unfold(Fun, Acc, Init) -> + case Fun(Init) of + {true, E, I} -> unfold(Fun, [E|Acc], I); + false -> Acc + end. 
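The new rabbit_misc:unfold/2 is a plain anamorphism: it applies Fun to a seed repeatedly, collecting each produced element until Fun returns false. Because every element is consed onto the front of the accumulator, the first element produced comes out last; internal_purge above uses it to walk the next_seq_id chain from ReadSeqId up to WriteSeqId. An illustrative use, where counting a seed down yields an ascending list:

    Down = fun (0) -> false;
               (N) -> {true, N, N - 1}
           end,
    [1,2,3,4,5] = rabbit_misc:unfold(Down, 5).
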
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 037aeebf..5cda8eca 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -77,11 +77,14 @@ publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, %% assumption here is that the queue is empty already (only called via publish immediate) publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, - State = #mqstate { mode = Mode, queue = Q }) + State = #mqstate { mode = Mode, queue = Q, next_write_seq = NextSeq }) when Mode =:= disk orelse IsPersistent -> ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), - {ok, AckTag, State}; + State2 = if Mode =:= mixed -> State #mqstate { next_write_seq = NextSeq + 1 }; + true -> State + end, + {ok, AckTag, State2}; publish_delivered(#basic_message { is_persistent = false }, State = #mqstate { mode = mixed }) -> {ok, noack, State}. @@ -203,9 +206,9 @@ purge(State = #mqstate { queue = Q, msg_buf = MsgBuf, mode = mixed }) -> Count = queue:len(MsgBuf), {Count, State #mqstate { msg_buf = queue:new() }}. -length(State = #mqstate { queue = Q, mode = disk }) -> +length(#mqstate { queue = Q, mode = disk }) -> rabbit_disk_queue:length(Q); -length(State = #mqstate { mode = mixed, msg_buf = MsgBuf }) -> +length(#mqstate { mode = mixed, msg_buf = MsgBuf }) -> queue:len(MsgBuf). is_empty(State) -> -- cgit v1.2.1 From aa639cd7e354457f3b0c37e2d0e507edbb953346 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 22 May 2009 18:26:30 +0100 Subject: Yup, basically, same bug in three places. Fixed. It all works. --- src/rabbit_disk_queue.erl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2e3ff89a..81617b8b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -697,16 +697,20 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, }) -> {PubList, PubAcc, ReadSeqId, Length} = case PubMsgSeqIds of - [] -> {[], undefined, undefined}; - [_|PubMsgSeqIdsTail] -> + [] -> {[], undefined, undefined, undefined}; + [{_, FirstSeqIdTo}|PubMsgSeqIdsTail] -> {InitReadSeqId, InitWriteSeqId, InitLength} = case ets:lookup(Sequences, Q) of [] -> {0,0,0}; [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> {ReadSeqId2, WriteSeqId2, Length2} end, + InitReadSeqId2 = if InitReadSeqId == InitWriteSeqId andalso FirstSeqIdTo > InitWriteSeqId -> + FirstSeqIdTo; + true -> InitReadSeqId + end, { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), - InitWriteSeqId, InitReadSeqId, InitLength} + InitWriteSeqId, InitReadSeqId2, InitLength} end, {atomic, {Sync, WriteSeqId, State2}} = mnesia:transaction( @@ -762,12 +766,15 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> end, WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId, dirty), WriteSeqId3Next = WriteSeqId3 + 1, - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId3Next, Length + 1}), + ReadSeqId3 = if ReadSeqId == WriteSeqId andalso WriteSeqId3 > WriteSeqId -> WriteSeqId3; + true -> ReadSeqId + end, ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, msg_id = MsgId, next_seq_id = WriteSeqId3Next, is_delivered = false}), + true = ets:insert(Sequences, {Q, ReadSeqId3, WriteSeqId3Next, Length + 1}), {ok, State1}. 
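The bug is easiest to see with concrete values in the Sequences ets table, whose entries are {Q, ReadSeqId, WriteSeqId, Length}. Take an empty queue whose pointers sit at 4 when a publish arrives carrying the explicit sequence id 7 (numbers illustrative):

    {q, 4, 4, 0}   %% empty: read == write
    %% publish_with_seq(q, MsgId, 7, Msg) moves the write pointer to 8 ...
    {q, 4, 8, 1}   %% ... but without the fix the read pointer stays at 4,
                   %% so the next deliver reads {q, 4}: a hole in mnesia
    {q, 7, 8, 1}   %% with the fix, read jumps with write when they were equal
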
internal_tx_cancel(MsgIds, State) -> -- cgit v1.2.1 From ca99bf2aecc14559a856a6327cb0e5635d61b5f7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 22 May 2009 18:46:53 +0100 Subject: duh! --- src/rabbit_disk_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 81617b8b..7710a0a2 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -705,7 +705,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> {ReadSeqId2, WriteSeqId2, Length2} end, - InitReadSeqId2 = if InitReadSeqId == InitWriteSeqId andalso FirstSeqIdTo > InitWriteSeqId -> + InitReadSeqId2 = if InitReadSeqId == InitWriteSeqId andalso FirstSeqIdTo > InitWriteSeqId andalso FirstSeqIdTo /= next -> FirstSeqIdTo; true -> InitReadSeqId end, @@ -812,7 +812,7 @@ internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], %% the Q _must_ already exist [{Q, ReadSeqId, WriteSeqId, Length}] = ets:lookup(Sequences, Q), ReadSeqId2 = - if ReadSeqId == WriteSeqId andalso FirstSeqIdTo > WriteSeqId -> FirstSeqIdTo; + if ReadSeqId == WriteSeqId andalso FirstSeqIdTo > WriteSeqId andalso FirstSeqIdTo /= next -> FirstSeqIdTo; true -> ReadSeqId end, MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), -- cgit v1.2.1 From 3ee3659a5804823d913288bb70874740968c93dc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 27 May 2009 11:43:06 +0100 Subject: preemptive refactoring --- src/rabbit_disk_queue.erl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 7710a0a2..c7ef1177 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -705,10 +705,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> {ReadSeqId2, WriteSeqId2, Length2} end, - InitReadSeqId2 = if InitReadSeqId == InitWriteSeqId andalso FirstSeqIdTo > InitWriteSeqId andalso FirstSeqIdTo /= next -> - FirstSeqIdTo; - true -> InitReadSeqId - end, + InitReadSeqId2 = determine_next_read_id(InitReadSeqId, InitWriteSeqId, FirstSeqIdTo), { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), InitWriteSeqId, InitReadSeqId2, InitLength} end, @@ -764,11 +761,9 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> {ReadSeqId2, WriteSeqId2, Length2} end, + ReadSeqId3 = determine_next_read_id(ReadSeqId, WriteSeqId, SeqId), WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId, dirty), WriteSeqId3Next = WriteSeqId3 + 1, - ReadSeqId3 = if ReadSeqId == WriteSeqId andalso WriteSeqId3 > WriteSeqId -> WriteSeqId3; - true -> ReadSeqId - end, ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, msg_id = MsgId, @@ -783,6 +778,17 @@ internal_tx_cancel(MsgIds, State) -> MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). +determine_next_read_id(CurrentReadWrite, CurrentReadWrite, CurrentReadWrite) -> + CurrentReadWrite; +determine_next_read_id(CurrentRead, _CurrentWrite, next) -> + CurrentRead; +determine_next_read_id(CurrentReadWrite, CurrentReadWrite, NextWrite) + when NextWrite > CurrentReadWrite -> + NextWrite; +determine_next_read_id(CurrentRead, CurrentWrite, NextWrite) + when NextWrite >= CurrentWrite -> + CurrentRead. 
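The preemptive refactoring pulls that read-pointer rule into determine_next_read_id/3, one clause per case. Mapping the four clauses onto concrete (Read, Write, NextWrite) arguments (values illustrative):

    determine_next_read_id(4, 4, 4)     %% -> 4: pointers already coincide
    determine_next_read_id(4, 9, next)  %% -> 4: implicit seq id, read stays put
    determine_next_read_id(4, 4, 7)     %% -> 7: empty queue, read jumps ahead
    determine_next_read_id(4, 9, 9)     %% -> 4: queue non-empty, read stays put
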
+ internal_requeue(_Q, [], State) -> {ok, State}; internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], @@ -811,10 +817,7 @@ internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], %% the Q _must_ already exist [{Q, ReadSeqId, WriteSeqId, Length}] = ets:lookup(Sequences, Q), - ReadSeqId2 = - if ReadSeqId == WriteSeqId andalso FirstSeqIdTo > WriteSeqId andalso FirstSeqIdTo /= next -> FirstSeqIdTo; - true -> ReadSeqId - end, + ReadSeqId2 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo), MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), {atomic, WriteSeqId2} = mnesia:transaction( -- cgit v1.2.1 From 891a79b5fdd2b5bd90c196498bd350392c8040ca Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 27 May 2009 12:45:53 +0100 Subject: o) fix up -specs o) add queue_delete - previously I was just (ab)using purge but this isn't right as purge won't eliminate delivered/unacked messages. o) refactorings and moving code around a bit --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_disk_queue.erl | 171 +++++++++++++++++++++++----------------- src/rabbit_misc.erl | 1 + src/rabbit_mixed_queue.erl | 11 ++- 4 files changed, 111 insertions(+), 74 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 80051149..3ca88aaa 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -108,7 +108,7 @@ terminate(_Reason, State) -> lists:foldl(fun (Txn, State1) -> rollback_transaction(Txn, State1) end, State, all_tx()), - rabbit_mixed_queue:purge(NewState #q.mixed_state), + rabbit_mixed_queue:delete_queue(NewState #q.mixed_state), ok = rabbit_amqqueue:internal_delete(QName). code_change(_OldVsn, State, _Extra) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c7ef1177..b73e456c 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,7 +40,7 @@ -export([publish/3, publish_with_seq/4, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, - requeue/2, requeue_with_seqs/2, purge/1]). + requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1]). -export([length/1, is_empty/1]). @@ -224,11 +224,12 @@ -ifdef(use_specs). -type(seq_id() :: non_neg_integer()). +-type(seq_id_or_next() :: { seq_id() | 'next' }). -spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). --spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id(), binary()) -> 'ok'). +-spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id_or_next(), binary()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> {'empty' | {msg_id(), binary(), non_neg_integer(), bool(), {msg_id(), seq_id()}, non_neg_integer()}}). @@ -237,11 +238,11 @@ -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). --spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id()}], +-spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id_or_next()}], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). --spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id()}]) -> 'ok'). +-spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id_or_next()}]) -> 'ok'). 
-spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -295,6 +296,9 @@ requeue_with_seqs(Q, MsgSeqSeqIds) when is_list(MsgSeqSeqIds) -> purge(Q) -> gen_server:call(?SERVER, {purge, Q}). +delete_queue(Q) -> + gen_server:cast(?SERVER, {delete_queue, Q}). + stop() -> gen_server:call(?SERVER, stop, infinity). @@ -457,6 +461,9 @@ handle_cast({requeue, Q, MsgSeqIds}, State) -> {noreply, State1}; handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), + {noreply, State1}; +handle_cast({delete_queue, Q}, State) -> + {ok, State1} = internal_delete_queue(Q, State), {noreply, State1}. handle_info(_Info, State) -> @@ -533,6 +540,69 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mo Obj) -> ets:match_object(MsgLocationEts, Obj). +find_next_seq_id(CurrentSeq, next) -> + CurrentSeq + 1; +find_next_seq_id(CurrentSeq, NextSeqId) + when NextSeqId > CurrentSeq -> + NextSeqId. + +determine_next_read_id(CurrentReadWrite, CurrentReadWrite, CurrentReadWrite) -> + CurrentReadWrite; +determine_next_read_id(CurrentRead, _CurrentWrite, next) -> + CurrentRead; +determine_next_read_id(CurrentReadWrite, CurrentReadWrite, NextWrite) + when NextWrite > CurrentReadWrite -> + NextWrite; +determine_next_read_id(CurrentRead, CurrentWrite, NextWrite) + when NextWrite >= CurrentWrite -> + CurrentRead. + +get_read_handle(File, State = + #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, + read_file_handles_limit = ReadFileHandlesLimit }) -> + Now = now(), + {FileHdl, ReadHdls1, ReadHdlsAge1} = + case dict:find(File, ReadHdls) of + error -> + {ok, Hdl} = file:open(form_filename(File), + [read, raw, binary, + read_ahead]), + case dict:size(ReadHdls) < ReadFileHandlesLimit of + true -> + {Hdl, ReadHdls, ReadHdlsAge}; + _False -> + {Then, OldFile, ReadHdlsAge2} = + gb_trees:take_smallest(ReadHdlsAge), + {ok, {OldHdl, Then}} = + dict:find(OldFile, ReadHdls), + ok = file:close(OldHdl), + {Hdl, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} + end; + {ok, {Hdl, Then}} -> + {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} + end, + ReadHdls3 = dict:store(File, {FileHdl, Now}, ReadHdls1), + ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), + {FileHdl, State #dqstate {read_file_handles = {ReadHdls3, ReadHdlsAge3}}}. + +adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> + ExpectedSeqId; +adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId, _Mode) -> + SuppliedSeqId; +adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId, _Mode) -> + ExpectedSeqId; +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, dirty) when SuppliedSeqId > ExpectedSeqId -> + [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), + ok = mnesia:dirty_write(rabbit_disk_queue, + Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), + SuppliedSeqId; +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, Lock) when SuppliedSeqId > ExpectedSeqId -> + [Obj] = mnesia:read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}, Lock), + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }, + Lock), + SuppliedSeqId. + %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> @@ -564,42 +634,15 @@ internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> end end. 
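get_read_handle/2, relocated above, is a small LRU cache for read handles: a dict maps each file name to {Handle, LastUsed} while a gb_trees keyed on the last-used timestamp finds the oldest handle in O(log n) once the handle limit is reached (now/0 never returns the same value twice on a node, so timestamps are safe as unique tree keys). The eviction step, condensed into a standalone sketch over the same two structures:

    %% Sketch: close and drop the least-recently-used read handle.
    evict_oldest(ReadHdls, ReadHdlsAge) ->
        {Then, OldFile, ReadHdlsAge1} = gb_trees:take_smallest(ReadHdlsAge),
        {ok, {OldHdl, Then}} = dict:find(OldFile, ReadHdls),
        ok = file:close(OldHdl),
        {dict:erase(OldFile, ReadHdls), ReadHdlsAge1}.
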
-get_read_handle(File, State = - #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, - read_file_handles_limit = ReadFileHandlesLimit }) -> - Now = now(), - {FileHdl, ReadHdls1, ReadHdlsAge1} = - case dict:find(File, ReadHdls) of - error -> - {ok, Hdl} = file:open(form_filename(File), - [read, raw, binary, - read_ahead]), - case dict:size(ReadHdls) < ReadFileHandlesLimit of - true -> - {Hdl, ReadHdls, ReadHdlsAge}; - _False -> - {Then, OldFile, ReadHdlsAge2} = - gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, Then}} = - dict:find(OldFile, ReadHdls), - ok = file:close(OldHdl), - {Hdl, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} - end; - {ok, {Hdl, Then}} -> - {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} - end, - ReadHdls3 = dict:store(File, {FileHdl, Now}, ReadHdls1), - ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), - {FileHdl, State #dqstate {read_file_handles = {ReadHdls3, ReadHdlsAge3}}}. - internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, true, State). %% Q is only needed if MnesiaDelete /= false -%% called from tx_cancel with MnesiaDelete = false -%% called from internal_tx_cancel with MnesiaDelete = txn %% called from ack with MnesiaDelete = true +%% called from tx_commit with MnesiaDelete = txn +%% called from tx_cancel with MnesiaDelete = false %% called from purge with MnesiaDelete = txn +%% called from delete_queue with MnesiaDelete = txn remove_messages(Q, MsgSeqIds, MnesiaDelete, State = #dqstate { file_summary = FileSummary, current_file_name = CurName @@ -671,24 +714,6 @@ internal_tx_publish(MsgId, MsgBody, {ok, State} end. -adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> - ExpectedSeqId; -adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId, _Mode) -> - SuppliedSeqId; -adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId, _Mode) -> - ExpectedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, dirty) when SuppliedSeqId > ExpectedSeqId -> - [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), - ok = mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), - SuppliedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, Lock) when SuppliedSeqId > ExpectedSeqId -> - [Obj] = mnesia:read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}, Lock), - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }, - Lock), - SuppliedSeqId. - %% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next)) internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State = #dqstate { current_file_handle = CurHdl, @@ -724,10 +749,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, [{MsgId, _RefCount, File, _Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId, write), - NextSeqId2 = if NextSeqId =:= next -> SeqId2 + 1; - true -> NextSeqId - end, - true = NextSeqId2 > SeqId2, + NextSeqId2 = find_next_seq_id(SeqId2, NextSeqId), ok = mnesia:write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, SeqId2}, @@ -778,17 +800,6 @@ internal_tx_cancel(MsgIds, State) -> MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). 
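The expanded comment above turns MnesiaDelete into a three-way mode switch: false means the messages never reached the mnesia index (a cancelled transaction), so only reference counts in the message store need touching; true means delete dirtily, outside any transaction; txn means the caller is already inside mnesia:transaction/1 and needs transactional deletes. A hypothetical helper showing just that dispatch (not code from this patch):

    delete_index_entry(_Q, _SeqId, false) ->
        ok;   %% never written to the index, nothing to delete
    delete_index_entry(Q, SeqId, true) ->
        ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId});
    delete_index_entry(Q, SeqId, txn) ->
        ok = mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write).
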
-determine_next_read_id(CurrentReadWrite, CurrentReadWrite, CurrentReadWrite) -> - CurrentReadWrite; -determine_next_read_id(CurrentRead, _CurrentWrite, next) -> - CurrentRead; -determine_next_read_id(CurrentReadWrite, CurrentReadWrite, NextWrite) - when NextWrite > CurrentReadWrite -> - NextWrite; -determine_next_read_id(CurrentRead, CurrentWrite, NextWrite) - when NextWrite >= CurrentWrite -> - CurrentRead. - internal_requeue(_Q, [], State) -> {ok, State}; internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], @@ -828,10 +839,7 @@ internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], {_NextMsgSeqId, NextSeqIdTo}}, ExpectedSeqIdTo) -> SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), - NextSeqIdTo2 = if NextSeqIdTo =:= next -> SeqIdTo2 + 1; - true -> NextSeqIdTo - end, - true = NextSeqIdTo2 > SeqIdTo2, + NextSeqIdTo2 = find_next_seq_id(SeqIdTo2, NextSeqIdTo), [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), mnesia:write(rabbit_disk_queue, @@ -869,6 +877,27 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> {ok, WriteSeqId - ReadSeqId, State2} end. +internal_delete_queue(Q, State = #dqstate { sequences = Sequences }) -> + true = ets:delete(Sequences, Q), + {atomic, {ok, State1}} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + Objs = + mnesia:match_object(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, '_'}, + msg_id = '_', + is_delivered = '_', + next_seq_id = '_' + }, write), + MsgSeqIds = + lists:map( + fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, msg_id = MsgId }) -> + {MsgId, SeqId} + end, Objs), + remove_messages(Q, MsgSeqIds, txn, State) + end), + {ok, State1}. + %% ---- ROLLING OVER THE APPEND FILE ---- maybe_roll_to_new_file(Offset, diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 153a8a7c..5b021b36 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -110,6 +110,7 @@ -spec(format_stderr/2 :: (string(), [any()]) -> 'true'). -spec(start_applications/1 :: ([atom()]) -> 'ok'). -spec(stop_applications/1 :: ([atom()]) -> 'ok'). +-spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false'))) -> A -> [B]) -endif. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 5cda8eca..b807fce2 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -36,8 +36,8 @@ -export([start_link/2]). -export([publish/2, publish_delivered/2, deliver/1, ack/2, - tx_publish/2, tx_commit/3, tx_cancel/2, - requeue/2, purge/1, length/1, is_empty/1]). + tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, + length/1, is_empty/1, delete_queue/1]). -record(mqstate, { mode, msg_buf, @@ -206,6 +206,13 @@ purge(State = #mqstate { queue = Q, msg_buf = MsgBuf, mode = mixed }) -> Count = queue:len(MsgBuf), {Count, State #mqstate { msg_buf = queue:new() }}. +delete_queue(State = #mqstate { queue = Q, mode = disk }) -> + rabbit_disk_queue:delete_queue(Q), + {ok, State}; +delete_queue(State = #mqstate { queue = Q, mode = mixed }) -> + rabbit_disk_queue:delete_queue(Q), + {ok, State #mqstate { msg_buf = queue:new() }}. 
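Note the cost hiding in internal_delete_queue: the table key is the whole {Q, SeqId} tuple, so a pattern of {Q, '_'} cannot be resolved through the key and mnesia:match_object ends up scanning every record in rabbit_disk_queue; a later commit below mitigates this by purging first, leaving only delivered-but-unacked entries to match. For contrast, a fully keyed read that avoids the scan (usable only when the SeqId is known):

    %% O(1): the full key is bound
    [Obj] = mnesia:read(rabbit_disk_queue, {Q, SeqId}, write),
    %% full-table scan: half the key is a wildcard
    Objs = mnesia:match_object(
             rabbit_disk_queue,
             #dq_msg_loc { queue_and_seq_id = {Q, '_'}, msg_id = '_',
                           is_delivered = '_', next_seq_id = '_' },
             write).
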
+ length(#mqstate { queue = Q, mode = disk }) -> rabbit_disk_queue:length(Q); length(#mqstate { mode = mixed, msg_buf = MsgBuf }) -> -- cgit v1.2.1 From af8fe76086cba976f7b3cad9d78ca3858fa83875 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 27 May 2009 12:48:53 +0100 Subject: ugh, managed to forget the need to compile before running dialyzer, hence just committed broken code... --- src/rabbit_misc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 5b021b36..2f329aa9 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -110,7 +110,7 @@ -spec(format_stderr/2 :: (string(), [any()]) -> 'true'). -spec(start_applications/1 :: ([atom()]) -> 'ok'). -spec(stop_applications/1 :: ([atom()]) -> 'ok'). --spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false'))) -> A -> [B]) +-spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> [B]). -endif. -- cgit v1.2.1 From 10217468503f33fe30e92d214eb79b311ea55e51 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 27 May 2009 13:11:23 +0100 Subject: Brought out starting the persister to rabbit.erl. Also, reduced the file size limit to 25MB. The reason is that it was observed that start up was taking a long time simply because the files were large (100MB). Given that the file being written to is never GC'd, reducing the file size limit forces new files to be created more frequently, thus increasing the use of GC and thereby keeping file utilisation higher. As a result, less time is wasted at startup scanning over delivered but not-yet-GC'd messages. --- src/rabbit.erl | 5 +++++ src/rabbit_disk_queue.erl | 9 +++++---- src/rabbit_mixed_queue.erl | 4 ---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 0de93e99..3a15e6b0 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -155,6 +155,11 @@ start(normal, []) -> fun () -> ok = start_child(rabbit_persister) end}, + {"disk queue", + fun () -> + ok = start_child(rabbit_disk_queue), + ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME + end}, {"guid generator", fun () -> ok = start_child(rabbit_guid) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b73e456c..d13b6eb6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -33,7 +33,7 @@ -behaviour(gen_server). --export([start_link/1]). +-export([start_link/0]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -63,6 +63,7 @@ -define(SERVER, ?MODULE). -define(MAX_READ_FILE_HANDLES, 256). +-define(FILE_SIZE_LIMIT, (25*1024*1024)). -record(dqstate, {msg_location_dets, %% where are messages? msg_location_ets, %% as above, but for ets version @@ -226,7 +227,7 @@ -type(seq_id() :: non_neg_integer()). -type(seq_id_or_next() :: { seq_id() | 'next' }). --spec(start_link/1 :: (non_neg_integer()) -> +-spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). -spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id_or_next(), binary()) -> 'ok'). @@ -255,9 +256,9 @@ %% ---- PUBLIC API ---- -start_link(FileSizeLimit) -> +start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, - [FileSizeLimit, ?MAX_READ_FILE_HANDLES], []). + [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []). publish(Q, MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). 
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index b807fce2..6a8f3097 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -46,11 +46,7 @@ } ). --define(FILE_SIZE_LIMIT, (100*1024*1024)). - start_link(Queue, Mode) when Mode =:= disk orelse Mode =:= mixed -> - rabbit_disk_queue:start_link(?FILE_SIZE_LIMIT), - rabbit_disk_queue:to_ram_disk_mode(), %% TODO, CHANGE ME {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 1, queue = Queue }}. msg_to_bin(Msg = #basic_message { content = Content }) -> -- cgit v1.2.1 From d1cad0af57633d3f40a2740572e619e5fb645521 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 27 May 2009 18:02:45 +0100 Subject: gen_server -> gen_server2, delete_queue calls purge_queue first in order to try and reduce horrible inefficient mnesia_match_object call. Also some refactoring and tidying. --- scripts/rabbitmq-server | 1 + src/rabbit_disk_queue.erl | 81 +++++++++++++++++++++++++---------------------- 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 8502d60a..0aa09bd8 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -98,6 +98,7 @@ exec erl \ -os_mon memsup_system_only true \ -os_mon system_memory_high_watermark 0.95 \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ + -mnesia dump_log_write_threshold 10000 \ ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ "$@" diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d13b6eb6..a0bc1bfd 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_disk_queue). --behaviour(gen_server). +-behaviour(gen_server2). -export([start_link/0]). @@ -257,63 +257,63 @@ %% ---- PUBLIC API ---- start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, - [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []). + gen_server2:start_link({local, ?SERVER}, ?MODULE, + [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []). publish(Q, MsgId, Msg) when is_binary(Msg) -> - gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). + gen_server2:cast(?SERVER, {publish, Q, MsgId, Msg}). publish_with_seq(Q, MsgId, SeqId, Msg) when is_binary(Msg) -> - gen_server:cast(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg}). + gen_server2:cast(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg}). deliver(Q) -> - gen_server:call(?SERVER, {deliver, Q}, infinity). + gen_server2:call(?SERVER, {deliver, Q}, infinity). phantom_deliver(Q) -> - gen_server:call(?SERVER, {phantom_deliver, Q}). + gen_server2:call(?SERVER, {phantom_deliver, Q}, infinity). ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> - gen_server:cast(?SERVER, {ack, Q, MsgSeqIds}). + gen_server2:cast(?SERVER, {ack, Q, MsgSeqIds}). tx_publish(MsgId, Msg) when is_binary(Msg) -> - gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}). + gen_server2:cast(?SERVER, {tx_publish, MsgId, Msg}). tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> - gen_server:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). + gen_server2:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). tx_commit_with_seqs(Q, PubMsgSeqIds, AckSeqIds) when is_list(PubMsgSeqIds) andalso is_list(AckSeqIds) -> - gen_server:call(?SERVER, {tx_commit_with_seqs, Q, PubMsgSeqIds, AckSeqIds}, infinity). + gen_server2:call(?SERVER, {tx_commit_with_seqs, Q, PubMsgSeqIds, AckSeqIds}, infinity). tx_cancel(MsgIds) when is_list(MsgIds) -> - gen_server:cast(?SERVER, {tx_cancel, MsgIds}). 
+ gen_server2:cast(?SERVER, {tx_cancel, MsgIds}). requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> - gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). + gen_server2:cast(?SERVER, {requeue, Q, MsgSeqIds}). requeue_with_seqs(Q, MsgSeqSeqIds) when is_list(MsgSeqSeqIds) -> - gen_server:cast(?SERVER, {requeue_with_seqs, Q, MsgSeqSeqIds}). + gen_server2:cast(?SERVER, {requeue_with_seqs, Q, MsgSeqSeqIds}). purge(Q) -> - gen_server:call(?SERVER, {purge, Q}). + gen_server2:call(?SERVER, {purge, Q}, infinity). delete_queue(Q) -> - gen_server:cast(?SERVER, {delete_queue, Q}). + gen_server2:cast(?SERVER, {delete_queue, Q}). stop() -> - gen_server:call(?SERVER, stop, infinity). + gen_server2:call(?SERVER, stop, infinity). stop_and_obliterate() -> - gen_server:call(?SERVER, stop_vaporise, infinity). + gen_server2:call(?SERVER, stop_vaporise, infinity). to_disk_only_mode() -> - gen_server:call(?SERVER, to_disk_only_mode, infinity). + gen_server2:call(?SERVER, to_disk_only_mode, infinity). to_ram_disk_mode() -> - gen_server:call(?SERVER, to_ram_disk_mode, infinity). + gen_server2:call(?SERVER, to_ram_disk_mode, infinity). length(Q) -> - gen_server:call(?SERVER, {length, Q}, infinity). + gen_server2:call(?SERVER, {length, Q}, infinity). is_empty(Q) -> Length = rabbit_disk_queue:length(Q), @@ -878,9 +878,10 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> {ok, WriteSeqId - ReadSeqId, State2} end. -internal_delete_queue(Q, State = #dqstate { sequences = Sequences }) -> +internal_delete_queue(Q, State) -> + {ok, _Count, State1 = #dqstate { sequences = Sequences }} = internal_purge(Q, State), true = ets:delete(Sequences, Q), - {atomic, {ok, State1}} = + {atomic, {ok, State2}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), @@ -895,9 +896,9 @@ internal_delete_queue(Q, State = #dqstate { sequences = Sequences }) -> fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, msg_id = MsgId }) -> {MsgId, SeqId} end, Objs), - remove_messages(Q, MsgSeqIds, txn, State) + remove_messages(Q, MsgSeqIds, txn, State1) end), - {ok, State1}. + {ok, State2}. %% ---- ROLLING OVER THE APPEND FILE ---- @@ -1196,14 +1197,26 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> %% ---- DISK RECOVERY ---- +add_index() -> + case mnesia:add_table_index(rabbit_disk_queue, msg_id) of + {atomic, ok} -> ok; + {aborted,{already_exists,rabbit_disk_queue,_}} -> ok; + E -> E + end. + +del_index() -> + case mnesia:del_table_index(rabbit_disk_queue, msg_id) of + {atomic, ok} -> ok; + %% hmm, something weird must be going on, but it's probably + %% not the end of the world + {aborted,{no_exists,rabbit_disk_queue,_}} -> ok; + E2 -> E2 + end. + load_from_disk(State) -> %% sorted so that smallest number is first. 
which also means %% eldest file (left-most) first - ok = case mnesia:add_table_index(rabbit_disk_queue, msg_id) of - {atomic, ok} -> ok; - {aborted,{already_exists,rabbit_disk_queue,_}} -> ok; - E -> E - end, + ok = add_index(), {Files, TmpFiles} = get_disk_queue_files(), ok = recover_crashed_compactions(Files, TmpFiles), %% There should be no more tmp files now, so go ahead and load the @@ -1221,13 +1234,7 @@ load_from_disk(State) -> true, rabbit_disk_queue) end), State2 = extract_sequence_numbers(State1), - ok = case mnesia:del_table_index(rabbit_disk_queue, msg_id) of - {atomic, ok} -> ok; - %% hmm, something weird must be going on, but it's - %% probably not the end of the world - {aborted,{no_exists,rabbit_disk_queue,_}} -> ok; - E2 -> E2 - end, + ok = del_index(), {ok, State2}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> -- cgit v1.2.1 From 01b7ee0af5f47e1d12078eb90e62a3928da606c4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 29 May 2009 11:05:22 +0100 Subject: (just for testing - switch to disk only mode) --- src/rabbit.erl | 4 ++-- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_disk_queue.erl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index c0d09547..5062e7e9 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -157,8 +157,8 @@ start(normal, []) -> end}, {"disk queue", fun () -> - ok = start_child(rabbit_disk_queue), - ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME + ok = start_child(rabbit_disk_queue) %%, + %% ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME end}, {"guid generator", fun () -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 3ca88aaa..5a8bd4a4 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -92,7 +92,7 @@ start_link(Q) -> init(Q = #amqqueue { name = QName }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - {ok, MS} = rabbit_mixed_queue:start_link(QName, mixed), %% TODO, CHANGE ME + {ok, MS} = rabbit_mixed_queue:start_link(QName, disk), %% TODO, CHANGE ME {ok, #q{q = Q, owner = none, exclusive_consumer = none, diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a0bc1bfd..8fb5b905 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -63,7 +63,7 @@ -define(SERVER, ?MODULE). -define(MAX_READ_FILE_HANDLES, 256). --define(FILE_SIZE_LIMIT, (25*1024*1024)). +-define(FILE_SIZE_LIMIT, (256*1024*1024)). -record(dqstate, {msg_location_dets, %% where are messages? 
msg_location_ets, %% as above, but for ets version -- cgit v1.2.1 From f14667e45c463b72e740ed12db55932d697c3cc2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Jun 2009 12:06:22 +0100 Subject: only write out persistent messages sent to a durable queue --- src/rabbit.erl | 4 ++-- src/rabbit_amqqueue_process.erl | 4 ++-- src/rabbit_mixed_queue.erl | 50 ++++++++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 5062e7e9..c0d09547 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -157,8 +157,8 @@ start(normal, []) -> end}, {"disk queue", fun () -> - ok = start_child(rabbit_disk_queue) %%, - %% ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME + ok = start_child(rabbit_disk_queue), + ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME end}, {"guid generator", fun () -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5a8bd4a4..73fae892 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -90,9 +90,9 @@ start_link(Q) -> %%---------------------------------------------------------------------------- -init(Q = #amqqueue { name = QName }) -> +init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - {ok, MS} = rabbit_mixed_queue:start_link(QName, disk), %% TODO, CHANGE ME + {ok, MS} = rabbit_mixed_queue:start_link(QName, Durable, mixed), %% TODO, CHANGE ME {ok, #q{q = Q, owner = none, exclusive_consumer = none, diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 6a8f3097..d1000c88 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -33,7 +33,7 @@ -include("rabbit.hrl"). --export([start_link/2]). +-export([start_link/3]). -export([publish/2, publish_delivered/2, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, @@ -42,12 +42,14 @@ -record(mqstate, { mode, msg_buf, next_write_seq, - queue + queue, + is_durable } ). -start_link(Queue, Mode) when Mode =:= disk orelse Mode =:= mixed -> - {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 1, queue = Queue }}. +start_link(Queue, IsDurable, Mode) when Mode =:= disk orelse Mode =:= mixed -> + {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 1, + queue = Queue, is_durable = IsDurable }}. msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), @@ -61,9 +63,9 @@ publish(Msg = #basic_message { guid = MsgId }, ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), {ok, State}; publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - State = #mqstate { queue = Q, mode = mixed, + State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, next_write_seq = NextSeq, msg_buf = MsgBuf }) -> - ok = if IsPersistent -> + ok = if IsDurable andalso IsPersistent -> rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, msg_to_bin(Msg)); true -> ok end, @@ -71,32 +73,33 @@ publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, msg_buf = queue:in({NextSeq, Msg, false}, MsgBuf) }}. 
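The rule this commit introduces, write to the disk queue only when the queue is durable and the message is persistent, gives four combinations for a mixed-mode queue, exactly one of which touches disk:

    %% mixed-mode publish:
    %%   durable queue,   persistent message -> disk queue + in-memory buffer
    %%   durable queue,   transient  message -> in-memory buffer only
    %%   transient queue, persistent message -> in-memory buffer only
    %%   transient queue, transient  message -> in-memory buffer only
    %% (a disk-mode queue still writes every message regardless)
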
-%% assumption here is that the queue is empty already (only called via publish immediate) +%% assumption here is that the queue is empty already (only called via attempt_immediate_delivery) publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, - State = #mqstate { mode = Mode, queue = Q, next_write_seq = NextSeq }) - when Mode =:= disk orelse IsPersistent -> + State = #mqstate { mode = Mode, queue = Q, is_durable = IsDurable, + next_write_seq = NextSeq }) + when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), State2 = if Mode =:= mixed -> State #mqstate { next_write_seq = NextSeq + 1 }; true -> State end, {ok, AckTag, State2}; -publish_delivered(#basic_message { is_persistent = false }, - State = #mqstate { mode = mixed }) -> +publish_delivered(_Msg, State = #mqstate { mode = mixed }) -> {ok, noack, State}. deliver(State = #mqstate { mode = disk, queue = Q }) -> {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), {{Msg, IsDelivered, AckTag, Remaining}, State}; -deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextWrite }) -> +deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + next_write_seq = NextWrite, is_durable = IsDurable }) -> {Result, MsgBuf2} = queue:out(MsgBuf), case Result of empty -> {empty, State}; {value, {Seq, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered}} -> AckTag = - if IsPersistent -> + if IsDurable andalso IsPersistent -> {MsgId, IsDelivered, AckTag2, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), AckTag2; true -> noack @@ -118,12 +121,12 @@ ack(Acks, State = #mqstate { queue = Q }) -> tx_publish(Msg = #basic_message { guid = MsgId }, State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; -tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = true }, - State = #mqstate { mode = mixed }) -> +tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, + State = #mqstate { mode = mixed, is_durable = IsDurable }) + when IsDurable andalso IsPersistent -> ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; -tx_publish(#basic_message { is_persistent = false }, - State = #mqstate { mode = mixed }) -> +tx_publish(_Msg, State = #mqstate { mode = mixed }) -> {ok, State}. 
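On the way back out the same predicate decides whether an ack tag is real: only a durable queue's persistent messages have a disk-queue entry behind them, so everything else is tagged noack, and those markers must be stripped before the surviving tags are handed to the disk queue. remove_noacks/1, used by tx_commit below, presumably amounts to a filter along these lines (a sketch; its body is not shown in this patch):

    remove_noacks(AckTags) ->
        [AckTag || AckTag <- AckTags, AckTag =/= noack].
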
only_msg_ids(Pubs) -> @@ -134,7 +137,8 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> {ok, State}; tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - next_write_seq = NextSeq + next_write_seq = NextSeq, + is_durable = IsDurable }) -> {PersistentPubs, MsgBuf2, NextSeq2} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, @@ -149,7 +153,10 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, end, {[], MsgBuf, NextSeq}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match %% requirements of rabbit_disk_queue (ascending SeqIds) - ok = rabbit_disk_queue:tx_commit_with_seqs(Q, lists:reverse(PersistentPubs), + PersistentPubs2 = if IsDurable -> lists:reverse(PersistentPubs); + true -> [] + end, + ok = rabbit_disk_queue:tx_commit_with_seqs(Q, PersistentPubs2, remove_noacks(Acks)), {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. @@ -177,13 +184,14 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q }) -> {ok, State}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - next_write_seq = NextSeq + next_write_seq = NextSeq, + is_durable = IsDurable }) -> {PersistentPubs, MsgBuf2, NextSeq2} = lists:foldl(fun ({Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, AckTag}, {Acc, MsgBuf3, NextSeq3}) -> Acc2 = - if IsPersistent -> + if IsDurable andalso IsPersistent -> {MsgId, _OldSeqId} = AckTag, [{AckTag, NextSeq3} | Acc]; true -> Acc -- cgit v1.2.1 From 6ff3f1443aabcfa24c912469b7b1fa989ac497fa Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Jun 2009 12:28:23 +0100 Subject: removed the persister and modified guid generation to use a persisted serial id stored on disk --- src/rabbit.erl | 4 - src/rabbit_guid.erl | 22 +- src/rabbit_persister.erl | 523 ----------------------------------------------- 3 files changed, 15 insertions(+), 534 deletions(-) delete mode 100644 src/rabbit_persister.erl diff --git a/src/rabbit.erl b/src/rabbit.erl index c0d09547..e79c7f59 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -151,10 +151,6 @@ start(normal, []) -> ok = rabbit_exchange:recover(), ok = rabbit_amqqueue:recover() end}, - {"persister", - fun () -> - ok = start_child(rabbit_persister) - end}, {"disk queue", fun () -> ok = start_child(rabbit_disk_queue), diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index 2be00503..fe5acc83 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -42,6 +42,7 @@ terminate/2, code_change/3]). -define(SERVER, ?MODULE). +-define(SERIAL_FILENAME, rabbit_guid). -record(state, {serial}). @@ -59,17 +60,24 @@ %%---------------------------------------------------------------------------- start_link() -> - %% The persister can get heavily loaded, and we don't want that to - %% impact guid generation. We therefore keep the serial in a - %% separate process rather than calling rabbit_persister:serial/0 - %% directly in the functions below. gen_server:start_link({local, ?SERVER}, ?MODULE, - [rabbit_persister:serial()], []). + [update_disk_serial()], []). + +update_disk_serial() -> + Filename = filename:join(mnesia:system_info(directory), ?SERIAL_FILENAME), + Serial = case file:read_file(Filename) of + {ok, Content} -> + binary_to_term(Content); + {error, _} -> + 0 + end, + ok = file:write_file(Filename, term_to_binary(Serial + 1)), + Serial. %% generate a guid that is monotonically increasing per process. 
%% %% The id is only unique within a single cluster and as long as the -%% persistent message store hasn't been deleted. +%% serial store hasn't been deleted. guid() -> %% We don't use erlang:now() here because a) it may return %% duplicates when the system clock has been rewound prior to a @@ -77,7 +85,7 @@ guid() -> %% now() to move ahead of the system time), and b) it is really %% slow since it takes a global lock and makes a system call. %% - %% rabbit_persister:serial/0, in combination with self/0 (which + %% A persisted serial number, in combination with self/0 (which %% includes the node name) uniquely identifies a process in space %% and time. We combine that with a process-local counter to give %% us a GUID that is monotonically increasing per process. diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl deleted file mode 100644 index d0d60ddf..00000000 --- a/src/rabbit_persister.erl +++ /dev/null @@ -1,523 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_persister). - --behaviour(gen_server). - --export([start_link/0]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([transaction/1, extend_transaction/2, dirty_work/1, - commit_transaction/1, rollback_transaction/1, - force_snapshot/0, serial/0]). - --include("rabbit.hrl"). - --define(SERVER, ?MODULE). - --define(LOG_BUNDLE_DELAY, 5). --define(COMPLETE_BUNDLE_DELAY, 2). - --define(HIBERNATE_AFTER, 10000). - --define(MAX_WRAP_ENTRIES, 500). - --define(PERSISTER_LOG_FORMAT_VERSION, {2, 4}). - --record(pstate, {log_handle, entry_count, deadline, - pending_logs, pending_replies, - snapshot}). - -%% two tables for efficient persistency -%% one maps a key to a message -%% the other maps a key to one or more queues. -%% The aim is to reduce the overload of storing a message multiple times -%% when it appears in several queues. --record(psnapshot, {serial, transactions, messages, queues}). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(qmsg() :: {amqqueue(), pkey()}). --type(work_item() :: - {publish, message(), qmsg()} | - {deliver, qmsg()} | - {ack, qmsg()}). 
- --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(transaction/1 :: ([work_item()]) -> 'ok'). --spec(extend_transaction/2 :: (txn(), [work_item()]) -> 'ok'). --spec(dirty_work/1 :: ([work_item()]) -> 'ok'). --spec(commit_transaction/1 :: (txn()) -> 'ok'). --spec(rollback_transaction/1 :: (txn()) -> 'ok'). --spec(force_snapshot/0 :: () -> 'ok'). --spec(serial/0 :: () -> non_neg_integer()). - --endif. - -%%---------------------------------------------------------------------------- - -start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). - -transaction(MessageList) -> - ?LOGDEBUG("transaction ~p~n", [MessageList]), - TxnKey = rabbit_guid:guid(), - gen_server:call(?SERVER, {transaction, TxnKey, MessageList}, infinity). - -extend_transaction(TxnKey, MessageList) -> - ?LOGDEBUG("extend_transaction ~p ~p~n", [TxnKey, MessageList]), - gen_server:cast(?SERVER, {extend_transaction, TxnKey, MessageList}). - -dirty_work(MessageList) -> - ?LOGDEBUG("dirty_work ~p~n", [MessageList]), - gen_server:cast(?SERVER, {dirty_work, MessageList}). - -commit_transaction(TxnKey) -> - ?LOGDEBUG("commit_transaction ~p~n", [TxnKey]), - gen_server:call(?SERVER, {commit_transaction, TxnKey}, infinity). - -rollback_transaction(TxnKey) -> - ?LOGDEBUG("rollback_transaction ~p~n", [TxnKey]), - gen_server:cast(?SERVER, {rollback_transaction, TxnKey}). - -force_snapshot() -> - gen_server:call(?SERVER, force_snapshot, infinity). - -serial() -> - gen_server:call(?SERVER, serial, infinity). - -%%-------------------------------------------------------------------- - -init(_Args) -> - process_flag(trap_exit, true), - FileName = base_filename(), - ok = filelib:ensure_dir(FileName), - Snapshot = #psnapshot{serial = 0, - transactions = dict:new(), - messages = ets:new(messages, []), - queues = ets:new(queues, [])}, - LogHandle = - case disk_log:open([{name, rabbit_persister}, - {head, current_snapshot(Snapshot)}, - {file, FileName}]) of - {ok, LH} -> LH; - {repaired, LH, {recovered, Recovered}, {badbytes, Bad}} -> - WarningFun = if - Bad > 0 -> fun rabbit_log:warning/2; - true -> fun rabbit_log:info/2 - end, - WarningFun("Repaired persister log - ~p recovered, ~p bad~n", - [Recovered, Bad]), - LH - end, - {Res, LoadedSnapshot} = internal_load_snapshot(LogHandle, Snapshot), - NewSnapshot = LoadedSnapshot#psnapshot{ - serial = LoadedSnapshot#psnapshot.serial + 1}, - case Res of - ok -> - ok = take_snapshot(LogHandle, NewSnapshot); - {error, Reason} -> - rabbit_log:error("Failed to load persister log: ~p~n", [Reason]), - ok = take_snapshot_and_save_old(LogHandle, NewSnapshot) - end, - State = #pstate{log_handle = LogHandle, - entry_count = 0, - deadline = infinity, - pending_logs = [], - pending_replies = [], - snapshot = NewSnapshot}, - {ok, State}. - -handle_call({transaction, Key, MessageList}, From, State) -> - NewState = internal_extend(Key, MessageList, State), - do_noreply(internal_commit(From, Key, NewState)); -handle_call({commit_transaction, TxnKey}, From, State) -> - do_noreply(internal_commit(From, TxnKey, State)); -handle_call(force_snapshot, _From, State) -> - do_reply(ok, flush(true, State)); -handle_call(serial, _From, - State = #pstate{snapshot = #psnapshot{serial = Serial}}) -> - do_reply(Serial, State); -handle_call(_Request, _From, State) -> - {noreply, State}. 
- -handle_cast({rollback_transaction, TxnKey}, State) -> - do_noreply(internal_rollback(TxnKey, State)); -handle_cast({dirty_work, MessageList}, State) -> - do_noreply(internal_dirty_work(MessageList, State)); -handle_cast({extend_transaction, TxnKey, MessageList}, State) -> - do_noreply(internal_extend(TxnKey, MessageList, State)); -handle_cast(_Msg, State) -> - {noreply, State}. - -handle_info(timeout, State = #pstate{deadline = infinity}) -> - State1 = flush(true, State), - %% TODO: Once we drop support for R11B-5, we can change this to - %% {noreply, State1, hibernate}; - proc_lib:hibernate(gen_server2, enter_loop, [?MODULE, [], State1]); -handle_info(timeout, State) -> - do_noreply(flush(State)); -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, State = #pstate{log_handle = LogHandle}) -> - flush(State), - disk_log:close(LogHandle), - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, flush(State)}. - -%%-------------------------------------------------------------------- - -internal_extend(Key, MessageList, State) -> - log_work(fun (ML) -> {extend_transaction, Key, ML} end, - MessageList, State). - -internal_dirty_work(MessageList, State) -> - log_work(fun (ML) -> {dirty_work, ML} end, - MessageList, State). - -internal_commit(From, Key, State = #pstate{snapshot = Snapshot}) -> - Unit = {commit_transaction, Key}, - NewSnapshot = internal_integrate1(Unit, Snapshot), - complete(From, Unit, State#pstate{snapshot = NewSnapshot}). - -internal_rollback(Key, State = #pstate{snapshot = Snapshot}) -> - Unit = {rollback_transaction, Key}, - NewSnapshot = internal_integrate1(Unit, Snapshot), - log(State#pstate{snapshot = NewSnapshot}, Unit). - -complete(From, Item, State = #pstate{deadline = ExistingDeadline, - pending_logs = Logs, - pending_replies = Waiting}) -> - State#pstate{deadline = compute_deadline( - ?COMPLETE_BUNDLE_DELAY, ExistingDeadline), - pending_logs = [Item | Logs], - pending_replies = [From | Waiting]}. - -%% This is made to limit disk usage by writing messages only once onto -%% disk. We keep a table associating pkeys to messages, and provided -%% the list of messages to output is left to right, we can guarantee -%% that pkeys will be a backreference to a message in memory when a -%% "tied" is met. -log_work(CreateWorkUnit, MessageList, - State = #pstate{ - snapshot = Snapshot = #psnapshot{ - messages = Messages}}) -> - Unit = CreateWorkUnit( - rabbit_misc:map_in_order( - fun(M = {publish, Message, QK = {_QName, PKey}}) -> - case ets:lookup(Messages, PKey) of - [_] -> {tied, QK}; - [] -> ets:insert(Messages, {PKey, Message}), - M - end; - (M) -> M - end, - MessageList)), - NewSnapshot = internal_integrate1(Unit, Snapshot), - log(State#pstate{snapshot = NewSnapshot}, Unit). - -log(State = #pstate{deadline = ExistingDeadline, pending_logs = Logs}, - Message) -> - State#pstate{deadline = compute_deadline(?LOG_BUNDLE_DELAY, - ExistingDeadline), - pending_logs = [Message | Logs]}. - -base_filename() -> - rabbit_mnesia:dir() ++ "/rabbit_persister.LOG". - -take_snapshot(LogHandle, OldFileName, Snapshot) -> - ok = disk_log:sync(LogHandle), - %% current_snapshot is the Head (ie. first thing logged) - ok = disk_log:reopen(LogHandle, OldFileName, current_snapshot(Snapshot)). - -take_snapshot(LogHandle, Snapshot) -> - OldFileName = lists:flatten(base_filename() ++ ".previous"), - file:delete(OldFileName), - rabbit_log:info("Rolling persister log to ~p~n", [OldFileName]), - ok = take_snapshot(LogHandle, OldFileName, Snapshot). 
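(The comment above log_work is worth unpacking: within a stream of work items, the first publish of a message body is logged in full, and any later occurrence of the same pkey is logged as a {tied, QueueKey} back-reference, so a message fanned out to several queues hits the disk once. A worked illustration; q1, q2 and the pkey k1 are invented values, not from the patch:)

    %% Publishing one message M with pkey k1 to two queues, the work list
    %%   [{publish, M, {q1, k1}}, {publish, M, {q2, k1}}]
    %% is logged as
    %%   [{publish, M, {q1, k1}}, {tied, {q2, k1}}],
    %% and on replay perform_work_item (at the tail of this file, below)
    %% re-inserts the body once while recording both queue entries.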
- -take_snapshot_and_save_old(LogHandle, Snapshot) -> - {MegaSecs, Secs, MicroSecs} = erlang:now(), - Timestamp = MegaSecs * 1000000 + Secs * 1000 + MicroSecs, - OldFileName = lists:flatten(io_lib:format("~s.saved.~p", - [base_filename(), Timestamp])), - rabbit_log:info("Saving persister log in ~p~n", [OldFileName]), - ok = take_snapshot(LogHandle, OldFileName, Snapshot). - -maybe_take_snapshot(Force, State = #pstate{entry_count = EntryCount, - log_handle = LH, - snapshot = Snapshot}) - when Force orelse EntryCount >= ?MAX_WRAP_ENTRIES -> - ok = take_snapshot(LH, Snapshot), - State#pstate{entry_count = 0}; -maybe_take_snapshot(_Force, State) -> - State. - -later_ms(DeltaMilliSec) -> - {MegaSec, Sec, MicroSec} = now(), - %% Note: not normalised. Unimportant for this application. - {MegaSec, Sec, MicroSec + (DeltaMilliSec * 1000)}. - -%% Result = B - A, more or less -time_diff({B1, B2, B3}, {A1, A2, A3}) -> - (B1 - A1) * 1000000 + (B2 - A2) + (B3 - A3) / 1000000.0 . - -compute_deadline(TimerDelay, infinity) -> - later_ms(TimerDelay); -compute_deadline(_TimerDelay, ExistingDeadline) -> - ExistingDeadline. - -compute_timeout(infinity) -> - ?HIBERNATE_AFTER; -compute_timeout(Deadline) -> - DeltaMilliSec = time_diff(Deadline, now()) * 1000.0, - if - DeltaMilliSec =< 1 -> - 0; - true -> - round(DeltaMilliSec) - end. - -do_noreply(State = #pstate{deadline = Deadline}) -> - {noreply, State, compute_timeout(Deadline)}. - -do_reply(Reply, State = #pstate{deadline = Deadline}) -> - {reply, Reply, State, compute_timeout(Deadline)}. - -flush(State) -> flush(false, State). - -flush(ForceSnapshot, State = #pstate{pending_logs = PendingLogs, - pending_replies = Waiting, - log_handle = LogHandle}) -> - State1 = if PendingLogs /= [] -> - disk_log:alog(LogHandle, lists:reverse(PendingLogs)), - State#pstate{entry_count = State#pstate.entry_count + 1}; - true -> - State - end, - State2 = maybe_take_snapshot(ForceSnapshot, State1), - if Waiting /= [] -> - ok = disk_log:sync(LogHandle), - lists:foreach(fun (From) -> gen_server:reply(From, ok) end, - Waiting); - true -> - ok - end, - State2#pstate{deadline = infinity, - pending_logs = [], - pending_replies = []}. - -current_snapshot(_Snapshot = #psnapshot{serial = Serial, - transactions= Ts, - messages = Messages, - queues = Queues}) -> - %% Avoid infinite growth of the table by removing messages not - %% bound to a queue anymore - prune_table(Messages, ets:foldl( - fun ({{_QName, PKey}, _Delivered}, S) -> - sets:add_element(PKey, S) - end, sets:new(), Queues)), - InnerSnapshot = {{serial, Serial}, - {txns, Ts}, - {messages, ets:tab2list(Messages)}, - {queues, ets:tab2list(Queues)}}, - ?LOGDEBUG("Inner snapshot: ~p~n", [InnerSnapshot]), - {persist_snapshot, {vsn, ?PERSISTER_LOG_FORMAT_VERSION}, - term_to_binary(InnerSnapshot)}. - -prune_table(Tab, Keys) -> - true = ets:safe_fixtable(Tab, true), - ok = prune_table(Tab, Keys, ets:first(Tab)), - true = ets:safe_fixtable(Tab, false). - -prune_table(_Tab, _Keys, '$end_of_table') -> ok; -prune_table(Tab, Keys, Key) -> - case sets:is_element(Key, Keys) of - true -> ok; - false -> ets:delete(Tab, Key) - end, - prune_table(Tab, Keys, ets:next(Tab, Key)). 
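(A note on the arithmetic above, since the variable names can mislead: time_diff/2 returns seconds, not milliseconds, and compute_timeout/1 converts to the millisecond timeout a gen_server expects. A worked evaluation with assumed timestamps:)

    %% time_diff({0, 1, 500000}, {0, 0, 0})
    %%   = (0 * 1000000) + 1 + (500000 / 1000000.0)   %% => 1.5 seconds
    %% compute_timeout on such a deadline then returns
    %%   round(1.5 * 1000.0)                           %% => 1500 ms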
- -internal_load_snapshot(LogHandle, - Snapshot = #psnapshot{messages = Messages, - queues = Queues}) -> - {K, [Loaded_Snapshot | Items]} = disk_log:chunk(LogHandle, start), - case check_version(Loaded_Snapshot) of - {ok, StateBin} -> - {{serial, Serial}, {txns, Ts}, {messages, Ms}, {queues, Qs}} = - binary_to_term(StateBin), - true = ets:insert(Messages, Ms), - true = ets:insert(Queues, Qs), - Snapshot1 = replay(Items, LogHandle, K, - Snapshot#psnapshot{ - serial = Serial, - transactions = Ts}), - Snapshot2 = requeue_messages(Snapshot1), - %% uncompleted transactions are discarded - this is TRTTD - %% since we only get into this code on node restart, so - %% any uncompleted transactions will have been aborted. - {ok, Snapshot2#psnapshot{transactions = dict:new()}}; - {error, Reason} -> {{error, Reason}, Snapshot} - end. - -check_version({persist_snapshot, {vsn, ?PERSISTER_LOG_FORMAT_VERSION}, - StateBin}) -> - {ok, StateBin}; -check_version({persist_snapshot, {vsn, Vsn}, _StateBin}) -> - {error, {unsupported_persister_log_format, Vsn}}; -check_version(_Other) -> - {error, unrecognised_persister_log_format}. - -requeue_messages(Snapshot = #psnapshot{messages = Messages, - queues = Queues}) -> - Work = ets:foldl(fun accumulate_requeues/2, dict:new(), Queues), - %% unstable parallel map, because order doesn't matter - L = lists:append( - rabbit_misc:upmap( - %% we do as much work as possible in spawned worker - %% processes, but we need to make sure the ets:inserts are - %% performed in self() - fun ({QName, Requeues}) -> - requeue(QName, Requeues, Messages) - end, dict:to_list(Work))), - NewMessages = [{K, M} || {{_Q, K}, M, _D} <- L], - NewQueues = [{QK, D} || {QK, _M, D} <- L], - ets:delete_all_objects(Messages), - ets:delete_all_objects(Queues), - true = ets:insert(Messages, NewMessages), - true = ets:insert(Queues, NewQueues), - %% contains the mutated messages and queues tables - Snapshot. - -accumulate_requeues({{QName, PKey}, Delivered}, Acc) -> - Requeue = {PKey, Delivered}, - dict:update(QName, - fun (Requeues) -> [Requeue | Requeues] end, - [Requeue], - Acc). - -requeue(QName, Requeues, Messages) -> - case rabbit_amqqueue:lookup(QName) of - {ok, #amqqueue{pid = QPid}} -> - RequeueMessages = - [{{QName, PKey}, Message, Delivered} || - {PKey, Delivered} <- Requeues, - {_, Message} <- ets:lookup(Messages, PKey)], - rabbit_amqqueue:redeliver( - QPid, - %% Messages published by the same process receive - %% persistence keys that are monotonically - %% increasing. Since message ordering is defined on a - %% per-channel basis, and channels are bound to specific - %% processes, sorting the list does provide the correct - %% ordering properties. - [{Message, Delivered} || {_, Message, Delivered} <- - lists:sort(RequeueMessages)]), - RequeueMessages; - {error, not_found} -> - [] - end. - -replay([], LogHandle, K, Snapshot) -> - case disk_log:chunk(LogHandle, K) of - {K1, Items} -> - replay(Items, LogHandle, K1, Snapshot); - {K1, Items, Badbytes} -> - rabbit_log:warning("~p bad bytes recovering persister log~n", - [Badbytes]), - replay(Items, LogHandle, K1, Snapshot); - eof -> Snapshot - end; -replay([Item | Items], LogHandle, K, Snapshot) -> - NewSnapshot = internal_integrate_messages(Item, Snapshot), - replay(Items, LogHandle, K, NewSnapshot). - -internal_integrate_messages(Items, Snapshot) -> - lists:foldl(fun (Item, Snap) -> internal_integrate1(Item, Snap) end, - Snapshot, Items). 
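(Why the lists:sort in requeue/3 above suffices: pkeys increase monotonically per publishing process, and Erlang term order compares the {{QName, PKey}, Message, Delivered} triples element by element, so sorting orders by PKey and thereby restores per-channel publish order. A toy evaluation with invented values:)

    %% lists:sort([{{q, 3}, msg_c, false}, {{q, 1}, msg_a, true}])
    %%   %% => [{{q, 1}, msg_a, true}, {{q, 3}, msg_c, false}]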
- -internal_integrate1({extend_transaction, Key, MessageList}, - Snapshot = #psnapshot {transactions = Transactions}) -> - NewTransactions = - dict:update(Key, - fun (MessageLists) -> [MessageList | MessageLists] end, - [MessageList], - Transactions), - Snapshot#psnapshot{transactions = NewTransactions}; -internal_integrate1({rollback_transaction, Key}, - Snapshot = #psnapshot{transactions = Transactions}) -> - Snapshot#psnapshot{transactions = dict:erase(Key, Transactions)}; -internal_integrate1({commit_transaction, Key}, - Snapshot = #psnapshot{transactions = Transactions, - messages = Messages, - queues = Queues}) -> - case dict:find(Key, Transactions) of - {ok, MessageLists} -> - ?LOGDEBUG("persist committing txn ~p~n", [Key]), - lists:foreach(fun (ML) -> perform_work(ML, Messages, Queues) end, - lists:reverse(MessageLists)), - Snapshot#psnapshot{transactions = dict:erase(Key, Transactions)}; - error -> - Snapshot - end; -internal_integrate1({dirty_work, MessageList}, - Snapshot = #psnapshot {messages = Messages, - queues = Queues}) -> - perform_work(MessageList, Messages, Queues), - Snapshot. - -perform_work(MessageList, Messages, Queues) -> - lists:foreach( - fun (Item) -> perform_work_item(Item, Messages, Queues) end, - MessageList). - -perform_work_item({publish, Message, QK = {_QName, PKey}}, Messages, Queues) -> - ets:insert(Messages, {PKey, Message}), - ets:insert(Queues, {QK, false}); - -perform_work_item({tied, QK}, _Messages, Queues) -> - ets:insert(Queues, {QK, false}); - -perform_work_item({deliver, QK}, _Messages, Queues) -> - %% from R12B-2 onward we could use ets:update_element/3 here - ets:delete(Queues, QK), - ets:insert(Queues, {QK, true}); - -perform_work_item({ack, QK}, _Messages, Queues) -> - ets:delete(Queues, QK). -- cgit v1.2.1 From 1cade28a60af1c4e4dffdbd52ef1cfcab86a3f68 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Jun 2009 15:14:18 +0100 Subject: sorted out the disk_queue tests which had been left behind with the last set of API changes. By fixing them, discovered a bug in the disk queue. Also made the tests a little more rigorous, and discovered that the rdq_stress_gc test was not doing anything like what I'd wanted. Fixed. --- src/rabbit_disk_queue.erl | 2 +- src/rabbit_tests.erl | 119 ++++++++++++++++++++++++---------------------- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/src/rabbit_disk_queue.erl index 8fb5b905..da25e524 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1247,7 +1247,7 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> true = ets:insert_new(Sequences, - {Q, SeqId, NextWrite}); + {Q, SeqId, NextWrite, -1}); [Orig = {Q, Read, Write, Length}] -> Repl = {Q, lists:min([Read, SeqId]), %% Length is wrong here, but diff --git a/src/rabbit_tests.erl index 597b0a76..46c641fc 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -689,6 +689,7 @@ delete_log_handlers(Handlers) -> ok.
test_disk_queue() -> + rdq_stop(), % unicode chars are supported properly from r13 onwards io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []), [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), @@ -707,7 +708,6 @@ test_disk_queue() -> rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> Startup = rdq_virgin(), rdq_start(), - rabbit_disk_queue:to_ram_disk_mode(), QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), @@ -721,9 +721,12 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> {Deliver, ok} = timer:tc(?MODULE, rdq_time_commands, [[fun() -> [begin SeqIds = - [begin {N, Msg, MsgSizeBytes, false, SeqId} = - rabbit_disk_queue:deliver(Q), SeqId end - || N <- List], + [begin + Remaining = MsgCount - N, + {N, Msg, MsgSizeBytes, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(Q), + SeqId + end || N <- List], ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) end || Q <- Qs] end]]), @@ -747,53 +750,36 @@ rdq_stress_gc(MsgCount) -> rabbit_disk_queue:tx_commit(q, List, []), StartChunk = round(MsgCount / 20), % 5% AckList = - lists:reverse( - lists:foldl( - fun (E, Acc) -> - case Acc of - [] -> [E]; - [F|_Fs] -> - case E rem F of - 0 -> Acc; - _ -> [E|Acc] - end - end - end, [], lists:flatten([lists:seq(N,MsgCount,N) - || N <- lists:seq(StartChunk,MsgCount)]))) ++ - lists:seq(1, (StartChunk - 1)), + lists:foldl( + fun (E, Acc) -> + case lists:member(E, Acc) of + true -> Acc; + false -> [E|Acc] + end + end, [], lists:flatten( + lists:reverse( + [ lists:seq(N, MsgCount, N) + || N <- lists:seq(1, round(MsgCount / 2), 1) + ]))), + {Start, End} = lists:split(StartChunk, AckList), + AckList2 = End ++ Start, MsgIdToSeqDict = lists:foldl( - fun (_, Acc) -> - {MsgId, Msg, MsgSizeBytes, false, SeqId} = + fun (MsgId, Acc) -> + Remaining = MsgCount - MsgId, + {MsgId, Msg, MsgSizeBytes, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), dict:store(MsgId, SeqId, Acc) end, dict:new(), List), %% we really do want to ack each of this individually [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict), - rabbit_disk_queue:ack(q, [SeqId]) end - || MsgId <- AckList], + rabbit_disk_queue:ack(q, [SeqId]) + end || MsgId <- AckList2], rabbit_disk_queue:tx_commit(q, [], []), + empty = rabbit_disk_queue:deliver(q), rdq_stop(), passed. -rdq_time_insane_startup() -> - rdq_virgin(), - OneGig = 1024*1024*1024, - rabbit_disk_queue:start_link(OneGig), - rabbit_disk_queue:to_ram_disk_mode(), - Msg = <<>>, - Count = 100000, - List = lists:seq(1, Count), - %% 1M empty messages, at say, 100B per message, should all fit - %% within 1GB and thus in a single file - io:format("Publishing ~p empty messages...~n",[Count]), - [rabbit_disk_queue:tx_publish(N, Msg) || N <- List], - rabbit_disk_queue:tx_commit(q, List, []), - io:format("...done. Timing restart...~n", []), - rdq_stop(), - Micros = rdq_virgin(), - io:format("...startup took ~w microseconds.~n", [Micros]). 
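(To make the rewritten rdq_stress_gc ack ordering concrete, here is a worked evaluation at an assumed MsgCount of 10; the real test uses a far larger count, but the shape is the same:)

    %% StartChunk = round(10 / 20) = 1
    %% lists:flatten(lists:reverse([lists:seq(N, 10, N) || N <- lists:seq(1, 5)]))
    %%   = [5,10,4,8,3,6,9,2,4,6,8,10,1,2,3,4,5,6,7,8,9,10]
    %% after the duplicate-dropping foldl (first occurrence wins, prepended):
    %%   AckList  = [7,1,2,9,6,3,8,4,10,5]
    %% and after moving the first StartChunk elements to the back:
    %%   AckList2 = [1,2,9,6,3,8,4,10,5,7]
    %% i.e. acks arrive in a scrambled order, punching holes throughout the
    %% on-disk files so that compaction genuinely has work to do.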
- rdq_test_startup_with_queue_gaps() -> rdq_virgin(), rdq_start(), @@ -805,8 +791,10 @@ rdq_test_startup_with_queue_gaps() -> rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half - Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1,Half)], + Seqs = [begin + Remaining = Total - N, + {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), SeqId + end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), %% ack every other message we have delivered (starting at the _first_) lists:foldl(fun (SeqId2, true) -> @@ -821,13 +809,19 @@ rdq_test_startup_with_queue_gaps() -> rdq_start(), io:format("Startup (with shuffle) done~n", []), %% should have shuffled up. So we should now get lists:seq(2,500,2) already delivered - Seqs2 = [begin {N, Msg, 256, true, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(2,Half,2)], + Seqs2 = [begin + Remaining = round(Total - ((Half + N)/2)), + {N, Msg, 256, true, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + SeqId + end || N <- lists:seq(2,Half,2)], rabbit_disk_queue:tx_commit(q, [], Seqs2), io:format("Reread non-acked messages done~n", []), %% and now fetch the rest - Seqs3 = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1 + Half,Total)], + Seqs3 = [begin + Remaining = Total - N, + {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + SeqId + end || N <- lists:seq(1 + Half,Total)], rabbit_disk_queue:tx_commit(q, [], Seqs3), io:format("Read second half done~n", []), empty = rabbit_disk_queue:deliver(q), @@ -845,8 +839,11 @@ rdq_test_redeliver() -> rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half - Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1,Half)], + Seqs = [begin + Remaining = Total - N, + {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + SeqId + end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), %% now requeue every other message (starting at the _first_) %% and ack the other ones @@ -860,11 +857,17 @@ rdq_test_redeliver() -> rabbit_disk_queue:tx_commit(q, [], []), io:format("Redeliver and acking done~n", []), %% we should now get the 2nd half in order, followed by every-other-from-the-first-half - Seqs2 = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1+Half, Total)], + Seqs2 = [begin + Remaining = round(Total - N + (Half/2)), + {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + SeqId + end || N <- lists:seq(1+Half, Total)], rabbit_disk_queue:tx_commit(q, [], Seqs2), - Seqs3 = [begin {N, Msg, 256, true, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1, Half, 2)], + Seqs3 = [begin + Remaining = round((Half - N) / 2) - 1, + {N, Msg, 256, true, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + SeqId + end || N <- lists:seq(1, Half, 2)], rabbit_disk_queue:tx_commit(q, [], Seqs3), empty = rabbit_disk_queue:deliver(q), rdq_stop(), @@ -881,8 +884,11 @@ rdq_test_purge() -> rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half - Seqs = [begin {N, Msg, 256, false, SeqId} = rabbit_disk_queue:deliver(q), SeqId end - || N <- lists:seq(1,Half)], + Seqs = [begin + Remaining = Total - N, + {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + SeqId 
+ end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), rabbit_disk_queue:purge(q), io:format("Purge done~n", []), @@ -897,13 +903,14 @@ rdq_time_commands(Funcs) -> rdq_virgin() -> {Micros, {ok, _}} = - timer:tc(rabbit_disk_queue, start_link, [1024*1024]), + timer:tc(rabbit_disk_queue, start_link, []), ok = rabbit_disk_queue:stop_and_obliterate(), timer:sleep(1000), Micros. rdq_start() -> - {ok, _} = rabbit_disk_queue:start_link(1024*1024). + {ok, _} = rabbit_disk_queue:start_link(), + rabbit_disk_queue:to_ram_disk_mode(). rdq_stop() -> rabbit_disk_queue:stop(), -- cgit v1.2.1 From 1af858bce8be8dc17bc4e5633793e1a5b559e576 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Jun 2009 16:14:05 +0100 Subject: added dump_queue to the disk queue. This spits out a list of the queue contents from the current read pointer to the end of the queue (i.e. messages for which we are waiting for acks will not be included). Of course, at startup, all the read pointers are at the start of the queue (i.e. not waiting for any acks) so this grabs everything. Some minor refactoring was involved in the addition of this function. Also, I needed to change my definition of unfold so that it's now both simultaneously an anamorphism and a catamorphism instead of just an anamorphism (i.e. the accumulator / 'initial' value is spat out at the end). This could be a hylomorphism. Can't remember... --- src/rabbit_disk_queue.erl | 83 +++++++++++++++++++++++++++++++++-------------- src/rabbit_misc.erl | 2 +- src/rabbit_tests.erl | 29 +++++++++++++++++ 3 files changed, 89 insertions(+), 25 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index da25e524..2ffb9a75 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,7 +40,9 @@ -export([publish/3, publish_with_seq/4, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, - requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1]). + requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, + dump_queue/1 + ]). -export([length/1, is_empty/1]). @@ -245,6 +247,8 @@ -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id_or_next()}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). +-spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), binary(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}]). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). @@ -300,6 +304,9 @@ purge(Q) -> delete_queue(Q) -> gen_server2:cast(?SERVER, {delete_queue, Q}). +dump_queue(Q) -> + gen_server2:call(?SERVER, {dump_queue, Q}, infinity). + stop() -> gen_server2:call(?SERVER, stop, infinity). @@ -439,7 +446,10 @@ handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {reply, 0, State}; [{Q, _ReadSeqId, _WriteSeqId, Length}] -> {reply, Length, State} - end. + end; +handle_call({dump_queue, Q}, _From, State) -> + {Result, State1} = internal_dump_queue(Q, State), + {reply, Result, State1}. 
handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, State1} = internal_publish(Q, MsgId, next, MsgBody, State), @@ -611,28 +621,37 @@ internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> [] -> {ok, empty, State}; [{Q, SeqId, SeqId, 0}] -> {ok, empty, State}; [{Q, ReadSeqId, WriteSeqId, Length}] when Length > 0 -> - [Obj = - #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, - next_seq_id = ReadSeqId2}] = - mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), - [{MsgId, _RefCount, File, Offset, TotalSize}] = - dets_ets_lookup(State, MsgId), Remaining = Length - 1, - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId, Remaining}), - ok = - if Delivered -> ok; - true -> - mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc {is_delivered = true}) - end, - if ReadMsg -> - {FileHdl, State1} = get_read_handle(File, State), - {ok, {MsgBody, BodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize), - {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}, Remaining}, - State1}; - true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}, State} - end + {ok, Result, NextReadSeqId, State1} = internal_read_message(Q, ReadSeqId, false, ReadMsg, State), + true = ets:insert(Sequences, {Q, NextReadSeqId, WriteSeqId, Remaining}), + {ok, case Result of + {MsgId, Delivered, {MsgId, ReadSeqId}} -> + {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}; + {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}} -> + {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}, Remaining} + end, State1} + end. + +internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> + [Obj = + #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, + next_seq_id = NextReadSeqId}] = + mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), + [{MsgId, _RefCount, File, Offset, TotalSize}] = + dets_ets_lookup(State, MsgId), + ok = + if FakeDeliver orelse Delivered -> ok; + true -> + mnesia:dirty_write(rabbit_disk_queue, + Obj #dq_msg_loc {is_delivered = true}) + end, + if ReadMsg -> + {FileHdl, State1} = get_read_handle(File, State), + {ok, {MsgBody, BodySize}} = + read_message_at_offset(FileHdl, Offset, TotalSize), + {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, + NextReadSeqId, State1}; + true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} end. internal_ack(Q, MsgSeqIds, State) -> @@ -863,7 +882,7 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - MsgSeqIds = + {MsgSeqIds, WriteSeqId} = rabbit_misc:unfold( fun (SeqId) when SeqId == WriteSeqId -> false; (SeqId) -> @@ -900,6 +919,22 @@ internal_delete_queue(Q, State) -> end), {ok, State2}. +internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> + case ets:lookup(Sequences, Q) of + [] -> {[], State}; + [{Q, ReadSeq, WriteSeq, _Length}] -> + {QList, {WriteSeq, State3}} = + rabbit_misc:unfold( + fun ({SeqId, _State1}) when SeqId == WriteSeq -> + false; + ({SeqId, State1}) -> + {ok, Result, NextReadSeqId, State2} = + internal_read_message(Q, SeqId, true, true, State1), + {true, Result, {NextReadSeqId, State2}} + end, {ReadSeq, State}), + {lists:reverse(QList), State3} + end. 
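(internal_dump_queue above leans on the reworked rabbit_misc:unfold, shown in the rabbit_misc.erl hunk that follows, which now returns the final seed alongside the accumulated results; one traversal therefore yields both the dumped items and the threaded State. An illustrative evaluation, assuming that definition:)

    %% rabbit_misc:unfold(fun (0) -> false;
    %%                        (N) -> {true, N, N - 1}
    %%                    end, 3)
    %%   %% => {[1, 2, 3], 0}    (the old unfold returned just [1, 2, 3])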
+ %% ---- ROLLING OVER THE APPEND FILE ---- maybe_roll_to_new_file(Offset, diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 49faba29..acadf2a0 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -429,5 +429,5 @@ unfold(Fun, Init) -> unfold(Fun, Acc, Init) -> case Fun(Init) of {true, E, I} -> unfold(Fun, [E|Acc], I); - false -> Acc + false -> {Acc, Init} end. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 46c641fc..75b36d6f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -703,6 +703,7 @@ test_disk_queue() -> passed = rdq_test_startup_with_queue_gaps(), passed = rdq_test_redeliver(), passed = rdq_test_purge(), + passed = rdq_test_dump_queue(), passed. rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> @@ -898,6 +899,34 @@ rdq_test_purge() -> rdq_stop(), passed. +rdq_test_dump_queue() -> + rdq_virgin(), + rdq_start(), + Msg = <<0:(8*256)>>, + Total = 1000, + All = lists:seq(1,Total), + [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + rabbit_disk_queue:tx_commit(q, All, []), + io:format("Publish done~n", []), + QList = [{N, Msg, 256, false, {N, (N-1)}} || N <- All], + QList = rabbit_disk_queue:dump_queue(q), + rdq_stop(), + io:format("dump ok undelivered~n", []), + rdq_start(), + lists:foreach( + fun (N) -> + Remaining = Total - N, + {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q) + end, All), + [] = rabbit_disk_queue:dump_queue(q), + rdq_stop(), + io:format("dump ok post delivery~n", []), + rdq_start(), + QList2 = [{N, Msg, 256, true, {N, (N-1)}} || N <- All], + QList2 = rabbit_disk_queue:dump_queue(q), + io:format("dump ok post delivery + restart~n", []), + passed. + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). -- cgit v1.2.1 From 6b45b0130a771c8d6020ba19694f7a5b4eaacd8c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Jun 2009 18:37:23 +0100 Subject: Queue recovery on startup now works. o) All queues start up and ask the disk_queue for their contents o) Post durable-queue-recovery, the main rabbit process tells the disk queue to delete the contents of any other queues which have not been found as persistent. 
--- src/rabbit.erl | 16 ++++++++----- src/rabbit_amqqueue.erl | 58 ++++++++++++++++++++++++---------------------- src/rabbit_disk_queue.erl | 50 +++++++++++++++++++++++++++++---------- src/rabbit_mixed_queue.erl | 9 ++++++- src/rabbit_tests.erl | 6 ++--- 5 files changed, 89 insertions(+), 50 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index e79c7f59..ce73f6ce 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -145,17 +145,21 @@ start(normal, []) -> ok = start_child(rabbit_router), ok = start_child(rabbit_node_monitor) end}, - {"recovery", - fun () -> - ok = maybe_insert_default_data(), - ok = rabbit_exchange:recover(), - ok = rabbit_amqqueue:recover() - end}, {"disk queue", fun () -> ok = start_child(rabbit_disk_queue), ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME end}, + {"recovery", + fun () -> + ok = maybe_insert_default_data(), + ok = rabbit_exchange:recover(), + {ok, DurableQueues} = rabbit_amqqueue:recover(), + DurableQueueNames = + sets:from_list(lists:map( + fun(Q) -> Q #amqqueue.name end, DurableQueues)), + ok = rabbit_disk_queue:delete_non_durable_queues(DurableQueueNames) + end}, {"guid generator", fun () -> ok = start_child(rabbit_guid) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 0316788f..c56e5188 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -119,37 +119,39 @@ start() -> ok. recover() -> - ok = recover_durable_queues(), - ok. + {ok, DurableQueues} = recover_durable_queues(), + {ok, DurableQueues}. recover_durable_queues() -> Node = node(), - lists:foreach( - fun (RecoveredQ) -> - Q = start_queue_process(RecoveredQ), - %% We need to catch the case where a client connected to - %% another node has deleted the queue (and possibly - %% re-created it). - case rabbit_misc:execute_mnesia_transaction( - fun () -> case mnesia:match_object( - rabbit_durable_queue, RecoveredQ, read) of - [_] -> ok = store_queue(Q), - true; - [] -> false - end - end) of - true -> ok; - false -> exit(Q#amqqueue.pid, shutdown) - end - end, - %% TODO: use dirty ops instead - rabbit_misc:execute_mnesia_transaction( - fun () -> - qlc:e(qlc:q([Q || Q = #amqqueue{pid = Pid} - <- mnesia:table(rabbit_durable_queue), - node(Pid) == Node])) - end)), - ok. + DurableQueues = + lists:foldl( + fun (RecoveredQ, Acc) -> + Q = start_queue_process(RecoveredQ), + %% We need to catch the case where a client connected to + %% another node has deleted the queue (and possibly + %% re-created it). + case rabbit_misc:execute_mnesia_transaction( + fun () -> case mnesia:match_object( + rabbit_durable_queue, RecoveredQ, read) of + [_] -> ok = store_queue(Q), + true; + [] -> false + end + end) of + true -> [Q|Acc]; + false -> exit(Q#amqqueue.pid, shutdown), + Acc + end + end, [], + %% TODO: use dirty ops instead + rabbit_misc:execute_mnesia_transaction( + fun () -> + qlc:e(qlc:q([Q || Q = #amqqueue{pid = Pid} + <- mnesia:table(rabbit_durable_queue), + node(Pid) == Node])) + end)), + {ok, DurableQueues}. declare(QueueName, Durable, AutoDelete, Args) -> Q = start_queue_process(#amqqueue{name = QueueName, diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2ffb9a75..b7eca499 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -41,7 +41,7 @@ -export([publish/3, publish_with_seq/4, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, - dump_queue/1 + dump_queue/1, delete_non_durable_queues/1 ]). 
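(The startup handshake this commit wires up, reduced to its essentials; a sketch assuming only the APIs visible in the hunks above, with a list comprehension standing in for the lists:map used in rabbit.erl:)

    %% Sketch only:
    %% {ok, DurableQueues} = rabbit_amqqueue:recover(),
    %% DurableQueueNames =
    %%     sets:from_list([Q#amqqueue.name || Q <- DurableQueues]),
    %% ok = rabbit_disk_queue:delete_non_durable_queues(DurableQueueNames)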
-export([length/1, is_empty/1]). @@ -248,7 +248,8 @@ -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id_or_next()}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}]). + bool(), seq_id()}]). +-spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). @@ -307,6 +308,9 @@ delete_queue(Q) -> dump_queue(Q) -> gen_server2:call(?SERVER, {dump_queue, Q}, infinity). +delete_non_durable_queues(DurableQueues) -> + gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). + stop() -> gen_server2:call(?SERVER, stop, infinity). @@ -346,7 +350,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> E -> E end, ok = filelib:ensure_dir(form_filename("nothing")), - InitName = "0" ++ ?FILE_EXTENSION, + file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ + ?FILE_EXTENSION_DETS)), {ok, MsgLocationDets} = dets:open_file(?MSG_LOC_NAME, [{file, form_filename(atom_to_list(?MSG_LOC_NAME) ++ @@ -360,6 +365,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% it would be better to have this as private, but dets:from_ets/2 %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), + + InitName = "0" ++ ?FILE_EXTENSION, State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, @@ -449,7 +456,10 @@ handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> end; handle_call({dump_queue, Q}, _From, State) -> {Result, State1} = internal_dump_queue(Q, State), - {reply, Result, State1}. + {reply, Result, State1}; +handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> + {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), + {reply, ok, State1}. handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, State1} = internal_publish(Q, MsgId, next, MsgBody, State), @@ -928,13 +938,27 @@ internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> fun ({SeqId, _State1}) when SeqId == WriteSeq -> false; ({SeqId, State1}) -> - {ok, Result, NextReadSeqId, State2} = + {ok, {MsgId, Msg, Size, Delivered, {MsgId, SeqId}}, NextReadSeqId, State2} = internal_read_message(Q, SeqId, true, true, State1), - {true, Result, {NextReadSeqId, State2}} + {true, {MsgId, Msg, Size, Delivered, SeqId}, {NextReadSeqId, State2}} end, {ReadSeq, State}), {lists:reverse(QList), State3} end. +internal_delete_non_durable_queues(DurableQueues, State = #dqstate { sequences = Sequences }) -> + State3 = + ets:foldl( + fun ({Q, _Read, _Write, _Length}, State1) -> + case sets:is_element(Q, DurableQueues) of + true -> + State1; + false -> + {ok, State2} = internal_delete_queue(Q, State1), + State2 + end + end, State, Sequences), + {ok, State3}. 
+ %% ---- ROLLING OVER THE APPEND FILE ---- maybe_roll_to_new_file(Offset, @@ -1064,10 +1088,10 @@ combine_files({Source, SourceValid, _SourceContiguousTop, State = close_file(Source, close_file(Destination, State1)), {ok, SourceHdl} = file:open(form_filename(Source), - [read, write, raw, binary, delayed_write, read_ahead]), + [read, write, raw, binary, read_ahead, delayed_write]), {ok, DestinationHdl} = file:open(form_filename(Destination), - [read, write, raw, binary, delayed_write, read_ahead]), + [read, write, raw, binary, read_ahead, delayed_write]), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file %% if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file @@ -1080,7 +1104,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = file:open(form_filename(Tmp), - [read, write, raw, binary, delayed_write, read_ahead]), + [read, write, raw, binary, read_ahead, delayed_write]), Worklist = lists:dropwhile( fun ({_, _, _, Offset, _}) @@ -1262,9 +1286,11 @@ load_from_disk(State) -> {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId }, true) -> - true = 1 =:= - erlang:length(dets_ets_lookup(State1, MsgId)) + mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }, true) -> + case erlang:length(dets_ets_lookup(State1, MsgId)) of + 0 -> ok == mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); + 1 -> true + end end, true, rabbit_disk_queue) end), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index d1000c88..8455bf1c 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -48,7 +48,14 @@ ). start_link(Queue, IsDurable, Mode) when Mode =:= disk orelse Mode =:= mixed -> - {ok, #mqstate { mode = Mode, msg_buf = queue:new(), next_write_seq = 1, + QList = rabbit_disk_queue:dump_queue(Queue), + {MsgBuf, NextSeq} = + lists:foldl( + fun ({MsgId, Msg, Size, Delivered, SeqId}, {Buf, NSeq}) + when SeqId >= NSeq -> + {queue:in({SeqId, Msg, Delivered}, Buf), SeqId + 1} + end, {queue:new(), 0}, QList), + {ok, #mqstate { mode = Mode, msg_buf = MsgBuf, next_write_seq = NextSeq, queue = Queue, is_durable = IsDurable }}. msg_to_bin(Msg = #basic_message { content = Content }) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 75b36d6f..70fc45e0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -908,7 +908,7 @@ rdq_test_dump_queue() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), - QList = [{N, Msg, 256, false, {N, (N-1)}} || N <- All], + QList = [{N, Msg, 256, false, (N-1)} || N <- All], QList = rabbit_disk_queue:dump_queue(q), rdq_stop(), io:format("dump ok undelivered~n", []), @@ -916,13 +916,13 @@ rdq_test_dump_queue() -> lists:foreach( fun (N) -> Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q) + {N, Msg, 256, false, _SeqId, Remaining} = rabbit_disk_queue:deliver(q) end, All), [] = rabbit_disk_queue:dump_queue(q), rdq_stop(), io:format("dump ok post delivery~n", []), rdq_start(), - QList2 = [{N, Msg, 256, true, {N, (N-1)}} || N <- All], + QList2 = [{N, Msg, 256, true, (N-1)} || N <- All], QList2 = rabbit_disk_queue:dump_queue(q), io:format("dump ok post delivery + restart~n", []), passed. 
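(The test above pins down the dump_queue/1 contract: it returns {MsgId, MsgBinary, Size, IsDelivered, SeqId} tuples covering everything from the read pointer to the write pointer, handing back the raw stored binary; a restart resets the read pointers, which is why the whole queue reappears with is_delivered now true. Callers that stored serialized terms must decode for themselves, e.g., hypothetically:)

    %% Sketch only:
    %% [{_MsgId, MsgBin, _Size, _Delivered, _SeqId} | _] =
    %%     rabbit_disk_queue:dump_queue(q),
    %% Msg = binary_to_term(MsgBin)   %% when the payload was term_to_binary'd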
-- cgit v1.2.1 From c4a363d05be1138898717be6c487684e14e2f249 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 5 Jun 2009 13:32:02 +0100 Subject: Java functional tests now pass. There was a bad interaction between delivery and the limiter. Previously, the limiter was only queried once we knew we had a message to deliver. However, because we now really store messages on disk, we don't want to read in a message only to be told later that we have no consumer to send it to. Thus the message is not required until we know we can deliver it. The problem then is that there may be no message to deliver, yet in the meantime we've gone and asked the limiter to let us send. So the generator we pass into deliver_queue now must respond to the atom is_message_ready and tell us, without side effecting (gimme a type system, ffs), whether we have a message available, without requiring that the message itself be presented. If so, we then check with the limiter, and proceed to deliver. --- src/rabbit_amqqueue_process.erl | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5f96b84b..7230e09c 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -170,8 +170,11 @@ deliver_queue(Fun, FunAcc0, C = #cr{limiter_pid = LimiterPid, unsent_message_count = Count, unacked_messages = UAM} = ch_record(ChPid), - case not(AckRequired) orelse rabbit_limiter:can_send( - LimiterPid, self()) of + IsMsgReady = Fun(is_message_ready, FunAcc0, State), + case not(AckRequired) orelse + ( IsMsgReady andalso + rabbit_limiter:can_send( LimiterPid, self()) + ) of true -> case Fun(AckRequired, FunAcc0, State) of {empty, FunAcc1, State2} -> @@ -199,15 +202,21 @@ deliver_queue(Fun, FunAcc0, true -> deliver_queue(Fun, FunAcc1, State3) end end; - false -> + %% if IsMsgReady then (AckRequired and we've hit the limiter) + false when IsMsgReady -> store_ch_record(C#cr{is_limit_active = true}), NewConsumers = block_consumers(ChPid, RoundRobinTail), - deliver_queue(Fun, FunAcc0, State #q { round_robin = NewConsumers }) + deliver_queue(Fun, FunAcc0, State #q { round_robin = NewConsumers }); + false -> + %% no message was ready, so we don't need to block anyone + {FunAcc0, State} end; {empty, _} -> {FunAcc0, State} end.
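(The generator protocol this commit introduces, sketched as a hypothetical list-backed generator; illustrative only, not code from the patch. The is_message_ready clause must answer without side effects, and the remaining clauses keep the {Result, NewAccumulator, NewState} shape that deliver_queue consumes:)

    %% Sketch only:
    Gen = fun (is_message_ready, Msgs, _State) ->
                  Msgs =/= [];                       %% pure: just peek
              (_AckRequired, [], State) ->
                  {empty, [], State};
              (AckRequired, [Msg | Rest], State) ->
                  AckTag = case AckRequired of
                               true  -> make_ref();  %% stand-in ack tag
                               false -> noack
                           end,
                  {{Msg, false, AckTag, length(Rest)}, Rest, State}
          end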
+deliver_from_queue(is_message_ready, undefined, #q { mixed_state = MS }) -> + not(rabbit_mixed_queue:is_empty(MS)); deliver_from_queue(AckRequired, Acc = undefined, State = #q { mixed_state = MS }) -> {Res, MS2} = rabbit_mixed_queue:deliver(MS), MS3 = case {Res, AckRequired} of @@ -225,7 +234,9 @@ run_message_queue(State) -> attempt_immediate_delivery(none, Msg, State) -> Fun = - fun (AckRequired, false, State2) -> + fun (is_message_ready, false, _State) -> + true; + (AckRequired, false, State2) -> {AckTag, State3} = if AckRequired -> {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered(Msg, @@ -255,7 +266,7 @@ deliver_or_enqueue(Txn, Msg, State) -> %% all these messages have already been delivered at least once and %% not ack'd, but need to be either redelivered or requeued deliver_or_requeue_n([], State) -> - State; + run_message_queue(State); deliver_or_requeue_n(MsgsWithAcks, State) -> {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = deliver_queue(fun deliver_or_requeue_msgs/3, {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, State), @@ -266,6 +277,8 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> NewState #q { mixed_state = MS2 } end. +deliver_or_requeue_msgs(is_message_ready, {Len, _AcksAcc, _MsgsWithAcks}, _State) -> + -1 < Len; deliver_or_requeue_msgs(false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> {{Msg, true, noack, Len}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; deliver_or_requeue_msgs(true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> -- cgit v1.2.1 From bbd3d2cd82e834c9b68db642fa07094e09d3ef7a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 5 Jun 2009 13:37:50 +0100 Subject: all java tests now pass. tiny (but significant!) bug in the mixed_queue startup post disk_queue startup. --- src/rabbit_mixed_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 1b0386e6..610a2366 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -53,7 +53,7 @@ start_link(Queue, IsDurable, Mode) when Mode =:= disk orelse Mode =:= mixed -> lists:foldl( fun ({_MsgId, Msg, _Size, Delivered, SeqId}, {Buf, NSeq}) when SeqId >= NSeq -> - {queue:in({SeqId, Msg, Delivered}, Buf), SeqId + 1} + {queue:in({SeqId, binary_to_term(Msg), Delivered}, Buf), SeqId + 1} end, {queue:new(), 0}, QList), {ok, #mqstate { mode = Mode, msg_buf = MsgBuf, next_write_seq = NextSeq, queue = Queue, is_durable = IsDurable }}. -- cgit v1.2.1 From d7a67f373c39514a7759685026372a1452e79008 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 5 Jun 2009 17:10:33 +0100 Subject: All tests pass. Including clustering tests. "ram-nodes" in clustering now have mnesia schemas, but the tables are created as ram tables (other than the disk_queue, which does its own thing and is either disc_copies or disc_only_copies, but that table is also local_content - we don't want to try and duplicate that state across nodes). --- src/rabbit_mnesia.erl | 42 +++++++++++++++++++++++------------------- src/rabbit_tests.erl | 37 ++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index cddcab64..77e309fe 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -153,9 +153,17 @@ table_definitions() -> {disc_only_copies, [node()]}]} ]. +replicated_table_definitions() -> + [{Tab, Attrs} || {Tab, Attrs} <- table_definitions(), + not lists:member({local_content, true}, Attrs) + ]. 
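(replicated_table_definitions/0 above simply drops local-content tables; the disk queue's table is local_content precisely because each node's on-disk queue state is private to it, so the schema check and wait_for_tables, changed in the hunks that follow, only consider tables that genuinely exist cluster-wide. An illustrative evaluation over invented definitions:)

    %% Given table_definitions() returning
    %%   [{t_shared, [{disc_copies, [node()]}]},
    %%    {t_local,  [{local_content, true}]}],
    %% replicated_table_definitions() yields
    %%   [{t_shared, [{disc_copies, [node()]}]}]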
+ table_names() -> [Tab || {Tab, _} <- table_definitions()]. +replicated_table_names() -> + [Tab || {Tab, _} <- replicated_table_definitions()]. + dir() -> mnesia:system_info(directory). ensure_mnesia_dir() -> @@ -180,7 +188,7 @@ ensure_mnesia_not_running() -> check_schema_integrity() -> %%TODO: more thorough checks - case catch [mnesia:table_info(Tab, version) || Tab <- table_names()] of + case catch [mnesia:table_info(Tab, version) || Tab <- replicated_table_names()] of {'EXIT', Reason} -> {error, Reason}; _ -> ok end. @@ -260,9 +268,10 @@ init_db(ClusterNodes) -> WasDiskNode = mnesia:system_info(use_dir), IsDiskNode = ClusterNodes == [] orelse lists:member(node(), ClusterNodes), - case mnesia:change_config(extra_db_nodes, ClusterNodes -- [node()]) of + ExtraNodes = ClusterNodes -- [node()], + case mnesia:change_config(extra_db_nodes, ExtraNodes) of {ok, []} -> - if WasDiskNode and IsDiskNode -> + if WasDiskNode -> case check_schema_integrity() of ok -> ok; @@ -277,14 +286,8 @@ init_db(ClusterNodes) -> ok = move_db(), ok = create_schema() end; - WasDiskNode -> - throw({error, {cannot_convert_disk_node_to_ram_node, - ClusterNodes}}); - IsDiskNode -> - ok = create_schema(); true -> - throw({error, {unable_to_contact_cluster_nodes, - ClusterNodes}}) + ok = create_schema() end; {ok, [_|_]} -> ok = wait_for_tables(), @@ -344,15 +347,19 @@ create_tables() -> ok. create_local_table_copies(Type) -> - ok = if Type /= ram -> create_local_table_copy(schema, disc_copies); - true -> ok - end, + ok = create_local_table_copy(schema, disc_copies), lists:foreach( fun({Tab, TabDef}) -> HasDiscCopies = - lists:keymember(disc_copies, 1, TabDef), + case lists:keysearch(disc_copies, 1, TabDef) of + false -> false; + {value, {disc_copies, List1}} -> lists:member(node(), List1) + end, HasDiscOnlyCopies = - lists:keymember(disc_only_copies, 1, TabDef), + case lists:keysearch(disc_only_copies, 1, TabDef) of + false -> false; + {value, {disc_only_copies, List2}} -> lists:member(node(), List2) + end, StorageType = case Type of disc -> @@ -374,9 +381,6 @@ create_local_table_copies(Type) -> ok = create_local_table_copy(Tab, StorageType) end, table_definitions()), - ok = if Type == ram -> create_local_table_copy(schema, ram_copies); - true -> ok - end, ok. 
create_local_table_copy(Tab, Type) -> @@ -394,7 +398,7 @@ create_local_table_copy(Tab, Type) -> wait_for_tables() -> case check_schema_integrity() of ok -> - case mnesia:wait_for_tables(table_names(), 30000) of + case mnesia:wait_for_tables(replicated_table_names(), 30000) of ok -> ok; {timeout, BadTabs} -> throw({error, {timeout_waiting_for_tables, BadTabs}}); diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 70fc45e0..849f8c2a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -407,19 +407,17 @@ test_cluster_management() -> end, ClusteringSequence), - %% attempt to convert a disk node into a ram node + %% convert a disk node into a ram node ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), - {error, {cannot_convert_disk_node_to_ram_node, _}} = - control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(cluster, ["invalid1@invalid", + "invalid2@invalid"]), - %% attempt to join a non-existing cluster as a ram node + %% join a non-existing cluster as a ram node ok = control_action(reset, []), - {error, {unable_to_contact_cluster_nodes, _}} = - control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(cluster, ["invalid1@invalid", + "invalid2@invalid"]), SecondaryNode = rabbit_misc:localnode(hare), case net_adm:ping(SecondaryNode) of @@ -435,11 +433,12 @@ test_cluster_management2(SecondaryNode) -> NodeS = atom_to_list(node()), SecondaryNodeS = atom_to_list(SecondaryNode), - %% attempt to convert a disk node into a ram node + %% make a disk node ok = control_action(reset, []), ok = control_action(cluster, [NodeS]), - {error, {unable_to_join_cluster, _, _}} = - control_action(cluster, [SecondaryNodeS]), + %% make a ram node + ok = control_action(reset, []), + ok = control_action(cluster, [SecondaryNodeS]), %% join cluster as a ram node ok = control_action(reset, []), @@ -452,21 +451,21 @@ test_cluster_management2(SecondaryNode) -> ok = control_action(start_app, []), ok = control_action(stop_app, []), - %% attempt to join non-existing cluster as a ram node - {error, _} = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), - + %% join non-existing cluster as a ram node + ok = control_action(cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% turn ram node into disk node + ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS, NodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), - %% attempt to convert a disk node into a ram node - {error, {cannot_convert_disk_node_to_ram_node, _}} = - control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + %% convert a disk node into a ram node + ok = control_action(cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% turn a disk node into a ram node + ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), -- cgit v1.2.1 From b166963c0ff39709650b2aa40828f450fe89e73c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 5 Jun 2009 17:27:25 +0100 Subject: need to leave the disk queue running so that the rest of the tests go through --- src/rabbit_tests.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 849f8c2a..a61f4c3f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -703,6 +703,8 @@ test_disk_queue() -> passed = rdq_test_redeliver(), passed = rdq_test_purge(), 
passed = rdq_test_dump_queue(), + rdq_virgin(), + rdq_start(), passed. rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> -- cgit v1.2.1 From d29c973b4afca1944349d6ebe254519152318d28 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 6 Jun 2009 01:27:18 +0100 Subject: was just watching the logs go by when running tests and saw an explosion. Pretty basic typo which hadn't been caught so far. --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_tests.erl | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7230e09c..4e02f2e4 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -282,7 +282,7 @@ deliver_or_requeue_msgs(is_message_ready, {Len, _AcksAcc, _MsgsWithAcks}, _State deliver_or_requeue_msgs(false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> {{Msg, true, noack, Len}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; deliver_or_requeue_msgs(true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> - {{Msg, true, AckTag, Len}, {Len - 1, [AcksAcc], MsgsWithAcks}, State}. + {{Msg, true, AckTag, Len}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. block_consumers(ChPid, RoundRobin) -> %%?LOGDEBUG("~p Blocking ~p from ~p~n", [self(), ChPid, queue:to_list(RoundRobin)]), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a61f4c3f..4b7487b0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -704,7 +704,8 @@ test_disk_queue() -> passed = rdq_test_purge(), passed = rdq_test_dump_queue(), rdq_virgin(), - rdq_start(), + ok = control_action(stop_app, []), + ok = control_action(start_app, []), passed. rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> @@ -926,6 +927,7 @@ rdq_test_dump_queue() -> QList2 = [{N, Msg, 256, true, (N-1)} || N <- All], QList2 = rabbit_disk_queue:dump_queue(q), io:format("dump ok post delivery + restart~n", []), + rdq_stop(), passed. rdq_time_commands(Funcs) -> -- cgit v1.2.1 From 5b1eec17ecb33cdb2a29bba4810c0ecf9661e997 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 8 Jun 2009 12:19:42 +0100 Subject: tiny refactorings --- src/rabbit_amqqueue_process.erl | 10 ++++------ src/rabbit_tests.erl | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a542172b..08736f25 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -223,7 +223,7 @@ deliver_queue(Fun, FunAcc0, end. 
deliver_from_queue(is_message_ready, undefined, #q { mixed_state = MS }) -> - not(rabbit_mixed_queue:is_empty(MS)); + 0 /= rabbit_mixed_queue:length(MS); deliver_from_queue(AckRequired, Acc = undefined, State = #q { mixed_state = MS }) -> {Res, MS2} = rabbit_mixed_queue:deliver(MS), MS3 = case {Res, AckRequired} of @@ -555,8 +555,7 @@ handle_call({deliver_immediately, Txn, Message, ChPid}, _From, State) -> handle_call({deliver, Txn, Message, ChPid}, _From, State) -> %% Synchronous, "mandatory" delivery mode - {Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), - reply(Delivered, NewState); + reply(deliver_or_enqueue(Txn, ChPid, Message, State)); handle_call({commit, Txn}, From, State) -> NewState = commit_transaction(Txn, State), @@ -580,13 +579,12 @@ handle_call({basic_get, ChPid, NoAck}, _From, {{Msg, IsDelivered, AckTag, Remaining}, MS2} -> AckRequired = not(NoAck), {ok, MS3} = - case AckRequired of - true -> + if AckRequired -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), {ok, MS2}; - false -> + true -> rabbit_mixed_queue:ack([AckTag], MS2) end, Message = {QName, self(), NextId, IsDelivered, Msg}, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 5d3c2770..4b7487b0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -56,7 +56,7 @@ all_tests() -> passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), - %%passed = test_disk_queue(), + passed = test_disk_queue(), passed. test_priority_queue() -> -- cgit v1.2.1 From 19e430d54d23632d15d78cb35b891da2b52efed3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 8 Jun 2009 12:24:21 +0100 Subject: idiot --- src/rabbit_amqqueue_process.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 08736f25..b61e450b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -555,7 +555,8 @@ handle_call({deliver_immediately, Txn, Message, ChPid}, _From, State) -> handle_call({deliver, Txn, Message, ChPid}, _From, State) -> %% Synchronous, "mandatory" delivery mode - reply(deliver_or_enqueue(Txn, ChPid, Message, State)); + {Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), + reply(Delivered, NewState); handle_call({commit, Txn}, From, State) -> NewState = commit_transaction(Txn, State), -- cgit v1.2.1 From 5f1cfd403818a695919d47dcb90f953f1ea84e4e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 8 Jun 2009 16:26:36 +0100 Subject: refactorings and code cleanup --- src/rabbit_disk_queue.erl | 753 ++++++++++++++++++++++++---------------------- 1 file changed, 398 insertions(+), 355 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b7eca499..a8773af6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -344,9 +344,11 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> process_flag(trap_exit, true), Node = node(), ok = - case mnesia:change_table_copy_type(rabbit_disk_queue, Node, disc_only_copies) of + case mnesia:change_table_copy_type(rabbit_disk_queue, Node, + disc_only_copies) of {atomic, ok} -> ok; - {aborted, {already_exists, rabbit_disk_queue, Node, disc_only_copies}} -> ok; + {aborted, {already_exists, rabbit_disk_queue, Node, + disc_only_copies}} -> ok; E -> E end, ok = filelib:ensure_dir(form_filename("nothing")), @@ -393,12 +395,10 @@ init([FileSizeLimit, 
ReadFileHandlesLimit]) -> end, %% read is only needed so that we can seek {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), - ok = if Exists -> ok; - true -> %% new file, so preallocate - {ok, FileSizeLimit} = file:position(FileHdl, {bof, FileSizeLimit}), - file:truncate(FileHdl) - end, - {ok, Offset} = file:position(FileHdl, {bof, Offset}), + if Exists -> {ok, Offset} = file:position(FileHdl, {bof, Offset}); + true -> %% new file, so preallocate + ok = preallocate(FileHdl, FileSizeLimit, Offset) + end, {ok, State1 #dqstate { current_file_handle = FileHdl }}. handle_call({deliver, Q}, _From, State) -> @@ -408,7 +408,7 @@ handle_call({phantom_deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, false, State), {reply, Result, State1}; handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> - PubMsgSeqIds = lists:zip(PubMsgIds, lists:duplicate(erlang:length(PubMsgIds), next)), + PubMsgSeqIds = zip_with_tail(PubMsgIds, {duplicate, next}), {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State), {reply, ok, State1}; handle_call({tx_commit_with_seqs, Q, PubSeqMsgIds, AckSeqIds}, _From, State) -> @@ -431,29 +431,33 @@ handle_call(stop_vaporise, _From, State) -> State1 #dqstate { current_file_handle = undefined, read_file_handles = {dict:new(), gb_trees:empty()}}}; %% gen_server now calls terminate, which then calls shutdown -handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = disk_only }) -> +handle_call(to_disk_only_mode, _From, + State = #dqstate { operation_mode = disk_only }) -> {reply, ok, State}; -handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = ram_disk, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> - {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), +handle_call(to_disk_only_mode, _From, + State = #dqstate { operation_mode = ram_disk, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), + disc_only_copies), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), {reply, ok, State #dqstate { operation_mode = disk_only }}; -handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = ram_disk }) -> +handle_call(to_ram_disk_mode, _From, + State = #dqstate { operation_mode = ram_disk }) -> {reply, ok, State}; -handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_only, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> - {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), +handle_call(to_ram_disk_mode, _From, + State = #dqstate { operation_mode = disk_only, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), + disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), {reply, ok, State #dqstate { operation_mode = ram_disk }}; handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> - case ets:lookup(Sequences, Q) of - [] -> {reply, 0, State}; - [{Q, _ReadSeqId, _WriteSeqId, Length}] -> {reply, Length, State} - end; + {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), + {reply, Length, State}; handle_call({dump_queue, Q}, _From, State) -> {Result, State1} = 
internal_dump_queue(Q, State), {reply, Result, State1}; @@ -477,7 +481,7 @@ handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), {noreply, State1}; handle_cast({requeue, Q, MsgSeqIds}, State) -> - MsgSeqSeqIds = lists:zip(MsgSeqIds, lists:duplicate(erlang:length(MsgSeqIds), next)), + MsgSeqSeqIds = zip_with_tail(MsgSeqIds, {duplicate, next}), {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), {noreply, State1}; handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> @@ -524,40 +528,57 @@ form_filename(Name) -> base_directory() -> filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). -dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, +zip_with_tail(List1, List2) when length(List1) =:= length(List2) -> + lists:zip(List1, List2); +zip_with_tail(List = [_|Tail], {last, E}) -> + zip_with_tail(List, Tail ++ [E]); +zip_with_tail(List, {duplicate, E}) -> + zip_with_tail(List, lists:duplicate(erlang:length(List), E)). + +dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, Key) -> dets:lookup(MsgLocationDets, Key); -dets_ets_lookup(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, +dets_ets_lookup(#dqstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, Key) -> ets:lookup(MsgLocationEts, Key). -dets_ets_delete(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, +dets_ets_delete(#dqstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, Key) -> ok = dets:delete(MsgLocationDets, Key); -dets_ets_delete(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, +dets_ets_delete(#dqstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, Key) -> true = ets:delete(MsgLocationEts, Key), ok. -dets_ets_insert(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, +dets_ets_insert(#dqstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, Obj) -> ok = dets:insert(MsgLocationDets, Obj); -dets_ets_insert(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, +dets_ets_insert(#dqstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, Obj) -> true = ets:insert(MsgLocationEts, Obj), ok. -dets_ets_insert_new(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, +dets_ets_insert_new(#dqstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, Obj) -> true = dets:insert_new(MsgLocationDets, Obj); -dets_ets_insert_new(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, +dets_ets_insert_new(#dqstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, Obj) -> true = ets:insert_new(MsgLocationEts, Obj). -dets_ets_match_object(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, +dets_ets_match_object(#dqstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, Obj) -> dets:match_object(MsgLocationDets, Obj); -dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, +dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, Obj) -> ets:match_object(MsgLocationEts, Obj). 
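
For a concrete picture of the zip_with_tail/2 contract introduced above (the example atoms are hypothetical):

    %% pairs each element with its successor; the policy tuple supplies
    %% a partner for the final element
    %% zip_with_tail([a, b, c], {last, x})      =:= [{a, b}, {b, c}, {c, x}]
    %% zip_with_tail([a, b, c], {duplicate, x}) =:= [{a, x}, {b, x}, {c, x}]
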
@@ -612,18 +633,28 @@ adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId, _Mode) -> SuppliedSeqId; adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId, _Mode) -> ExpectedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, dirty) when SuppliedSeqId > ExpectedSeqId -> +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, dirty) + when SuppliedSeqId > ExpectedSeqId -> [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), ok = mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), SuppliedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, Lock) when SuppliedSeqId > ExpectedSeqId -> +adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, Lock) + when SuppliedSeqId > ExpectedSeqId -> [Obj] = mnesia:read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}, Lock), ok = mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }, Lock), SuppliedSeqId. +sequence_lookup(Sequences, Q) -> + case ets:lookup(Sequences, Q) of + [] -> + {0, 0, 0}; + [{Q, ReadSeqId, WriteSeqId, Length}] -> + {ReadSeqId, WriteSeqId, Length} + end. + %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> @@ -632,14 +663,18 @@ internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> [{Q, SeqId, SeqId, 0}] -> {ok, empty, State}; [{Q, ReadSeqId, WriteSeqId, Length}] when Length > 0 -> Remaining = Length - 1, - {ok, Result, NextReadSeqId, State1} = internal_read_message(Q, ReadSeqId, false, ReadMsg, State), - true = ets:insert(Sequences, {Q, NextReadSeqId, WriteSeqId, Remaining}), - {ok, case Result of - {MsgId, Delivered, {MsgId, ReadSeqId}} -> - {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}; - {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}} -> - {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}, Remaining} - end, State1} + {ok, Result, NextReadSeqId, State1} = + internal_read_message(Q, ReadSeqId, false, ReadMsg, State), + true = ets:insert(Sequences, + {Q, NextReadSeqId, WriteSeqId, Remaining}), + {ok, + case Result of + {MsgId, Delivered, {MsgId, ReadSeqId}} -> + {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}; + {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}} -> + {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}, + Remaining} + end, State1} end. internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> @@ -661,7 +696,8 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> read_message_at_offset(FileHdl, Offset, TotalSize), {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State1}; - true -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} + true -> + {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} end. 
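
As a reading aid for the internal_* functions (example values are made up): the Sequences ets table holds one {Q, ReadSeqId, WriteSeqId, Length} tuple per queue, where ReadSeqId is the next sequence id to read and WriteSeqId is the next to be written. A gap-free queue holding three undelivered messages might therefore be {Q, 5, 8, 3}, since 8 - 5 =:= 3.
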
internal_ack(Q, MsgSeqIds, State) -> @@ -685,25 +721,30 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, Files3 = if 1 =:= RefCount -> ok = dets_ets_delete(State, MsgId), - [{File, ValidTotalSize, ContiguousTop, Left, Right}] = - ets:lookup(FileSummary, File), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = ets:insert(FileSummary, - {File, (ValidTotalSize - TotalSize - - ?FILE_PACKING_ADJUSTMENT), + [{File, ValidTotalSize, ContiguousTop, + Left, Right}] = ets:lookup(FileSummary, File), + ContiguousTop1 = + lists:min([ContiguousTop, Offset]), + true = + ets:insert(FileSummary, + {File, (ValidTotalSize-TotalSize- + ?FILE_PACKING_ADJUSTMENT), ContiguousTop1, Left, Right}), if CurName =:= File -> Files2; true -> sets:add_element(File, Files2) end; 1 < RefCount -> - ok = dets_ets_insert(State, {MsgId, RefCount - 1, - File, Offset, TotalSize}), + ok = dets_ets_insert( + State, {MsgId, RefCount - 1, + File, Offset, TotalSize}), Files2 end, ok = if MnesiaDelete -> - mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); + mnesia:dirty_delete(rabbit_disk_queue, + {Q, SeqId}); MnesiaDelete =:= txn -> - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); + mnesia:delete(rabbit_disk_queue, + {Q, SeqId}, write); true -> ok end, Files3 @@ -735,8 +776,8 @@ internal_tx_publish(MsgId, MsgBody, true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}), NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, - maybe_roll_to_new_file(NextOffset, - State #dqstate {current_offset = NextOffset}); + maybe_roll_to_new_file( + NextOffset, State #dqstate {current_offset = NextOffset}); [{MsgId, RefCount, File, Offset, TotalSize}] -> %% We already know about it, just update counter ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, @@ -753,49 +794,50 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, {PubList, PubAcc, ReadSeqId, Length} = case PubMsgSeqIds of [] -> {[], undefined, undefined, undefined}; - [{_, FirstSeqIdTo}|PubMsgSeqIdsTail] -> + [{_, FirstSeqIdTo}|_] -> {InitReadSeqId, InitWriteSeqId, InitLength} = - case ets:lookup(Sequences, Q) of - [] -> {0,0,0}; - [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> - {ReadSeqId2, WriteSeqId2, Length2} - end, - InitReadSeqId2 = determine_next_read_id(InitReadSeqId, InitWriteSeqId, FirstSeqIdTo), - { lists:zip(PubMsgSeqIds, (PubMsgSeqIdsTail ++ [{next, next}])), + sequence_lookup(Sequences, Q), + InitReadSeqId2 = determine_next_read_id( + InitReadSeqId, InitWriteSeqId, FirstSeqIdTo), + { zip_with_tail(PubMsgSeqIds, {last, {next, next}}), InitWriteSeqId, InitReadSeqId2, InitLength} end, {atomic, {Sync, WriteSeqId, State2}} = mnesia:transaction( - fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - %% must deal with publishes first, if we didn't - %% then we could end up acking a message before - %% it's been published, which is clearly - %% nonsense. I.e. in commit, do not do things in an - %% order which _could_not_ have happened. - {Sync2, WriteSeqId3} = - lists:foldl( - fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, - {Acc, ExpectedSeqId}) -> - [{MsgId, _RefCount, File, _Offset, _TotalSize}] = - dets_ets_lookup(State, MsgId), - SeqId2 = adjust_last_msg_seq_id(Q, ExpectedSeqId, SeqId, write), + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + %% must deal with publishes first, if we didn't + %% then we could end up acking a message before + %% it's been published, which is clearly + %% nonsense. I.e. in commit, do not do things in an + %% order which _could_not_ have happened. 
+ {Sync2, WriteSeqId3} = + lists:foldl( + fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, + {Acc, ExpectedSeqId}) -> + [{MsgId, _RefCount, File, _Offset, + _TotalSize}] = dets_ets_lookup(State, MsgId), + SeqId2 = adjust_last_msg_seq_id( + Q, ExpectedSeqId, SeqId, write), NextSeqId2 = find_next_seq_id(SeqId2, NextSeqId), - ok = mnesia:write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = - {Q, SeqId2}, - msg_id = MsgId, - is_delivered = false, - next_seq_id = NextSeqId2 - }, - write), - {Acc or (CurName =:= File), NextSeqId2} + ok = mnesia:write( + rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = + {Q, SeqId2}, + msg_id = MsgId, + is_delivered = false, + next_seq_id = NextSeqId2 + }, + write), + {Acc orelse (CurName =:= File), NextSeqId2} end, {false, PubAcc}, PubList), {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), {Sync2, WriteSeqId3, State3} end), true = if PubList =:= [] -> true; - true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, Length + erlang:length(PubList)}) + true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, + Length + erlang:length(PubList)}) end, ok = if Sync -> file:sync(CurHdl); true -> ok @@ -807,12 +849,7 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, MsgBody, State), {ReadSeqId, WriteSeqId, Length} = - case ets:lookup(Sequences, Q) of - [] -> %% previously unseen queue - {0, 0, 0}; - [{Q, ReadSeqId2, WriteSeqId2, Length2}] -> - {ReadSeqId2, WriteSeqId2, Length2} - end, + sequence_lookup(Sequences, Q), ReadSeqId3 = determine_next_read_id(ReadSeqId, WriteSeqId, SeqId), WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId, dirty), WriteSeqId3Next = WriteSeqId3 + 1, @@ -827,12 +864,12 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned - MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), undefined)), + MsgSeqIds = zip_with_tail(MsgIds, {duplicate, undefined}), remove_messages(undefined, MsgSeqIds, false, State). internal_requeue(_Q, [], State) -> {ok, State}; -internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], +internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|_], State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've @@ -856,34 +893,39 @@ internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|MsgSeqIdsTail], %% MsgLocation and FileSummary stay put (which makes further sense %% as they have no concept of sequence id anyway). 
- %% the Q _must_ already exist - [{Q, ReadSeqId, WriteSeqId, Length}] = ets:lookup(Sequences, Q), + {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), ReadSeqId2 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo), - MsgSeqIdsZipped = lists:zip(MsgSeqIds, MsgSeqIdsTail ++ [{next, next}]), - {atomic, WriteSeqId2} = + MsgSeqIdsZipped = zip_with_tail(MsgSeqIds, {last, {next, next}}), + {atomic, {WriteSeqId2, Q}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun ({{{MsgId, SeqIdOrig}, SeqIdTo}, - {_NextMsgSeqId, NextSeqIdTo}}, - ExpectedSeqIdTo) -> - SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), - NextSeqIdTo2 = find_next_seq_id(SeqIdTo2, NextSeqIdTo), - [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), - mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqIdTo2}, - next_seq_id = NextSeqIdTo2 - }, - write), - mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write), - NextSeqIdTo2 - end, WriteSeqId, MsgSeqIdsZipped) + lists:foldl(fun requeue_message/2, {WriteSeqId, Q}, + MsgSeqIdsZipped) end), - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId2, Length + erlang:length(MsgSeqIds)}), + true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId2, + Length + erlang:length(MsgSeqIds)}), {ok, State}. +requeue_message({{{MsgId, SeqIdOrig}, SeqIdTo}, + {_NextMsgSeqId, NextSeqIdTo}}, + {ExpectedSeqIdTo, Q}) -> + SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), + NextSeqIdTo2 = find_next_seq_id(SeqIdTo2, NextSeqIdTo), + [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId, + next_seq_id = NextSeqIdOrig }] = + mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), + if SeqIdTo2 == SeqIdOrig andalso NextSeqIdTo2 == NextSeqIdOrig -> ok; + true -> + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc {queue_and_seq_id = {Q, SeqIdTo2}, + next_seq_id = NextSeqIdTo2 + }, + write), + ok = mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write) + end, + {NextSeqIdTo2, Q}. + internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, 0, State}; @@ -898,7 +940,8 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> (SeqId) -> [#dq_msg_loc { msg_id = MsgId, next_seq_id = NextSeqId } - ] = mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + ] = mnesia:read(rabbit_disk_queue, + {Q, SeqId}, write), {true, {MsgId, SeqId}, NextSeqId} end, ReadSeqId), remove_messages(Q, MsgSeqIds, txn, State) @@ -908,23 +951,27 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> end. 
internal_delete_queue(Q, State) -> - {ok, _Count, State1 = #dqstate { sequences = Sequences }} = internal_purge(Q, State), + {ok, _Count, State1 = #dqstate { sequences = Sequences }} = + internal_purge(Q, State), %% remove everything undelivered true = ets:delete(Sequences, Q), {atomic, {ok, State2}} = mnesia:transaction( - fun() -> + fun() -> %% now remove everything already delivered ok = mnesia:write_lock_table(rabbit_disk_queue), Objs = - mnesia:match_object(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, '_'}, - msg_id = '_', - is_delivered = '_', - next_seq_id = '_' - }, write), + mnesia:match_object( + rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = {Q, '_'}, + msg_id = '_', + is_delivered = '_', + next_seq_id = '_' + }, + write), MsgSeqIds = lists:map( - fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, msg_id = MsgId }) -> - {MsgId, SeqId} - end, Objs), + fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, + msg_id = MsgId }) -> + {MsgId, SeqId} end, Objs), remove_messages(Q, MsgSeqIds, txn, State1) end), {ok, State2}. @@ -938,26 +985,25 @@ internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> fun ({SeqId, _State1}) when SeqId == WriteSeq -> false; ({SeqId, State1}) -> - {ok, {MsgId, Msg, Size, Delivered, {MsgId, SeqId}}, NextReadSeqId, State2} = - internal_read_message(Q, SeqId, true, true, State1), - {true, {MsgId, Msg, Size, Delivered, SeqId}, {NextReadSeqId, State2}} + {ok, {MsgId, Msg, Size, Delivered, {MsgId, SeqId}}, + NextReadSeqId, State2} = + internal_read_message(Q, SeqId, true, true, + State1), + {true, {MsgId, Msg, Size, Delivered, SeqId}, + {NextReadSeqId, State2}} end, {ReadSeq, State}), {lists:reverse(QList), State3} end. -internal_delete_non_durable_queues(DurableQueues, State = #dqstate { sequences = Sequences }) -> - State3 = - ets:foldl( - fun ({Q, _Read, _Write, _Length}, State1) -> - case sets:is_element(Q, DurableQueues) of - true -> - State1; - false -> - {ok, State2} = internal_delete_queue(Q, State1), - State2 - end - end, State, Sequences), - {ok, State3}. +internal_delete_non_durable_queues( + DurableQueues, State = #dqstate { sequences = Sequences }) -> + ets:foldl( + fun ({Q, _Read, _Write, _Length}, {ok, State1}) -> + case sets:is_element(Q, DurableQueues) of + true -> {ok, State1}; + false -> internal_delete_queue(Q, State1) + end + end, {ok, State}, Sequences). %% ---- ROLLING OVER THE APPEND FILE ---- @@ -975,10 +1021,8 @@ maybe_roll_to_new_file(Offset, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]), - {ok, FileSizeLimit} = file:position(NextHdl, {bof, FileSizeLimit}), - ok = file:truncate(NextHdl), - {ok, 0} = file:position(NextHdl, {bof, 0}), - true = ets:update_element(FileSummary, CurName, {5, NextName}), %% 5 is Right + ok = preallocate(NextHdl, FileSizeLimit, 0), + true = ets:update_element(FileSummary, CurName, {5, NextName}),%% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), State1 = State #dqstate { current_file_name = NextName, current_file_handle = NextHdl, @@ -989,6 +1033,12 @@ maybe_roll_to_new_file(Offset, maybe_roll_to_new_file(_, State) -> {ok, State}. +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file:position(Hdl, {bof, FileSizeLimit}), + ok = file:truncate(Hdl), + {ok, FinalPos} = file:position(Hdl, {bof, FinalPos}), + ok. 
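
preallocate/3 relies on file:truncate/1 extending the file when the handle is positioned past the end, which is how a fresh file reaches full size before any message is appended. A minimal usage sketch (the file name and size are made up; preallocate/3 is the function above):

    preallocate_example() ->
        {ok, Hdl} = file:open("/tmp/example.rdq",
                              [read, write, raw, binary]),
        ok = preallocate(Hdl, 1024 * 1024, 0), %% grow to 1MB, seek to bof
        {ok, 0} = file:position(Hdl, {cur, 0}), %% handle left at offset 0
        ok = file:close(Hdl).
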
+ %% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- compact(FilesSet, State) -> @@ -1000,70 +1050,59 @@ compact(FilesSet, State) -> end, [], Files), lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). -combine_file(File, State = #dqstate { file_size_limit = FileSizeLimit, - file_summary = FileSummary, - current_file_name = CurName - }) -> +combine_file(File, State = #dqstate { file_summary = FileSummary, + current_file_name = CurName + }) -> %% the file we're looking at may no longer exist as it may have %% been deleted within the current GC run case ets:lookup(FileSummary, File) of [] -> State; - [FileObj = {File, ValidData, _ContiguousTop, Left, Right}] -> + [FileObj = {File, _ValidData, _ContiguousTop, Left, Right}] -> GoRight = fun() -> case Right of undefined -> State; - _ when not(CurName =:= Right) -> - [RightObj = {Right, RightValidData, - _RightContiguousTop, File, RightRight}] = - ets:lookup(FileSummary, Right), - RightSumData = ValidData + RightValidData, - if FileSizeLimit >= RightSumData -> - %% here, Right will be the source and so will be deleted, - %% File will be the destination - State1 = combine_files(RightObj, FileObj, - State), - %% this could fail if RightRight is undefined - %% left is the 4th field - ets:update_element(FileSummary, - RightRight, {4, File}), - true = ets:insert(FileSummary, {File, - RightSumData, - RightSumData, - Left, - RightRight}), - true = ets:delete(FileSummary, Right), - State1; - true -> State - end; + _ when not (CurName == Right) -> + [RightObj] = ets:lookup(FileSummary, Right), + {_, State1} = + adjust_meta_and_combine(FileObj, RightObj, + State), + State1; _ -> State end end, case Left of undefined -> GoRight(); - _ -> [LeftObj = - {Left, LeftValidData, _LeftContiguousTop, LeftLeft, File}] = - ets:lookup(FileSummary, Left), - LeftSumData = ValidData + LeftValidData, - if FileSizeLimit >= LeftSumData -> - %% here, File will be the source and so will be deleted, - %% Left will be the destination - State1 = combine_files(FileObj, LeftObj, State), - %% this could fail if Right is undefined - %% left is the 4th field - ets:update_element(FileSummary, Right, {4, Left}), - true = ets:insert(FileSummary, {Left, LeftSumData, - LeftSumData, - LeftLeft, Right}), - true = ets:delete(FileSummary, File), - State1; - true -> - GoRight() + _ -> [LeftObj] = ets:lookup(FileSummary, Left), + case adjust_meta_and_combine(LeftObj, FileObj, State) of + {true, State1} -> State1; + {false, State} -> GoRight() end end end. +adjust_meta_and_combine( + LeftObj = {LeftFile, LeftValidData, _LeftContigTop, LeftLeft, RightFile}, + RightObj = {RightFile, RightValidData, _RightContigTop, LeftFile, RightRight}, + State = #dqstate { file_size_limit = FileSizeLimit, + file_summary = FileSummary + }) -> + TotalValidData = LeftValidData + RightValidData, + if FileSizeLimit >= TotalValidData -> + State1 = combine_files(RightObj, LeftObj, State), + %% this could fail if RightRight is undefined + %% left is the 4th field + ets:update_element(FileSummary, RightRight, {4, LeftFile}), + true = ets:insert(FileSummary, {LeftFile, + TotalValidData, TotalValidData, + LeftLeft, + RightRight}), + true = ets:delete(FileSummary, RightFile), + {true, State1}; + true -> {false, State} + end. 
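
The guard in adjust_meta_and_combine is plain arithmetic on the two neighbours' valid data; with made-up numbers: given file_size_limit = 100, a left file with 40 bytes valid and a right file with 50 merge (40 + 50 =< 100) into a single 90-byte contiguous file, whereas a right file with 70 bytes valid would leave the pair alone and return {false, State}.
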
+ sort_msg_locations_by_offset(Asc, List) -> Comp = if Asc -> fun erlang:'<'/2; true -> fun erlang:'>'/2 @@ -1093,92 +1132,72 @@ combine_files({Source, SourceValid, _SourceContiguousTop, file:open(form_filename(Destination), [read, write, raw, binary, read_ahead, delayed_write]), ExpectedSize = SourceValid + DestinationValid, - %% if DestinationValid =:= DestinationContiguousTop then we don't need a tmp file - %% if they're not equal, then we need to write out everything past the DestinationContiguousTop to a tmp file - %% then truncate, copy back in, and then copy over from Source + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source if DestinationContiguousTop =:= DestinationValid -> ok = truncate_and_extend_file(DestinationHdl, - DestinationValid, ExpectedSize); + DestinationValid, ExpectedSize); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = file:open(form_filename(Tmp), - [read, write, raw, binary, read_ahead, delayed_write]), + [read, write, raw, binary, + read_ahead, delayed_write]), Worklist = lists:dropwhile( fun ({_, _, _, Offset, _}) when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == DestinationContiguousTop - %% because if it was then DestinationContiguousTop would have been - %% extended by TotalSize + %% it cannot be that Offset == + %% DestinationContiguousTop because if it + %% was then DestinationContiguousTop would + %% have been extended by TotalSize Offset < DestinationContiguousTop - %% Given expected access patterns, I suspect that the list should be - %% naturally sorted as we require, however, we need to enforce it anyway - end, sort_msg_locations_by_offset(true, - dets_ets_match_object(State, - {'_', '_', - Destination, - '_', '_'}))), + %% Given expected access patterns, I suspect + %% that the list should be naturally sorted + %% as we require, however, we need to + %% enforce it anyway + end, sort_msg_locations_by_offset( + true, dets_ets_match_object(State, + {'_', '_', Destination, + '_', '_'}))), + ok = copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination, State), TmpSize = DestinationValid - DestinationContiguousTop, - {TmpSize, BlockStart1, BlockEnd1} = - lists:foldl( - fun ({MsgId, RefCount, _Destination, Offset, TotalSize}, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the TmpFile. - %% Offset, BlockStart and BlockEnd are in the DestinationFile (which is currently the source!) 
- Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, - %% this message is going to end up back in - %% Destination, at DestinationContiguousTop - %% + CurOffset - FinalOffset = DestinationContiguousTop + CurOffset, - ok = dets_ets_insert(State, {MsgId, RefCount, Destination, - FinalOffset, TotalSize}), - NextOffset = CurOffset + Size, - if BlockStart =:= undefined -> - %% base case, called only for the - %% first list elem - {NextOffset, Offset, Offset + Size}; - Offset =:= BlockEnd -> - %% extend the current block because - %% the next msg follows straight on - {NextOffset, BlockStart, BlockEnd + Size}; - true -> - %% found a gap, so actually do the - %% work for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file:position(DestinationHdl, - {bof, BlockStart}), - {ok, BSize} = file:copy(DestinationHdl, - TmpHdl, BSize), - {NextOffset, Offset, Offset + Size} - end - end, {0, undefined, undefined}, Worklist), - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = file:position(DestinationHdl, {bof, BlockStart1}), - {ok, BSize1} = file:copy(DestinationHdl, TmpHdl, BSize1), %% so now Tmp contains everything we need to salvage from %% Destination, and MsgLocationDets has been updated to %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end {ok, 0} = file:position(TmpHdl, {bof, 0}), - ok = truncate_and_extend_file(DestinationHdl, - DestinationContiguousTop, ExpectedSize), + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be - %% DestinationValid + %% position in DestinationHdl should now be DestinationValid ok = file:sync(DestinationHdl), ok = file:close(TmpHdl), ok = file:delete(form_filename(Tmp)) end, SourceWorkList = - sort_msg_locations_by_offset(true, - dets_ets_match_object(State, - {'_', '_', Source, - '_', '_'})), - {ExpectedSize, BlockStart2, BlockEnd2} = + sort_msg_locations_by_offset( + true, dets_ets_match_object(State, + {'_', '_', Source, + '_', '_'})), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State), + %% tidy up + ok = file:sync(DestinationHdl), + ok = file:close(SourceHdl), + ok = file:close(DestinationHdl), + ok = file:delete(form_filename(Source)), + State. 
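
The copy_messages helper factored out below copies each run of adjacent messages with a single file:copy call. The grouping step in isolation looks like this (a standalone sketch; coalesce_blocks/1 is not part of the patch):

    %% Entries :: [{Offset, Size}] in ascending offset order; returns
    %% [{BlockStart, BlockEnd}] with contiguous entries merged
    coalesce_blocks(Entries) ->
        lists:reverse(
          lists:foldl(
            fun ({Offset, Size}, [{Start, End} | Blocks])
                  when Offset =:= End ->
                    [{Start, End + Size} | Blocks]; %% contiguous: extend
                ({Offset, Size}, Blocks) ->
                    [{Offset, Offset + Size} | Blocks] %% gap: new block
            end, [], Entries)).

    %% e.g. coalesce_blocks([{0,10}, {10,5}, {20,3}]) =:= [{0,15}, {20,23}]
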
+ +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, State) -> + {FinalOffset, BlockStart2, BlockEnd2} = lists:foldl( fun ({MsgId, RefCount, _Source, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) -> @@ -1190,8 +1209,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, CurOffset, TotalSize}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> - %% base case, called only for the first list - %% elem + %% base case, called only for the first list elem {NextOffset, Offset, Offset + Size}; Offset =:= BlockEnd -> %% extend the current block because the next @@ -1207,17 +1225,12 @@ combine_files({Source, SourceValid, _SourceContiguousTop, file:copy(SourceHdl, DestinationHdl, BSize), {NextOffset, Offset, Offset + Size} end - end, {DestinationValid, undefined, undefined}, SourceWorkList), + end, {InitOffset, undefined, undefined}, WorkList), %% do the last remaining block BSize2 = BlockEnd2 - BlockStart2, {ok, BlockStart2} = file:position(SourceHdl, {bof, BlockStart2}), {ok, BSize2} = file:copy(SourceHdl, DestinationHdl, BSize2), - %% tidy up - ok = file:sync(DestinationHdl), - ok = file:close(SourceHdl), - ok = file:close(DestinationHdl), - ok = file:delete(form_filename(Source)), - State. + ok. close_file(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge} }) -> @@ -1237,20 +1250,22 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined - 0 -> case {Left, Right} of - {undefined, _} when not(is_atom(Right)) -> - %% the eldest file is empty. YAY! - %% left is the 4th field - true = ets:update_element(FileSummary, Right, {4, undefined}); - {_, _} when not(is_atom(Right)) -> - %% left is the 4th field - true = ets:update_element(FileSummary, Right, {4, Left}), - %% right is the 5th field - true = ets:update_element(FileSummary, Left, {5, Right}) - end, - true = ets:delete(FileSummary, File), - ok = file:delete(form_filename(File)), - Acc; + 0 -> + case {Left, Right} of + {undefined, _} when not (is_atom(Right)) -> + %% the eldest file is empty. YAY! + %% left is the 4th field + true = + ets:update_element(FileSummary, Right, {4, undefined}); + {_, _} when not (is_atom(Right)) -> + %% left is the 4th field + true = ets:update_element(FileSummary, Right, {4, Left}), + %% right is the 5th field + true = ets:update_element(FileSummary, Left, {5, Right}) + end, + true = ets:delete(FileSummary, File), + ok = file:delete(form_filename(File)), + Acc; _ -> [File|Acc] end. @@ -1268,7 +1283,7 @@ del_index() -> {atomic, ok} -> ok; %% hmm, something weird must be going on, but it's probably %% not the end of the world - {aborted,{no_exists,rabbit_disk_queue,_}} -> ok; + {aborted, {no_exists, rabbit_disk_queue,_}} -> ok; E2 -> E2 end. 
@@ -1286,13 +1301,18 @@ load_from_disk(State) -> {atomic, true} = mnesia:transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl(fun (#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }, true) -> - case erlang:length(dets_ets_lookup(State1, MsgId)) of - 0 -> ok == mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); - 1 -> true - end - end, - true, rabbit_disk_queue) + mnesia:foldl( + fun (#dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = {Q, SeqId} }, + true) -> + case erlang:length(dets_ets_lookup( + State1, MsgId)) of + 0 -> ok == mnesia:delete(rabbit_disk_queue, + {Q, SeqId}, write); + 1 -> true + end + end, + true, rabbit_disk_queue) end), State2 = extract_sequence_numbers(State1), ok = del_index(), @@ -1306,9 +1326,9 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> NextWrite = SeqId + 1, case ets:lookup(Sequences, Q) of - [] -> - true = ets:insert_new(Sequences, - {Q, SeqId, NextWrite, -1}); + [] -> true = + ets:insert_new(Sequences, + {Q, SeqId, NextWrite, -1}); [Orig = {Q, Read, Write, Length}] -> Repl = {Q, lists:min([Read, SeqId]), %% Length is wrong here, but @@ -1345,10 +1365,12 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> ok = mnesia:write_lock_table(rabbit_disk_queue), lists:foreach( fun ({Q, ReadSeqId, WriteSeqId, _Length}) -> - Gap = shuffle_up(Q, ReadSeqId - 1, WriteSeqId - 1, 0), + Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), ReadSeqId2 = ReadSeqId + Gap, Length = WriteSeqId - ReadSeqId2, - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId, Length}) + true = + ets:insert(Sequences, + {Q, ReadSeqId2, WriteSeqId, Length}) end, ets:match_object(Sequences, '_')) end). @@ -1361,8 +1383,9 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> [Obj] -> if Gap =:= 0 -> ok; true -> mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { queue_and_seq_id = {Q, SeqId + Gap }, - next_seq_id = SeqId + Gap + 1 + Obj #dq_msg_loc { + queue_and_seq_id = {Q, SeqId + Gap }, + next_seq_id = SeqId + Gap + 1 }, write), mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) @@ -1371,8 +1394,9 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> end, shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). 
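
A worked example of the gap shuffle (the queue contents are hypothetical): suppose a queue's surviving entries sit at sequence ids 4 and 7, with ReadSeqId = 4 and WriteSeqId = 8. shuffle_up walks 7, 6, 5, 4: the entry at 7 stays put (Gap is still 0); 6 and 5 are missing, raising Gap to 2; the entry at 4 is rewritten to 4 + 2 = 6. The queue is then contiguous at [6, 7], ReadSeqId2 = 4 + 2 = 6 and Length = 8 - 6 = 2.
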
-load_messages(undefined, [], State = #dqstate { file_summary = FileSummary, - current_file_name = CurName }) -> +load_messages(undefined, [], + State = #dqstate { file_summary = FileSummary, + current_file_name = CurName }) -> true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), State; load_messages(Left, [], State) -> @@ -1401,8 +1425,9 @@ load_messages(Left, [File|Files], msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> - true = dets_ets_insert_new(State, {MsgId, RefCount, File, - Offset, TotalSize}), + true = + dets_ets_insert_new(State, {MsgId, RefCount, File, + Offset, TotalSize}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } @@ -1410,21 +1435,37 @@ load_messages(Left, [File|Files], end, {[], 0}, Messages), %% foldl reverses lists and find_contiguous_block_prefix needs %% elems in the same order as from scan_file_for_valid_messages - {ContiguousTop, _} = find_contiguous_block_prefix(lists:reverse(ValidMessagesRev)), + {ContiguousTop, _} = find_contiguous_block_prefix( + lists:reverse(ValidMessagesRev)), Right = case Files of [] -> undefined; [F|_] -> F end, - true = ets:insert_new(FileSummary, {File, ValidTotalSize, ContiguousTop, Left, Right}), + true = ets:insert_new(FileSummary, + {File, ValidTotalSize, ContiguousTop, Left, Right}), load_messages(File, Files, State). %% ---- DISK RECOVERY OF FAILED COMPACTION ---- recover_crashed_compactions(Files, TmpFiles) -> - lists:foreach(fun (TmpFile) -> ok = recover_crashed_compactions1(Files, TmpFile) end, + lists:foreach(fun (TmpFile) -> + ok = recover_crashed_compactions1(Files, TmpFile) end, TmpFiles), ok. +verify_messages_in_mnesia(MsgIds) -> + lists:foreach( + fun (MsgId) -> + true = 0 < erlang:length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_', + next_seq_id = '_' + }, + msg_id)) + end, MsgIds). + recover_crashed_compactions1(Files, TmpFile) -> GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, @@ -1435,37 +1476,35 @@ recover_crashed_compactions1(Files, TmpFile) -> MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), %% all of these messages should appear in the mnesia table, %% otherwise they wouldn't have been copied out - lists:foreach(fun (MsgId) -> - true = 0 < erlang:length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' - }, - msg_id)) - end, MsgIdsTmp), + verify_messages_in_mnesia(MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), MsgIds = lists:map(GrabMsgId, UncorruptedMessages), - %% 1) It's possible that everything in the tmp file is also in the main file - %% such that the main file is (prefix ++ tmpfile). This means that compaction - %% failed immediately prior to the final step of deleting the tmp file. - %% Plan: just delete the tmp file - %% 2) It's possible that everything in the tmp file is also in the main file - %% but with holes throughout (or just somthing like main = (prefix ++ hole ++ tmpfile)). - %% This means that compaction wrote out the tmp file successfully and then failed. 
- %% Plan: just delete the tmp file and allow the compaction to eventually be triggered later - %% 3) It's possible that everything in the tmp file is also in the main file - %% but such that the main file does not end with tmp file (and there are valid messages - %% in the suffix; main = (prefix ++ tmpfile[with extra holes?] ++ suffix)). - %% This means that compaction failed as we were writing out the tmp file. - %% Plan: just delete the tmp file and allow the compaction to eventually be triggered later - %% 4) It's possible that there are messages in the tmp file which are not in the main file. - %% This means that writing out the tmp file succeeded, but then we failed as we - %% were copying them back over to the main file, after truncating the main file. - %% As the main file has already been truncated, it should consist only of valid messages - %% Plan: Truncate the main file back to before any of the files in the tmp file and copy + %% 1) It's possible that everything in the tmp file is also in the + %% main file such that the main file is (prefix ++ + %% tmpfile). This means that compaction failed immediately + %% prior to the final step of deleting the tmp file. Plan: just + %% delete the tmp file + %% 2) It's possible that everything in the tmp file is also in the + %% main file but with holes throughout (or just something like + %% main = (prefix ++ hole ++ tmpfile)). This means that + %% compaction wrote out the tmp file successfully and then + %% failed. Plan: just delete the tmp file and allow the + %% compaction to eventually be triggered later + %% 3) It's possible that everything in the tmp file is also in the + %% main file but such that the main file does not end with tmp + %% file (and there are valid messages in the suffix; main = + %% (prefix ++ tmpfile[with extra holes?] ++ suffix)). This + %% means that compaction failed as we were writing out the tmp + %% file. Plan: just delete the tmp file and allow the + %% compaction to eventually be triggered later + %% 4) It's possible that there are messages in the tmp file which + %% are not in the main file. This means that writing out the + %% tmp file succeeded, but then we failed as we were copying + %% them back over to the main file, after truncating the main + %% file. As the main file has already been truncated, it should + %% consist only of valid messages. Plan: Truncate the main file + %% back to before any of the files in the tmp file and copy %% them over again case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file %% is empty ok = file:delete(TmpFile); _False -> - %% we're in case 4 above. - %% check that everything in the main file is a valid message in mnesia - lists:foreach(fun (MsgId) -> - true = 0 < erlang:length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' - }, - msg_id)) - end, MsgIds), + %% we're in case 4 above. 
Check that everything in the + %% main file is a valid message in mnesia + verify_messages_in_mnesia(MsgIds), %% The main file should be contiguous {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), %% we should have that none of the messages in the prefix %% are in the tmp file - true = lists:all(fun (MsgId) -> not(lists:member(MsgId, MsgIdsTmp)) end, - MsgIds), - + true = lists:all(fun (MsgId) -> + not (lists:member(MsgId, MsgIdsTmp)) + end, MsgIds), {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), [write, raw, binary, delayed_write]), {ok, Top} = file:position(MainHdl, Top), - ok = file:truncate(MainHdl), %% wipe out any rubbish at the end of the file + %% wipe out any rubbish at the end of the file + ok = file:truncate(MainHdl), %% there really could be rubbish at the end of the file - %% we could have failed after the extending truncate. %% Remember the head of the list will be the highest entry @@ -1596,7 +1627,8 @@ read_message_at_offset(FileHdl, Offset, TotalSize) -> scan_file_for_valid_messages(File) -> {ok, Hdl} = file:open(File, [raw, binary, read]), Valid = scan_file_for_valid_messages(Hdl, 0, []), - _ = file:close(Hdl), %% if something really bad's happened, the close could fail, but ignore + %% if something really bad's happened, the close could fail, but ignore + file:close(Hdl), Valid. scan_file_for_valid_messages(FileHdl, Offset, Acc) -> @@ -1607,21 +1639,28 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> {ok, {ok, MsgId, TotalSize, NextOffset}} -> scan_file_for_valid_messages(FileHdl, NextOffset, [{MsgId, TotalSize, Offset}|Acc]); - _KO -> {ok, Acc} %% bad message, but we may still have recovered some valid messages + _KO -> + %% bad message, but we may still have recovered some valid messages + {ok, Acc} end. 
read_next_file_entry(FileHdl, Offset) -> TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, case file:read(FileHdl, TwoIntegers) of - {ok, <>} -> + {ok, + <>} -> case {TotalSize =:= 0, MsgIdBinSize =:= 0} of {true, _} -> {ok, eof}; %% Nothing we can do other than stop - {false, true} -> %% current message corrupted, try skipping past it - ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize, + {false, true} -> + %% current message corrupted, try skipping past it + ExpectedAbsPos = + Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize, case file:position(FileHdl, {cur, TotalSize + 1}) of - {ok, ExpectedAbsPos} -> {ok, {corrupted, ExpectedAbsPos}}; - {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up + {ok, ExpectedAbsPos} -> + {ok, {corrupted, ExpectedAbsPos}}; + {ok, _SomeOtherPos} -> + {ok, eof}; %% seek failed, so give up KO -> KO end; {false, false} -> %% all good, let's continue @@ -1629,19 +1668,23 @@ read_next_file_entry(FileHdl, Offset) -> {ok, <>} -> ExpectedAbsPos = Offset + TwoIntegers + TotalSize, case file:position(FileHdl, - {cur, TotalSize - MsgIdBinSize}) of + {cur, TotalSize - MsgIdBinSize} + ) of {ok, ExpectedAbsPos} -> NextOffset = Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, case file:read(FileHdl, 1) of - {ok, <>} -> + {ok, + <>} -> {ok, {ok, binary_to_term(MsgId), TotalSize, NextOffset}}; {ok, _SomeOtherData} -> {ok, {corrupted, NextOffset}}; KO -> KO end; - {ok, _SomeOtherPos} -> {ok, eof}; %% seek failed, so give up + {ok, _SomeOtherPos} -> + %% seek failed, so give up + {ok, eof}; KO -> KO end; eof -> {ok, eof}; -- cgit v1.2.1 From c466b77b0d2e47c874ba50619f386b0eae359005 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 8 Jun 2009 16:46:20 +0100 Subject: tiny refactor --- src/rabbit_disk_queue.erl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a8773af6..de2e52e9 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1114,10 +1114,7 @@ sort_msg_locations_by_offset(Asc, List) -> truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), ok = file:truncate(FileHdl), - {ok, Highpoint} = file:position(FileHdl, {bof, Highpoint}), - ok = file:truncate(FileHdl), - {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), - ok. + ok = preallocate(FileHdl, Highpoint, Lowpoint). combine_files({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, -- cgit v1.2.1 From 6bdb029e29d5bb22b56488b6aa25592cccf37f7a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 8 Jun 2009 18:06:51 +0100 Subject: can now switch the mixed queue between modes --- src/rabbit_mixed_queue.erl | 49 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 610a2366..dc180f00 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -39,6 +39,8 @@ tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1]). +-export([to_disk_only_mode/1, to_mixed_mode/1]). 
+ -record(mqstate, { mode, msg_buf, next_write_seq, @@ -53,11 +55,56 @@ start_link(Queue, IsDurable, Mode) when Mode =:= disk orelse Mode =:= mixed -> lists:foldl( fun ({_MsgId, Msg, _Size, Delivered, SeqId}, {Buf, NSeq}) when SeqId >= NSeq -> - {queue:in({SeqId, binary_to_term(Msg), Delivered}, Buf), SeqId + 1} + {queue:in({SeqId, bin_to_msg(Msg), Delivered}, Buf), SeqId + 1} end, {queue:new(), 0}, QList), {ok, #mqstate { mode = Mode, msg_buf = MsgBuf, next_write_seq = NextSeq, queue = Queue, is_durable = IsDurable }}. +to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + is_durable = IsDurable }) -> + Msgs = queue:to_list(MsgBuf), + AckTags = + lists:foldl( + fun ({_Seq, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered}, AcksAcc) -> + ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), + if IsDurable andalso IsPersistent -> + {MsgId, IsDelivered, AckTag, _PersistRemaining} + = rabbit_disk_queue:phantom_deliver(Q), + [AckTag | AcksAcc]; + true -> AcksAcc + end + end, [], Msgs), + ok = rabbit_disk_queue:ack(Q, lists:reverse(AckTags)), + State #mqstate { mode = disk, msg_buf = queue:new() }. + +to_mixed_mode(State = #mqstate { mode = disk, msg_buf = MsgBuf, queue = Q, + is_durable = IsDurable, + next_write_seq = NextSeq }) -> + QList = rabbit_disk_queue:dump_queue(Q), + {MsgBuf1, NextSeq1, AckTags} = + lists:foldl( + fun ({MsgId, MsgBin, _Size, IsDelivered, SeqId}, {Buf, NSeq, AcksAcc}) + when SeqId >= NSeq -> + Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent } + = bin_to_msg(MsgBin), + Buf1 = queue:in({SeqId, Msg, IsDelivered}, Buf), + NSeq1 = SeqId + 1, + AcksAcc1 = + if IsDurable andalso IsPersistent -> + [AcksAcc]; + true -> + {MsgId, IsDelivered, AckTag, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + [AckTag | AcksAcc] + end, + {Buf1, NSeq1, AcksAcc1} + end, {MsgBuf, NextSeq, []}, QList), + ok = rabbit_disk_queue:ack(Q, lists:reverse(AckTags)), + State #mqstate { mode = mixed, msg_buf = MsgBuf1, next_write_seq = NextSeq1 }. + msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), term_to_binary(Msg #basic_message { content = ClearedContent }). -- cgit v1.2.1 From fc2087cf56032928675877bc16c4808f819c9d99 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Jun 2009 11:06:11 +0100 Subject: Logic failure which only came to light when trying to run the consumers as documented in bug 20470 --- src/rabbit_amqqueue_process.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b61e450b..81dea027 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -178,9 +178,11 @@ deliver_queue(Fun, FunAcc0, unsent_message_count = Count, unacked_messages = UAM} = ch_record(ChPid), IsMsgReady = Fun(is_message_ready, FunAcc0, State), - case not(AckRequired) orelse - ( IsMsgReady andalso - rabbit_limiter:can_send( LimiterPid, self()) + case IsMsgReady + andalso + ( (not AckRequired) + orelse + rabbit_limiter:can_send( LimiterPid, self() ) ) of true -> case Fun(AckRequired, FunAcc0, State) of -- cgit v1.2.1 From 9af52131729b5f7b6c42909d88e4883b27a68a2e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Jun 2009 12:52:14 +0100 Subject: Using delayed_write batches together small writes and reduces the number of OS calls. This is a good thing and makes writing to disk much faster. 
However, we can have the situation where we are trying to read a message off disk before that message has been fully written out to disk. Therefore, we need to fsync at choice times. Because fsync is quite expensive, we want to call fsync no more than absolutely necessary. Thus we now have a 'dirty' flag which tracks whether the current file has been written to since the last fsync, and we call fsync whenever it is dirty and the file to read from is the current file. Similar changes have been made elsewhere in the disk queue. In short, however, it seems this does work, as I'm no longer able to reproduce reads of messages which return all blanks. --- src/rabbit_disk_queue.erl | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index de2e52e9..cc5099eb 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -76,6 +76,7 @@ current_file_name, %% current file name current_file_handle, %% current file handle current_offset, %% current offset within current file + current_dirty, %% has the current file been written to since the last fsync? file_size_limit, %% how big can our files get? read_file_handles, %% file handles for reading (LRU) read_file_handles_limit %% how many file handles can we open? @@ -381,6 +382,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_file_name = InitName, current_file_handle = undefined, current_offset = 0, + current_dirty = false, file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, read_file_handles_limit = ReadFileHandlesLimit @@ -515,6 +517,7 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, file:close(Hdl) end, ok, ReadHdls), State #dqstate { current_file_handle = undefined, + current_dirty = false, read_file_handles = {dict:new(), gb_trees:empty()}}. code_change(_OldVsn, State, _Extra) -> @@ -600,8 +603,17 @@ determine_next_read_id(CurrentRead, CurrentWrite, NextWrite) CurrentRead. get_read_handle(File, State = - #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, - read_file_handles_limit = ReadFileHandlesLimit }) -> + #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, + read_file_handles_limit = ReadFileHandlesLimit, + current_file_name = CurName, + current_file_handle = CurHdl, + current_dirty = IsDirty + }) -> + IsDirty2 = if CurName == File andalso IsDirty -> + file:sync(CurHdl), + false; + true -> IsDirty + end, Now = now(), {FileHdl, ReadHdls1, ReadHdlsAge1} = case dict:find(File, ReadHdls) of @@ -625,7 +637,9 @@ get_read_handle(File, State = end, ReadHdls3 = dict:store(File, {FileHdl, Now}, ReadHdls1), ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), - {FileHdl, State #dqstate {read_file_handles = {ReadHdls3, ReadHdlsAge3}}}. + {FileHdl, State #dqstate { read_file_handles = {ReadHdls3, ReadHdlsAge3}, + current_dirty = IsDirty2 + }}. 
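
The dirty-flag discipline in isolation, as a sketch (the record and function names here are invented; the real state lives in #dqstate):

    -record(whdl, {hdl, dirty = false}).

    %% writes only mark the handle dirty; delayed_write may buffer them
    append(W = #whdl { hdl = Hdl }, Bytes) ->
        ok = file:write(Hdl, Bytes),
        W #whdl { dirty = true }.

    %% called before reading from the same file: flush iff there are
    %% unsynced writes, so a reader never sees half-written messages
    sync_if_dirty(W = #whdl { dirty = false }) ->
        W;
    sync_if_dirty(W = #whdl { hdl = Hdl }) ->
        ok = file:sync(Hdl),
        W #whdl { dirty = false }.
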
adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> ExpectedSeqId; @@ -777,7 +791,8 @@ internal_tx_publish(MsgId, MsgBody, ContiguousTop1, Left, undefined}), NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, maybe_roll_to_new_file( - NextOffset, State #dqstate {current_offset = NextOffset}); + NextOffset, State #dqstate {current_offset = NextOffset, + current_dirty = true}); [{MsgId, RefCount, File, Offset, TotalSize}] -> %% We already know about it, just update counter ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, @@ -789,6 +804,7 @@ internal_tx_publish(MsgId, MsgBody, internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, + current_dirty = IsDirty, sequences = Sequences }) -> {PubList, PubAcc, ReadSeqId, Length} = @@ -839,10 +855,12 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, Length + erlang:length(PubList)}) end, - ok = if Sync -> file:sync(CurHdl); - true -> ok - end, - {ok, State2}. + IsDirty2 = if IsDirty andalso Sync -> + ok = file:sync(CurHdl), + false; + true -> IsDirty + end, + {ok, State2 #dqstate { current_dirty = IsDirty2 }}. %% SeqId can be 'next' internal_publish(Q, MsgId, SeqId, MsgBody, State) -> @@ -1012,10 +1030,13 @@ maybe_roll_to_new_file(Offset, current_file_name = CurName, current_file_handle = CurHdl, current_file_num = CurNum, + current_dirty = IsDirty, file_summary = FileSummary } ) when Offset >= FileSizeLimit -> - ok = file:sync(CurHdl), + ok = if IsDirty -> file:sync(CurHdl); + true -> ok + end, ok = file:close(CurHdl), NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, @@ -1027,7 +1048,8 @@ maybe_roll_to_new_file(Offset, State1 = State #dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, - current_offset = 0 + current_offset = 0, + current_dirty = false }, {ok, compact(sets:from_list([CurName]), State1)}; maybe_roll_to_new_file(_, State) -> -- cgit v1.2.1 From d1bb56cf99475afc3116d179fb8d17d1370d2f9d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Jun 2009 17:55:33 +0100 Subject: just committing as need to work from home tomorrow. Code in "interesting" state of flux. disk mode to mixed mode in the mixed_queue is annoyingly hard. --- src/rabbit_disk_queue.erl | 27 ++++++++++----- src/rabbit_mixed_queue.erl | 83 +++++++++++++++++++++++----------------------- src/rabbit_tests.erl | 72 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 52 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index cc5099eb..8a018d96 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -44,7 +44,7 @@ dump_queue/1, delete_non_durable_queues/1 ]). --export([length/1, is_empty/1]). +-export([length/1, is_empty/1, next_write_seq/1]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -249,13 +249,14 @@ -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id_or_next()}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), binary(), non_neg_integer(), - bool(), seq_id()}]). + bool(), {msg_id(), seq_id()}, seq_id()}]). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). 
-spec(length/1 :: (queue_name()) -> non_neg_integer()). +-spec(next_write_seq/1 :: (queue_name()) -> non_neg_integer()). -spec(is_empty/1 :: (queue_name()) -> bool()). -endif. @@ -327,6 +328,9 @@ to_ram_disk_mode() -> length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). +next_write_seq(Q) -> + gen_server2:call(?SERVER, {next_write_seq, Q}, infinity). + is_empty(Q) -> Length = rabbit_disk_queue:length(Q), Length == 0. @@ -460,6 +464,9 @@ handle_call(to_ram_disk_mode, _From, handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), {reply, Length, State}; +handle_call({next_write_seq, Q}, _From, State = #dqstate { sequences = Sequences }) -> + {_ReadSeqId, WriteSeqId, _Length} = sequence_lookup(Sequences, Q), + {reply, WriteSeqId, State}; handle_call({dump_queue, Q}, _From, State) -> {Result, State1} = internal_dump_queue(Q, State), {reply, Result, State1}; @@ -483,7 +490,7 @@ handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), {noreply, State1}; handle_cast({requeue, Q, MsgSeqIds}, State) -> - MsgSeqSeqIds = zip_with_tail(MsgSeqIds, {duplicate, next}), + MsgSeqSeqIds = zip_with_tail(MsgSeqIds, {duplicate, {next, true}}), {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), {noreply, State1}; handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> @@ -887,7 +894,7 @@ internal_tx_cancel(MsgIds, State) -> internal_requeue(_Q, [], State) -> {ok, State}; -internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|_], +internal_requeue(Q, MsgSeqIds = [{_, {FirstSeqIdTo, _}}|_], State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've @@ -913,7 +920,7 @@ internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|_], {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), ReadSeqId2 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo), - MsgSeqIdsZipped = zip_with_tail(MsgSeqIds, {last, {next, next}}), + MsgSeqIdsZipped = zip_with_tail(MsgSeqIds, {last, {next, {next, true}}}), {atomic, {WriteSeqId2, Q}} = mnesia:transaction( fun() -> @@ -925,8 +932,8 @@ internal_requeue(Q, MsgSeqIds = [{_, FirstSeqIdTo}|_], Length + erlang:length(MsgSeqIds)}), {ok, State}. 
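A usage note on next_write_seq/1: per the handler above it simply exposes the
WriteSeqId element of the {ReadSeqId, WriteSeqId, Length} triple held in the
sequences table, so a caller can carry on numbering where the disk queue left
off. A hypothetical caller (queue name and message ids are illustrative;
publish_with_seq is still arity 4, a cast returning ok, at this point in the
series):

resume_publishing(Q, Payloads) ->
    %% adopt the disk queue's current write position, then hand out
    %% explicit, strictly ascending sequence ids ourselves
    NextSeq = rabbit_disk_queue:next_write_seq(Q),
    lists:foldl(
      fun ({MsgId, Bin}, Seq) ->
              ok = rabbit_disk_queue:publish_with_seq(Q, MsgId, Seq, Bin),
              Seq + 1
      end, NextSeq, Payloads).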
-requeue_message({{{MsgId, SeqIdOrig}, SeqIdTo}, - {_NextMsgSeqId, NextSeqIdTo}}, +requeue_message({{{MsgId, SeqIdOrig}, {SeqIdTo, NewIsDelivered}}, + {_NextMsgSeqId, {NextSeqIdTo, _NextNewIsDelivered}}}, {ExpectedSeqIdTo, Q}) -> SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), NextSeqIdTo2 = find_next_seq_id(SeqIdTo2, NextSeqIdTo), @@ -937,7 +944,8 @@ requeue_message({{{MsgId, SeqIdOrig}, SeqIdTo}, true -> ok = mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc {queue_and_seq_id = {Q, SeqIdTo2}, - next_seq_id = NextSeqIdTo2 + next_seq_id = NextSeqIdTo2, + is_delivered = NewIsDelivered }, write), ok = mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write) @@ -1007,7 +1015,8 @@ internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> NextReadSeqId, State2} = internal_read_message(Q, SeqId, true, true, State1), - {true, {MsgId, Msg, Size, Delivered, SeqId}, + {true, + {MsgId, Msg, Size, Delivered, {MsgId, SeqId}, SeqId}, {NextReadSeqId, State2}} end, {ReadSeq, State}), {lists:reverse(QList), State3} diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index dc180f00..c14aef5c 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -49,61 +49,62 @@ } ). -start_link(Queue, IsDurable, Mode) when Mode =:= disk orelse Mode =:= mixed -> - QList = rabbit_disk_queue:dump_queue(Queue), - {MsgBuf, NextSeq} = - lists:foldl( - fun ({_MsgId, Msg, _Size, Delivered, SeqId}, {Buf, NSeq}) - when SeqId >= NSeq -> - {queue:in({SeqId, bin_to_msg(Msg), Delivered}, Buf), SeqId + 1} - end, {queue:new(), 0}, QList), - {ok, #mqstate { mode = Mode, msg_buf = MsgBuf, next_write_seq = NextSeq, - queue = Queue, is_durable = IsDurable }}. +start_link(Queue, IsDurable, disk) -> + NextSeq = rabbit_disk_queue:next_write_seq(Queue), + {ok, #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, + next_write_seq = NextSeq, is_durable = IsDurable }}; +start_link(Queue, IsDurable, mixed) -> + {ok, State} = start_link(Queue, IsDurable, disk), + to_mixed_mode(State #mqstate { next_write_seq = 0 }). to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable }) -> + is_durable = IsDurable, + next_write_seq = NextSeq }) -> Msgs = queue:to_list(MsgBuf), - AckTags = + {NextSeq1, Requeue} = lists:foldl( fun ({_Seq, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered}, AcksAcc) -> - ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), + is_persistent = IsPersistent }, + IsDelivered}, {NSeq, RQueueAcc}) -> if IsDurable andalso IsPersistent -> - {MsgId, IsDelivered, AckTag, _PersistRemaining} - = rabbit_disk_queue:phantom_deliver(Q), - [AckTag | AcksAcc]; - true -> AcksAcc + {MsgId, IsDelivered, AckTag, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + {NSeq + 1, + [ {AckTag, {NSeq, IsDelivered}} | RQueueAcc ]}; + true -> + ok = if [] == RQueueAcc -> ok; + true -> + rabbit_disk_queue:requeue_with_seqs( + Q, lists:reverse(RQueueAcc)) + end, + ok = rabbit_disk_queue:publish_with_seq( + Q, MsgId, NSeq, msg_to_bin(Msg)), + {NSeq + 1, []} end - end, [], Msgs), - ok = rabbit_disk_queue:ack(Q, lists:reverse(AckTags)), - State #mqstate { mode = disk, msg_buf = queue:new() }. + end, {NextSeq, []}, Msgs), + ok = if [] == Requeue -> ok; + true -> + rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) + end, + {ok, State #mqstate { mode = disk, msg_buf = queue:new(), + next_write_seq = NextSeq1 }}. 
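The shape of the fold in to_disk_only_mode above is worth isolating: messages
already on disk are accumulated into a requeue batch, and the batch is flushed
whenever a RAM-only message forces a fresh write at the next slot, keeping
sequence ids strictly ascending. A self-contained toy version (hypothetical
module; it returns the calls it would make rather than talking to the disk
queue, and elides each message's delivered flag as false):

-module(batch_sketch).
-export([interleave/1]).

%% Items are {on_disk, AckTag} | {in_ram, MsgId}; returns, in order,
%% the disk-queue calls the conversion fold would issue.
interleave(Items) ->
    {_NextSeq, Batch, Calls} =
        lists:foldl(
          fun ({on_disk, AckTag}, {Seq, B, Cs}) ->
                  %% already on disk: just remember its new slot
                  {Seq + 1, [{AckTag, {Seq, false}} | B], Cs};
              ({in_ram, MsgId}, {Seq, B, Cs}) ->
                  %% must write fresh: flush the requeue batch first
                  {Seq + 1, [], [{publish_with_seq, MsgId, Seq} | flush(B, Cs)]}
          end, {0, [], []}, Items),
    lists:reverse(flush(Batch, Calls)).

flush([], Cs) -> Cs;
flush(B, Cs)  -> [{requeue_with_seqs, lists:reverse(B)} | Cs].

For instance, interleave([{on_disk, t1}, {in_ram, m2}, {on_disk, t3}]) yields
[{requeue_with_seqs, [{t1, {0, false}}]}, {publish_with_seq, m2, 1},
{requeue_with_seqs, [{t3, {2, false}}]}]: the two messages already on disk are
re-sequenced in place without their bodies being rewritten, and the RAM-only
one is written exactly once.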
to_mixed_mode(State = #mqstate { mode = disk, msg_buf = MsgBuf, queue = Q, - is_durable = IsDurable, next_write_seq = NextSeq }) -> QList = rabbit_disk_queue:dump_queue(Q), - {MsgBuf1, NextSeq1, AckTags} = + {MsgBuf1, NextSeq1} = lists:foldl( - fun ({MsgId, MsgBin, _Size, IsDelivered, SeqId}, {Buf, NSeq, AcksAcc}) + fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, {Buf, NSeq}) when SeqId >= NSeq -> - Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent } + Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), - Buf1 = queue:in({SeqId, Msg, IsDelivered}, Buf), + Buf1 = queue:in({SeqId, + Msg #basic_message { is_persistent = true }, + IsDelivered}, Buf), NSeq1 = SeqId + 1, - AcksAcc1 = - if IsDurable andalso IsPersistent -> - [AcksAcc]; - true -> - {MsgId, IsDelivered, AckTag, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), - [AckTag | AcksAcc] - end, - {Buf1, NSeq1, AcksAcc1} - end, {MsgBuf, NextSeq, []}, QList), - ok = rabbit_disk_queue:ack(Q, lists:reverse(AckTags)), - State #mqstate { mode = mixed, msg_buf = MsgBuf1, next_write_seq = NextSeq1 }. + {Buf1, NSeq1} + end, {MsgBuf, NextSeq}, QList), + {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1, next_write_seq = NextSeq1 }}. msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), @@ -250,7 +251,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, Acc2 = if IsDurable andalso IsPersistent -> {MsgId, _OldSeqId} = AckTag, - [{AckTag, NextSeq3} | Acc]; + [{AckTag, {NextSeq3, true}} | Acc]; true -> Acc end, MsgBuf4 = queue:in({NextSeq3, Msg, true}, MsgBuf3), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4b7487b0..3d173e2e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -910,7 +910,7 @@ rdq_test_dump_queue() -> [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), - QList = [{N, Msg, 256, false, (N-1)} || N <- All], + QList = [{N, Msg, 256, false, {N, (N-1)}, (N-1)} || N <- All], QList = rabbit_disk_queue:dump_queue(q), rdq_stop(), io:format("dump ok undelivered~n", []), @@ -924,12 +924,80 @@ rdq_test_dump_queue() -> rdq_stop(), io:format("dump ok post delivery~n", []), rdq_start(), - QList2 = [{N, Msg, 256, true, (N-1)} || N <- All], + QList2 = [{N, Msg, 256, true, {N, (N-1)}, (N-1)} || N <- All], QList2 = rabbit_disk_queue:dump_queue(q), io:format("dump ok post delivery + restart~n", []), rdq_stop(), passed. 
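As the widened spec and the test above show, each element dump_queue/1 returns
is now a 6-tuple whose fifth field is the {MsgId, SeqId} ack tag, duplicating
the bare SeqId in the sixth field. A small illustrative consumer, assuming
only that tuple shape (function name is hypothetical):

dumped_ack_tags(Q) ->
    %% the comprehension pattern also asserts the invariant that the
    %% ack tag's seq id equals the trailing SeqId field
    [ AckTag || {_MsgId, _MsgBin, _Size, _IsDelivered,
                 AckTag = {_MsgId2, SeqId}, SeqId}
                    <- rabbit_disk_queue:dump_queue(Q) ].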
+rdq_test_mixed_queue_modes() ->
+    rdq_virgin(),
+    rdq_start(),
+    Payload = <<0:(8*256)>>,
+    {ok, MS} = rabbit_mixed_queue:start_link(q, true, mixed),
+    MS2 = lists:foldl(fun (_N, MS1) ->
+                              Msg = rabbit_basic:message(x, <<>>, <<>>, Payload),
+                              {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1),
+                              MS1a
+                      end, MS, lists:seq(1,10)),
+    MS4 = lists:foldl(fun (_N, MS3) ->
+                              Msg = (rabbit_basic:message(x, <<>>, <<>>, Payload))
+                                    #basic_message { is_persistent = true },
+                              {ok, MS3a} = rabbit_mixed_queue:publish(Msg, MS3),
+                              MS3a
+                      end, MS2, lists:seq(1,10)),
+    MS6 = lists:foldl(fun (_N, MS5) ->
+                              Msg = rabbit_basic:message(x, <<>>, <<>>, Payload),
+                              {ok, MS5a} = rabbit_mixed_queue:publish(Msg, MS5),
+                              MS5a
+                      end, MS4, lists:seq(1,10)),
+    30 = rabbit_mixed_queue:length(MS6),
+    io:format("Published a mixture of messages~n"),
+    {ok, _MS7} = rabbit_mixed_queue:to_disk_only_mode(MS6),
+    io:format("Converted to disk only mode~n"),
+    rdq_stop(),
+    rdq_start(),
+    {ok, MS8} = rabbit_mixed_queue:start_link(q, true, mixed),
+    30 = rabbit_mixed_queue:length(MS8),
+    io:format("Recovered queue~n"),
+    MS10 =
+        lists:foldl(
+          fun (N, MS9) ->
+                  Rem = 30 - N,
+                  {{#basic_message { is_persistent = true },
+                    false, _AckTag, Rem},
+                   MS9a} = rabbit_mixed_queue:deliver(MS9),
+                  MS9a
+          end, MS8, lists:seq(1,10)),
+    io:format("Delivered initial non persistent messages~n"),
+    {ok, _MS11} = rabbit_mixed_queue:to_disk_only_mode(MS10),
+    io:format("Converted to disk only mode~n"),
+    rdq_stop(),
+    rdq_start(),
+    {ok, MS12} = rabbit_mixed_queue:start_link(q, true, mixed),
+    30 = rabbit_mixed_queue:length(MS12),
+    io:format("Recovered queue~n"),
+    {MS14, AckTags} =
+        lists:foldl(
+          fun (N, {MS13, AcksAcc}) ->
+                  Rem = 30 - N,
+                  IsDelivered = N < 11,
+                  {{#basic_message { is_persistent = true },
+                    IsDelivered, AckTag, Rem},
+                   MS13a} = rabbit_mixed_queue:deliver(MS13),
+                  {MS13a, [AckTag | AcksAcc]}
+          end, {MS2, []}, lists:seq(1,20)),
+    {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14),
+    io:format("Delivered and acked initial non persistent messages~n"),
+    {ok, _MS16} = rabbit_mixed_queue:to_disk_only_mode(MS15),
+    io:format("Converted to disk only mode~n"),
+    rdq_stop(),
+    rdq_start(),
+    {ok, MS17} = rabbit_mixed_queue:start_link(q, true, mixed),
+    10 = rabbit_mixed_queue:length(MS17),
+    io:format("Recovered queue~n"),
+    passed.
+
 rdq_time_commands(Funcs) ->
     lists:foreach(fun (F) -> F() end, Funcs).
--
cgit v1.2.1


From 519e1301c6f60b97db220aacfd0695a1385ca27b Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 10 Jun 2009 15:44:54 +0100
Subject: Two things have happened here. Firstly, the mixed_queue now functions
 correctly when being run in disk_only mode. This is _much_ more complicated
 than I had thought because the presence of a message on disk has nothing to
 do with whether it is persistent or not. As a result, early acking is
 required and requeuing operations are horrendous to say the least. When
 going from disk-only mode to mixed mode, we don't ack anything at all. It's
 arguable that we should ack non-persistent messages at this point, but the
 problem there is that if the conversion fails then we lose messages.
 Therefore, we then arrive at the situation where we're in mixed mode, and we
 have messages held in ram that are not persistent, but are still on disk,
 and require early acking when being delivered (again, requeue is hell). The
 conversion to and from disk-only and mixed mode now seems to work well. When
 starting up, non-persistent messages on disk are deleted.
Finally, in disk_queue, publish now takes an IsDelivered flag. This
allows you to publish messages and mark them delivered in one go.
However, the message is still available for delivery (i.e. it's not
waiting for an ack). Also in disk_queue, requeue_with_seqs now takes
[{AckTag, {NewSeqId, NewIsDelivered}}], which allows you to requeue
and unset the delivered flag. Note, however, that it is still not safe
to requeue a message which isn't waiting for an ack. (Please note,
it's now very important to distinguish between messages which
"AreDelivered" _and_ are waiting for an ack (these are not going to
appear if you call deliver(Q)), versus messages which "AreDelivered"
but are not waiting for an ack (these will appear, eventually, if you
call deliver(Q)).)
---
 src/rabbit_disk_queue.erl  |  48 ++++++---
 src/rabbit_mixed_queue.erl | 261 +++++++++++++++++++++++++++++++--------------
 src/rabbit_tests.erl       |  60 ++++++-----
 3 files changed, 245 insertions(+), 124 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 8a018d96..5c1f969e 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -38,8 +38,8 @@
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).

--export([publish/3, publish_with_seq/4, deliver/1, phantom_deliver/1, ack/2,
-         tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1,
+-export([publish/4, publish_with_seq/5, deliver/1, phantom_deliver/1, ack/2,
+         tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1,
          requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1,
          dump_queue/1, delete_non_durable_queues/1
         ]).
@@ -232,8 +232,8 @@
 -spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
--spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok').
--spec(publish_with_seq/4 :: (queue_name(), msg_id(), seq_id_or_next(), binary()) -> 'ok').
+-spec(publish/4 :: (queue_name(), msg_id(), binary(), bool()) -> 'ok').
+-spec(publish_with_seq/5 :: (queue_name(), msg_id(), seq_id_or_next(), binary(), bool()) -> 'ok').
 -spec(deliver/1 :: (queue_name()) ->
              {'empty' | {msg_id(), binary(), non_neg_integer(),
                          bool(), {msg_id(), seq_id()}, non_neg_integer()}}).
@@ -267,11 +267,16 @@ start_link() ->
     gen_server2:start_link({local, ?SERVER}, ?MODULE,
                            [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []).

-publish(Q, MsgId, Msg) when is_binary(Msg) ->
-    gen_server2:cast(?SERVER, {publish, Q, MsgId, Msg}).
+publish(Q, MsgId, Msg, false) when is_binary(Msg) ->
+    gen_server2:cast(?SERVER, {publish, Q, MsgId, Msg});
+publish(Q, MsgId, Msg, true) when is_binary(Msg) ->
+    gen_server2:call(?SERVER, {publish, Q, MsgId, Msg}, infinity).

-publish_with_seq(Q, MsgId, SeqId, Msg) when is_binary(Msg) ->
-    gen_server2:cast(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg}).
+publish_with_seq(Q, MsgId, SeqId, Msg, false) when is_binary(Msg) ->
+    gen_server2:cast(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg});
+publish_with_seq(Q, MsgId, SeqId, Msg, true) when is_binary(Msg) ->
+    gen_server2:call(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg},
+                     infinity).

 deliver(Q) ->
     gen_server2:call(?SERVER, {deliver, Q}, infinity).
@@ -285,12 +290,14 @@ ack(Q, MsgSeqIds) when is_list(MsgSeqIds) ->

 tx_publish(MsgId, Msg) when is_binary(Msg) ->
     gen_server2:cast(?SERVER, {tx_publish, MsgId, Msg}).
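Worth noting against the spec above: the false (cast) form of publish/4 really
does return ok, but the true form is a call whose reply is the {MsgId, SeqId}
pair, as the handle_call clauses further down show, so the caller ends up
holding something it can later ack or requeue. A hypothetical usage sketch:

publish_pair(Q, MsgIdA, BinA, MsgIdB, BinB) ->
    %% fire-and-forget: cast, nothing comes back
    ok = rabbit_disk_queue:publish(Q, MsgIdA, BinA, false),
    %% publish-and-mark-delivered: synchronous, and the reply is the
    %% {MsgId, SeqId} ack tag, usable with ack/2 or requeue/2 later
    AckTag = {MsgIdB, _SeqId} =
        rabbit_disk_queue:publish(Q, MsgIdB, BinB, true),
    AckTag.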
-tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> +tx_commit(Q, PubMsgIds, AckSeqIds) + when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> gen_server2:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). tx_commit_with_seqs(Q, PubMsgSeqIds, AckSeqIds) when is_list(PubMsgSeqIds) andalso is_list(AckSeqIds) -> - gen_server2:call(?SERVER, {tx_commit_with_seqs, Q, PubMsgSeqIds, AckSeqIds}, infinity). + gen_server2:call(?SERVER, {tx_commit_with_seqs, Q, PubMsgSeqIds, AckSeqIds}, + infinity). tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server2:cast(?SERVER, {tx_cancel, MsgIds}). @@ -332,8 +339,7 @@ next_write_seq(Q) -> gen_server2:call(?SERVER, {next_write_seq, Q}, infinity). is_empty(Q) -> - Length = rabbit_disk_queue:length(Q), - Length == 0. + 0 == rabbit_disk_queue:length(Q). %% ---- GEN-SERVER INTERNAL API ---- @@ -407,6 +413,14 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> end, {ok, State1 #dqstate { current_file_handle = FileHdl }}. +handle_call({publish, Q, MsgId, MsgBody}, _From, State) -> + {ok, MsgSeqId, State1} = + internal_publish(Q, MsgId, next, MsgBody, true, State), + {reply, MsgSeqId, State1}; +handle_call({publish_with_seq, Q, MsgId, SeqId, MsgBody}, _From, State) -> + {ok, MsgSeqId, State1} = + internal_publish(Q, MsgId, SeqId, MsgBody, true, State), + {reply, MsgSeqId, State1}; handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, State), {reply, Result, State1}; @@ -475,10 +489,10 @@ handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {reply, ok, State1}. handle_cast({publish, Q, MsgId, MsgBody}, State) -> - {ok, State1} = internal_publish(Q, MsgId, next, MsgBody, State), + {ok, _MsgSeqId, State1} = internal_publish(Q, MsgId, next, MsgBody, false, State), {noreply, State1}; handle_cast({publish_with_seq, Q, MsgId, SeqId, MsgBody}, State) -> - {ok, State1} = internal_publish(Q, MsgId, SeqId, MsgBody, State), + {ok, _MsgSeqId, State1} = internal_publish(Q, MsgId, SeqId, MsgBody, false, State), {noreply, State1}; handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), @@ -870,7 +884,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, {ok, State2 #dqstate { current_dirty = IsDirty2 }}. %% SeqId can be 'next' -internal_publish(Q, MsgId, SeqId, MsgBody, State) -> +internal_publish(Q, MsgId, SeqId, MsgBody, IsDelivered, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, MsgBody, State), {ReadSeqId, WriteSeqId, Length} = @@ -882,9 +896,9 @@ internal_publish(Q, MsgId, SeqId, MsgBody, State) -> #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, msg_id = MsgId, next_seq_id = WriteSeqId3Next, - is_delivered = false}), + is_delivered = IsDelivered}), true = ets:insert(Sequences, {Q, ReadSeqId3, WriteSeqId3Next, Length + 1}), - {ok, State1}. + {ok, {MsgId, WriteSeqId3}, State1}. internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index c14aef5c..dae4dad1 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -50,23 +50,27 @@ ). 
start_link(Queue, IsDurable, disk) -> - NextSeq = rabbit_disk_queue:next_write_seq(Queue), - {ok, #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, - next_write_seq = NextSeq, is_durable = IsDurable }}; + purge_non_persistent_messages( + #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, + next_write_seq = 0, is_durable = IsDurable }); start_link(Queue, IsDurable, mixed) -> {ok, State} = start_link(Queue, IsDurable, disk), - to_mixed_mode(State #mqstate { next_write_seq = 0 }). + to_mixed_mode(State). to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable, next_write_seq = NextSeq }) -> + %% We enqueue _everything_ here. This means that should a message + %% already be in the disk queue we must remove it and add it back + %% in. Fortunately, by using requeue, we avoid rewriting the + %% message on disk. + %% Note we also batch together messages on disk so that we minimise + %% the calls to requeue. Msgs = queue:to_list(MsgBuf), {NextSeq1, Requeue} = lists:foldl( - fun ({_Seq, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered}, {NSeq, RQueueAcc}) -> - if IsDurable andalso IsPersistent -> + fun ({_Seq, Msg = #basic_message { guid = MsgId }, + IsDelivered, OnDisk}, {NSeq, RQueueAcc}) -> + if OnDisk -> {MsgId, IsDelivered, AckTag, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), {NSeq + 1, @@ -78,7 +82,7 @@ to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, Q, lists:reverse(RQueueAcc)) end, ok = rabbit_disk_queue:publish_with_seq( - Q, MsgId, NSeq, msg_to_bin(Msg)), + Q, MsgId, NSeq, msg_to_bin(Msg), false), {NSeq + 1, []} end end, {NextSeq, []}, Msgs), @@ -89,22 +93,52 @@ to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, {ok, State #mqstate { mode = disk, msg_buf = queue:new(), next_write_seq = NextSeq1 }}. -to_mixed_mode(State = #mqstate { mode = disk, msg_buf = MsgBuf, queue = Q, - next_write_seq = NextSeq }) -> +to_mixed_mode(State = #mqstate { mode = disk, queue = Q }) -> + %% load up a new queue with everything that's on disk. + %% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), {MsgBuf1, NextSeq1} = lists:foldl( fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, {Buf, NSeq}) when SeqId >= NSeq -> - Msg = #basic_message { guid = MsgId } - = bin_to_msg(MsgBin), - Buf1 = queue:in({SeqId, - Msg #basic_message { is_persistent = true }, - IsDelivered}, Buf), - NSeq1 = SeqId + 1, - {Buf1, NSeq1} - end, {MsgBuf, NextSeq}, QList), - {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1, next_write_seq = NextSeq1 }}. + Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), + {queue:in({SeqId, Msg, IsDelivered, true}, Buf), SeqId + 1} + end, {queue:new(), 0}, QList), + {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1, + next_write_seq = NextSeq1 }}. 
+ +purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, + is_durable = IsDurable }) -> + %% iterate through the content on disk, ack anything which isn't + %% persistent, accumulate everything else that is persistent and + %% requeue it + NextSeq = rabbit_disk_queue:next_write_seq(Q), + {Acks, Requeue, NextSeq2} = + deliver_all_messages(Q, IsDurable, [], [], NextSeq), + ok = if Requeue == [] -> ok; + true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) + end, + ok = if Acks == [] -> ok; + true -> rabbit_disk_queue:ack(Q, lists:reverse(Acks)) + end, + {ok, State #mqstate { next_write_seq = NextSeq2 }}. + +deliver_all_messages(Q, IsDurable, Acks, Requeue, NextSeq) -> + case rabbit_disk_queue:deliver(Q) of + empty -> {Acks, Requeue, NextSeq}; + {MsgId, MsgBin, _Size, IsDelivered, AckTag, _Remaining} -> + #basic_message { guid = MsgId, is_persistent = IsPersistent } = + bin_to_msg(MsgBin), + OnDisk = IsPersistent andalso IsDurable, + {Acks2, Requeue2, NextSeq2} = + if OnDisk -> {Acks, + [{AckTag, {NextSeq, IsDelivered}} | Requeue], + NextSeq + 1 + }; + true -> {[AckTag | Acks], Requeue, NextSeq} + end, + deliver_all_messages(Q, IsDurable, Acks2, Requeue2, NextSeq2) + end. msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), @@ -115,48 +149,78 @@ bin_to_msg(MsgBin) -> publish(Msg = #basic_message { guid = MsgId }, State = #mqstate { mode = disk, queue = Q }) -> - ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), + ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), {ok, State}; publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, next_write_seq = NextSeq, msg_buf = MsgBuf }) -> - ok = if IsDurable andalso IsPersistent -> - rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, msg_to_bin(Msg)); + OnDisk = IsDurable andalso IsPersistent, + ok = if OnDisk -> + rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, + msg_to_bin(Msg), false); true -> ok end, {ok, State #mqstate { next_write_seq = NextSeq + 1, - msg_buf = queue:in({NextSeq, Msg, false}, MsgBuf) - }}. + msg_buf = queue:in({NextSeq, Msg, false, OnDisk}, + MsgBuf) + }}. -%% assumption here is that the queue is empty already (only called via attempt_immediate_delivery) -publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, - State = #mqstate { mode = Mode, queue = Q, is_durable = IsDurable, - next_write_seq = NextSeq }) +%% Assumption here is that the queue is empty already (only called via +%% attempt_immediate_delivery). Also note that the seq id assigned by +%% the disk queue could well not be the same as the NextSeq (true = +%% NextSeq >= disk_queue_write_seq_for_queue(Q)) , but this doesn't +%% matter because the AckTag will still be correct (AckTags for +%% non-persistent messages don't exist). (next_write_seq is actually +%% only used to calculate how many messages are in the queue). 
+publish_delivered(Msg = + #basic_message { guid = MsgId, is_persistent = IsPersistent}, + State = #mqstate { mode = Mode, is_durable = IsDurable, + next_write_seq = NextSeq, queue = Q }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> - ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg)), + true = rabbit_disk_queue:is_empty(Q), + rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), + %% must call phantom_deliver otherwise the msg remains at the head + %% of the queue {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), - State2 = if Mode =:= mixed -> State #mqstate { next_write_seq = NextSeq + 1 }; - true -> State - end, + State2 = + if Mode =:= mixed -> State #mqstate { next_write_seq = NextSeq + 1 }; + true -> State + end, {ok, AckTag, State2}; -publish_delivered(_Msg, State = #mqstate { mode = mixed }) -> +publish_delivered(_Msg, State = #mqstate { mode = mixed, msg_buf = MsgBuf }) -> + true = queue:is_empty(MsgBuf), {ok, noack, State}. -deliver(State = #mqstate { mode = disk, queue = Q }) -> - {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), - Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), - {{Msg, IsDelivered, AckTag, Remaining}, State}; -deliver(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - next_write_seq = NextWrite, is_durable = IsDurable }) -> +deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> + case rabbit_disk_queue:deliver(Q) of + empty -> {empty, State}; + {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} -> + #basic_message { guid = MsgId, is_persistent = IsPersistent } = + Msg = bin_to_msg(MsgBin), + AckTag2 = if IsPersistent andalso IsDurable -> AckTag; + true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), + noack + end, + {{Msg, IsDelivered, AckTag2, Remaining}, State} + end; + +deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, + next_write_seq = NextWrite, msg_buf = MsgBuf }) -> {Result, MsgBuf2} = queue:out(MsgBuf), case Result of empty -> {empty, State}; - {value, {Seq, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered}} -> + {value, {Seq, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered, OnDisk}} -> AckTag = - if IsDurable andalso IsPersistent -> - {MsgId, IsDelivered, AckTag2, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), - AckTag2; + if OnDisk -> + {MsgId, IsDelivered, AckTag2, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + if IsPersistent andalso IsDurable -> AckTag2; + true -> ok = rabbit_disk_queue:ack(Q, [AckTag2]), + noack + end; true -> noack end, {{Msg, IsDelivered, AckTag, (NextWrite - 1 - Seq)}, @@ -173,7 +237,8 @@ ack(Acks, State = #mqstate { queue = Q }) -> {ok, State} end. -tx_publish(Msg = #basic_message { guid = MsgId }, State = #mqstate { mode = disk }) -> +tx_publish(Msg = #basic_message { guid = MsgId }, + State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -182,13 +247,18 @@ tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; tx_publish(_Msg, State = #mqstate { mode = mixed }) -> + %% this message will reappear in the tx_commit, so ignore for now {ok, State}. only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). 
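The noack atom introduced in these deliver clauses leaks into every ack-shaped
list the mixed queue handles, so callers have to filter it out before talking
to the disk queue. The tx_commit hunk that follows leans on a remove_noacks/1
helper whose definition is not shown in this diff; presumably it is just
(an assumed shape):

%% strip the placeholder tags of messages that were acked early (or
%% never hit the disk at all); only real {MsgId, SeqId} tags may be
%% passed to rabbit_disk_queue:ack/2
remove_noacks(AckTags) ->
    [ AckTag || AckTag <- AckTags, AckTag =/= noack ].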
tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> - ok = rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), Acks), + RealAcks = remove_noacks(Acks), + ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; + true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), + RealAcks) + end, {ok, State}; tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, @@ -198,47 +268,69 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, {PersistentPubs, MsgBuf2, NextSeq2} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, {Acc, MsgBuf3, NextSeq3}) -> + OnDisk = IsPersistent andalso IsDurable, Acc2 = - if IsPersistent -> - [{Msg #basic_message.guid, NextSeq3} | Acc]; + if OnDisk -> + [{Msg #basic_message.guid, NextSeq3} + | Acc]; true -> Acc end, - MsgBuf4 = queue:in({NextSeq3, Msg, false}, MsgBuf3), + MsgBuf4 = queue:in({NextSeq3, Msg, false, OnDisk}, + MsgBuf3), {Acc2, MsgBuf4, NextSeq3 + 1} end, {[], MsgBuf, NextSeq}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match %% requirements of rabbit_disk_queue (ascending SeqIds) - PersistentPubs2 = if IsDurable -> lists:reverse(PersistentPubs); - true -> [] - end, - ok = rabbit_disk_queue:tx_commit_with_seqs(Q, PersistentPubs2, - remove_noacks(Acks)), + RealAcks = remove_noacks(Acks), + ok = if ([] == PersistentPubs) andalso ([] == RealAcks) -> ok; + true -> + rabbit_disk_queue:tx_commit_with_seqs( + Q, lists:reverse(PersistentPubs), RealAcks) + end, {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. only_persistent_msg_ids(Pubs) -> - lists:reverse(lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, - Acc) -> - if IsPersistent -> [Msg #basic_message.guid | Acc]; - true -> Acc - end - end, [], Pubs)). + lists:reverse( + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> + if IsPersistent -> [Msg #basic_message.guid | Acc]; + true -> Acc + end + end, [], Pubs)). tx_cancel(Publishes, State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_cancel(only_msg_ids(Publishes)), {ok, State}; -tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable }) -> - MsgIds = if IsDurable -> only_persistent_msg_ids(Publishes); - true -> [] - end, - ok = rabbit_disk_queue:tx_cancel(MsgIds), +tx_cancel(Publishes, + State = #mqstate { mode = mixed, is_durable = IsDurable }) -> + ok = + if IsDurable -> + rabbit_disk_queue:tx_cancel(only_persistent_msg_ids(Publishes)); + true -> ok + end, {ok, State}. -only_ack_tags(MsgWithAcks) -> - lists:map(fun (P) -> element(2, P) end, MsgWithAcks). - %% [{Msg, AckTag}] -requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q }) -> - rabbit_disk_queue:requeue(Q, only_ack_tags(MessagesWithAckTags)), +requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, + is_durable = IsDurable }) -> + %% here, we may have messages with no ack tags, because of the + %% fact they are not persistent, but nevertheless we want to + %% requeue them. This means publishing them delivered. 
+ Requeue + = lists:foldl( + fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) + when IsPersistent andalso IsDurable -> + [AckTag | RQ]; + ({Msg = #basic_message { guid = MsgId }, _AckTag}, RQ) -> + ok = if RQ == [] -> ok; + true -> rabbit_disk_queue:requeue( + Q, lists:reverse(RQ)) + end, + _AckTag2 = rabbit_disk_queue:publish( + Q, MsgId, msg_to_bin(Msg), true), + [] + end, [], MessagesWithAckTags), + ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), {ok, State}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, @@ -246,18 +338,21 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable }) -> {PersistentPubs, MsgBuf2, NextSeq2} = - lists:foldl(fun ({Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, AckTag}, - {Acc, MsgBuf3, NextSeq3}) -> - Acc2 = - if IsDurable andalso IsPersistent -> - {MsgId, _OldSeqId} = AckTag, - [{AckTag, {NextSeq3, true}} | Acc]; - true -> Acc - end, - MsgBuf4 = queue:in({NextSeq3, Msg, true}, MsgBuf3), - {Acc2, MsgBuf4, NextSeq3 + 1} - end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), - ok = rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(PersistentPubs)), + lists:foldl( + fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, + {Acc, MsgBuf3, NextSeq3}) -> + OnDisk = IsDurable andalso IsPersistent, + Acc2 = + if OnDisk -> [{AckTag, {NextSeq3, true}} | Acc]; + true -> Acc + end, + MsgBuf4 = queue:in({NextSeq3, Msg, true, OnDisk}, MsgBuf3), + {Acc2, MsgBuf4, NextSeq3 + 1} + end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), + ok = if [] == PersistentPubs -> ok; + true -> rabbit_disk_queue:requeue_with_seqs( + Q, lists:reverse(PersistentPubs)) + end, {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. purge(State = #mqstate { queue = Q, mode = disk }) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 3d173e2e..a2a31a18 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -264,7 +264,7 @@ test_log_management() -> %% original log files are not writable ok = make_files_non_writable([MainLog, SaslLog]), {error, {{cannot_rotate_main_logs, _}, - {cannot_rotate_sasl_logs, _}}} = control_action(rotate_logs, []), + {cannot_rotate_sasl_logs, _}}} = control_action(rotate_logs, []), %% logging directed to tty (handlers were removed in last test) ok = clean_logs([MainLog, SaslLog], Suffix), @@ -283,7 +283,7 @@ test_log_management() -> ok = application:set_env(sasl, sasl_error_logger, {file, SaslLog}), ok = application:set_env(kernel, error_logger, {file, MainLog}), ok = add_log_handlers([{rabbit_error_logger_file_h, MainLog}, - {rabbit_sasl_report_file_h, SaslLog}]), + {rabbit_sasl_report_file_h, SaslLog}]), passed. test_log_management_during_startup() -> @@ -688,6 +688,20 @@ delete_log_handlers(Handlers) -> ok. test_disk_queue() -> + rdq_stop(), + rdq_virgin(), + passed = rdq_stress_gc(10000), + passed = rdq_test_startup_with_queue_gaps(), + passed = rdq_test_redeliver(), + passed = rdq_test_purge(), + passed = rdq_test_dump_queue(), + passed = rdq_test_mixed_queue_modes(), + rdq_virgin(), + ok = control_action(stop_app, []), + ok = control_action(start_app, []), + passed. 
+ +benchmark_disk_queue() -> rdq_stop(), % unicode chars are supported properly from r13 onwards io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []), @@ -698,12 +712,6 @@ test_disk_queue() -> MsgCount <- [1024, 4096, 16384] ], rdq_virgin(), - passed = rdq_stress_gc(10000), - passed = rdq_test_startup_with_queue_gaps(), - passed = rdq_test_redeliver(), - passed = rdq_test_purge(), - passed = rdq_test_dump_queue(), - rdq_virgin(), ok = control_action(stop_app, []), ok = control_action(start_app, []), passed. @@ -953,49 +961,52 @@ rdq_test_mixed_queue_modes() -> end, MS4, lists:seq(1,10)), 30 = rabbit_mixed_queue:length(MS6), io:format("Published a mixture of messages~n"), - {ok, _MS7} = rabbit_mixed_queue:to_disk_only_mode(MS6), + {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode(MS6), + 30 = rabbit_mixed_queue:length(MS7), io:format("Converted to disk only mode~n"), - rdq_stop(), - rdq_start(), - {ok, MS8} = rabbit_mixed_queue:start_link(q, true, mixed), + {ok, MS8} = rabbit_mixed_queue:to_mixed_mode(MS7), 30 = rabbit_mixed_queue:length(MS8), - io:format("Recovered queue~n"), + io:format("Converted to mixed mode~n"), MS10 = lists:foldl( fun (N, MS9) -> Rem = 30 - N, - {{#basic_message { is_persistent = true }, + {{#basic_message { is_persistent = false }, false, _AckTag, Rem}, MS9a} = rabbit_mixed_queue:deliver(MS9), MS9a end, MS8, lists:seq(1,10)), + 20 = rabbit_mixed_queue:length(MS10), io:format("Delivered initial non persistent messages~n"), - {ok, _MS11} = rabbit_mixed_queue:to_disk_only_mode(MS10), + {ok, MS11} = rabbit_mixed_queue:to_disk_only_mode(MS10), + 20 = rabbit_mixed_queue:length(MS11), io:format("Converted to disk only mode~n"), rdq_stop(), rdq_start(), {ok, MS12} = rabbit_mixed_queue:start_link(q, true, mixed), - 30 = rabbit_mixed_queue:length(MS12), + 10 = rabbit_mixed_queue:length(MS12), io:format("Recovered queue~n"), {MS14, AckTags} = lists:foldl( fun (N, {MS13, AcksAcc}) -> - Rem = 30 - N, - IsDelivered = N < 11, + Rem = 10 - N, {{#basic_message { is_persistent = true }, - IsDelivered, AckTag, Rem}, + false, AckTag, Rem}, MS13a} = rabbit_mixed_queue:deliver(MS13), {MS13a, [AckTag | AcksAcc]} - end, {MS2, []}, lists:seq(1,20)), + end, {MS12, []}, lists:seq(1,10)), + 0 = rabbit_mixed_queue:length(MS14), {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14), - io:format("Delivered and acked initial non persistent messages~n"), - {ok, _MS16} = rabbit_mixed_queue:to_disk_only_mode(MS15), + io:format("Delivered and acked all messages~n"), + {ok, MS16} = rabbit_mixed_queue:to_disk_only_mode(MS15), + 0 = rabbit_mixed_queue:length(MS16), io:format("Converted to disk only mode~n"), rdq_stop(), rdq_start(), {ok, MS17} = rabbit_mixed_queue:start_link(q, true, mixed), - 10 = rabbit_mixed_queue:length(MS17), + 0 = rabbit_mixed_queue:length(MS17), io:format("Recovered queue~n"), + rdq_stop(), passed. rdq_time_commands(Funcs) -> @@ -1010,7 +1021,8 @@ rdq_virgin() -> rdq_start() -> {ok, _} = rabbit_disk_queue:start_link(), - rabbit_disk_queue:to_ram_disk_mode(). + ok = rabbit_disk_queue:to_ram_disk_mode(), + ok. rdq_stop() -> rabbit_disk_queue:stop(), -- cgit v1.2.1 From 4d1e4005117fdbdf12221c8db9523d028d86fac7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 10 Jun 2009 19:04:58 +0100 Subject: Added means to alter all queues and switch to disk_only mode in the disk queue. rabbit_queue_mode_manager:change_memory_usage(undef, true). 
this will first ask all queues to switch from mixed to disk mode, and will on a 2nd call, ask the disk queue to switch to disk only mode. rabbit_queue_mode_manager:change_memory_usage(undef, false). moves the other way. This all works, eg set MulticastMain pushing in messages and switch modes, and it's fine. One immediate problem is that as soon as everything becomes disk only, the performance suffers, so as a result messages build up. This is as expected. Then, going back to the middle mode (i.e. disk queue in ram_disk mode and queues in disk mode), the switch in the disk queue eats up a lot of memory. I suspect this is the effect of converting the mnesia table from disc_only_copies to disc_copies when there are 40k+ messages in there (one row per message). As a result, this conversion on its own is very dangerous to make. It might be more sensible to use the "weird" mode, where the queues are in mixed mode and the disk queue is in disk_only mode so as to try and get the queues to drain as fast as possible, reducing the size of the mnesia table so that when it is finally converted back, it's small. More experimentation is needed. I'll hook the above commands into rabbitmqctl soon. --- src/rabbit.erl | 2 + src/rabbit_amqqueue.erl | 5 ++ src/rabbit_amqqueue_process.erl | 11 +++- src/rabbit_disk_queue.erl | 4 ++ src/rabbit_mixed_queue.erl | 6 +++ src/rabbit_queue_mode_manager.erl | 105 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 src/rabbit_queue_mode_manager.erl diff --git a/src/rabbit.erl b/src/rabbit.erl index ce73f6ce..44e4dc7f 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -138,6 +138,8 @@ start(normal, []) -> {ok, MemoryAlarms} = application:get_env(memory_alarms), ok = rabbit_alarm:start(MemoryAlarms), + + ok = start_child(rabbit_queue_mode_manager), ok = rabbit_binary_generator: check_empty_content_body_frame_size(), diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 08c67946..97ffcda8 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,6 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). +-export([constrain_memory/2]). -import(mnesia). -import(gen_server2). @@ -103,6 +104,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). +-spec(constrain_memory/2 :: (pid(), bool()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -312,6 +314,9 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:cast(QPid, {unblock, ChPid}). +constrain_memory(QPid, Constrain) -> + gen_server2:cast(QPid, {constrain, Constrain}). 
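Spelling out the shell session the commit message sketches (the first argument
is ignored by change_memory_usage/2, so undef is just a placeholder; these are
casts and return ok immediately):

%% first call: all registered queues drop from mixed to disk mode
%% (manager state: unlimited -> ram_disk)
ok = rabbit_queue_mode_manager:change_memory_usage(undef, true),
%% second call: the disk queue itself converts to disk_only mode
%% (manager state: ram_disk -> disk_only)
ok = rabbit_queue_mode_manager:change_memory_usage(undef, true),
%% releasing memory pressure walks back down the same ladder,
%% one rung per call
ok = rabbit_queue_mode_manager:change_memory_usage(undef, false),
ok = rabbit_queue_mode_manager:change_memory_usage(undef, false),

Extra calls at either end of the ladder are no-ops, per the first clause of
each handle_cast group below. A later commit in this series swaps which rung
touches which component; the ladder itself stays the same.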
+ internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( fun () -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index f45f931e..6ad4e4e6 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -93,7 +93,8 @@ start_link(Q) -> init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - {ok, MS} = rabbit_mixed_queue:start_link(QName, Durable, mixed), %% TODO, CHANGE ME + {ok, Mode} = rabbit_queue_mode_manager:register(self()), + {ok, MS} = rabbit_mixed_queue:start_link(QName, Durable, Mode), %% TODO, CHANGE ME {ok, #q{q = Q, owner = none, exclusive_consumer = none, @@ -779,7 +780,13 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end, NewLimited = Limited andalso LimiterPid =/= undefined, C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} - end)). + end)); + +handle_cast({constrain, Constrain}, State = #q { mixed_state = MS }) -> + {ok, MS2} = if Constrain -> rabbit_mixed_queue:to_disk_only_mode(MS); + true -> rabbit_mixed_queue:to_mixed_mode(MS) + end, + noreply(State #q { mixed_state = MS2 }). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 5c1f969e..1b30051f 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -458,6 +458,7 @@ handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = ram_disk, msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts }) -> + rabbit_log:info("Converting disk queue to disk only mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), @@ -470,6 +471,7 @@ handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_only, msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts }) -> + rabbit_log:info("Converting disk queue to ram disk mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), @@ -514,6 +516,8 @@ handle_cast({delete_queue, Q}, State) -> {ok, State1} = internal_delete_queue(Q, State), {noreply, State1}. +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}; handle_info(_Info, State) -> {noreply, State}. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index dae4dad1..6a463242 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -57,8 +57,11 @@ start_link(Queue, IsDurable, mixed) -> {ok, State} = start_link(Queue, IsDurable, disk), to_mixed_mode(State). +to_disk_only_mode(State = #mqstate { mode = disk }) -> + {ok, State}; to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextSeq }) -> + rabbit_log:info("Converting queue to disk only mode: ~p~n", [Q]), %% We enqueue _everything_ here. This means that should a message %% already be in the disk queue we must remove it and add it back %% in. Fortunately, by using requeue, we avoid rewriting the @@ -93,7 +96,10 @@ to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, {ok, State #mqstate { mode = disk, msg_buf = queue:new(), next_write_seq = NextSeq1 }}. 
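The registration/constrain handshake in the hunks above is the whole protocol
between a queue process and the mode manager: register on init to learn the
starting mode, then flip representation whenever a {constrain, bool()} cast
arrives. A toy client of that protocol, using plain gen_server so the sketch
is self-contained (the broker uses its own gen_server2; module name and state
are hypothetical):

-module(constrainable_sketch).
-behaviour(gen_server).

-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

start_link() ->
    gen_server:start_link(?MODULE, [], []).

init([]) ->
    %% the manager replies with the mode this process should start in
    {ok, Mode} = rabbit_queue_mode_manager:register(self()),
    {ok, Mode}.

%% the only protocol message: flip representation on demand
handle_cast({constrain, true},  _Mode) -> {noreply, disk};
handle_cast({constrain, false}, _Mode) -> {noreply, mixed}.

handle_call(_Request, _From, Mode) -> {reply, Mode, Mode}.
handle_info(_Info, Mode)           -> {noreply, Mode}.
terminate(_Reason, _Mode)          -> ok.
code_change(_OldVsn, Mode, _Extra) -> {ok, Mode}.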
+to_mixed_mode(State = #mqstate { mode = mixed }) -> + {ok, State}; to_mixed_mode(State = #mqstate { mode = disk, queue = Q }) -> + rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), %% load up a new queue with everything that's on disk. %% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl new file mode 100644 index 00000000..aee57ac3 --- /dev/null +++ b/src/rabbit_queue_mode_manager.erl @@ -0,0 +1,105 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_mode_manager). + +-behaviour(gen_server2). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([register/1, change_memory_usage/2]). + +-define(SERVER, ?MODULE). + +-record(state, { mode, + queues + }). + +start_link() -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). + +register(Pid) -> + gen_server2:call(?SERVER, {register, Pid}). + +change_memory_usage(_Pid, Conserve) -> + gen_server2:cast(?SERVER, {change_memory_usage, Conserve}). + +init([]) -> + process_flag(trap_exit, true), + ok = rabbit_alarm:register(self(), {?MODULE, change_memory_usage, []}), + {ok, #state { mode = unlimited, + queues = [] + }}. + +handle_call({register, Pid}, _From, State = #state { queues = Qs, mode = Mode }) -> + Result = case Mode of + unlimited -> mixed; + _ -> disk + end, + {reply, {ok, Result}, State #state { queues = [Pid | Qs] }}. 
+
+handle_cast({change_memory_usage, true}, State = #state { mode = disk_only }) ->
+    {noreply, State};
+handle_cast({change_memory_usage, true}, State = #state { mode = ram_disk }) ->
+    ok = rabbit_disk_queue:to_disk_only_mode(),
+    {noreply, State #state { mode = disk_only }};
+handle_cast({change_memory_usage, true}, State = #state { mode = unlimited }) ->
+    constrain_queues(true, State #state.queues),
+    {noreply, State #state { mode = ram_disk }};
+
+handle_cast({change_memory_usage, false}, State = #state { mode = unlimited }) ->
+    {noreply, State};
+handle_cast({change_memory_usage, false}, State = #state { mode = ram_disk }) ->
+    constrain_queues(false, State #state.queues),
+    {noreply, State #state { mode = unlimited }};
+handle_cast({change_memory_usage, false}, State = #state { mode = disk_only }) ->
+    ok = rabbit_disk_queue:to_ram_disk_mode(),
+    {noreply, State #state { mode = ram_disk }}.
+
+handle_info({'EXIT', _Pid, Reason}, State) ->
+    {stop, Reason, State};
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, State) ->
+    State.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+constrain_queues(Constrain, Qs) ->
+    lists:foreach(
+      fun (QPid) ->
+              ok = rabbit_amqqueue:constrain_memory(QPid, Constrain)
+      end, Qs).
--
cgit v1.2.1


From 5596460d82aa54c5ba72f979fd26e3e4392eef53 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 10 Jun 2009 20:06:04 +0100
Subject: Took advantage of the gen_server2 priorities. Reversed order - i.e.
 now when swapping out, the first thing is to alter the disk_queue, and the
 2nd thing is to alter the queues. And vice versa. The reasoning is as
 follows: Changing the disk_queue is a BIG operation because it affects every
 message in there, from all queues. In order to minimise the impact of this
 operation, we must do it first, not second, because if we do it first, only
 persistent messages from durable queues will be in there, whereas if we do
 it second, then all messages from all queues will be in there. Similarly,
 when swapping in, altering the individual queues is the first thing to do
 because it prevents the disk queue from growing further (i.e. only
 persistent messages to durable queues then make it to the disk queue), and
 each queue pulls out from the disk queue all the messages in there and so
 subsequent delivery from the mixed queue becomes very fast (actually, this
 is a total lie because of the call to rabbit_disk_queue:phantom_deliver in
 rabbit_mixed_queue:deliver - if I could get rid of this or at least make it
 async then that would greatly improve matters).
---
 src/rabbit_amqqueue.erl           | 2 +-
 src/rabbit_disk_queue.erl         | 6 +++---
 src/rabbit_mixed_queue.erl        | 1 +
 src/rabbit_queue_mode_manager.erl | 8 ++++----
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index 97ffcda8..a5c58f23 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -315,7 +315,7 @@ unblock(QPid, ChPid) ->
     gen_server2:cast(QPid, {unblock, ChPid}).

 constrain_memory(QPid, Constrain) ->
-    gen_server2:cast(QPid, {constrain, Constrain}).
+    gen_server2:pcast(QPid, 10, {constrain, Constrain}).

 internal_delete(QueueName) ->
     rabbit_misc:execute_mnesia_transaction(
diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 1b30051f..3fc208df 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -315,7 +315,7 @@ delete_queue(Q) ->
     gen_server2:cast(?SERVER, {delete_queue, Q}).
dump_queue(Q) -> - gen_server2:call(?SERVER, {dump_queue, Q}, infinity). + gen_server2:pcall(?SERVER, {dump_queue, Q}, infinity). delete_non_durable_queues(DurableQueues) -> gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). @@ -327,10 +327,10 @@ stop_and_obliterate() -> gen_server2:call(?SERVER, stop_vaporise, infinity). to_disk_only_mode() -> - gen_server2:call(?SERVER, to_disk_only_mode, infinity). + gen_server2:pcall(?SERVER, 10, to_disk_only_mode, infinity). to_ram_disk_mode() -> - gen_server2:call(?SERVER, to_ram_disk_mode, infinity). + gen_server2:pcall(?SERVER, 10, to_ram_disk_mode, infinity). length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 6a463242..8aedc9eb 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -103,6 +103,7 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q }) -> %% load up a new queue with everything that's on disk. %% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), + rabbit_log:info("Queue length: ~p ~w~n", [Q, erlang:length(QList)]), {MsgBuf1, NextSeq1} = lists:foldl( fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, {Buf, NSeq}) diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index aee57ac3..080607bb 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -72,19 +72,19 @@ handle_call({register, Pid}, _From, State = #state { queues = Qs, mode = Mode }) handle_cast({change_memory_usage, true}, State = #state { mode = disk_only }) -> {noreply, State}; handle_cast({change_memory_usage, true}, State = #state { mode = ram_disk }) -> - ok = rabbit_disk_queue:to_disk_only_mode(), + constrain_queues(true, State #state.queues), {noreply, State #state { mode = disk_only }}; handle_cast({change_memory_usage, true}, State = #state { mode = unlimited }) -> - constrain_queues(true, State #state.queues), + ok = rabbit_disk_queue:to_disk_only_mode(), {noreply, State #state { mode = ram_disk }}; handle_cast({change_memory_usage, false}, State = #state { mode = unlimited }) -> {noreply, State}; handle_cast({change_memory_usage, false}, State = #state { mode = ram_disk }) -> - constrain_queues(false, State #state.queues), + ok = rabbit_disk_queue:to_ram_disk_mode(), {noreply, State #state { mode = unlimited }}; handle_cast({change_memory_usage, false}, State = #state { mode = disk_only }) -> - ok = rabbit_disk_queue:to_ram_disk_mode(), + constrain_queues(false, State #state.queues), {noreply, State #state { mode = ram_disk }}. handle_info({'EXIT', _Pid, Reason}, State) -> -- cgit v1.2.1 From 9083b091bb69e9b8d1d7c523c235e5793acfd572 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 10 Jun 2009 21:02:54 +0100 Subject: well, I've made the acking for messages which are on disk but are not persistent/durable async, and it has improved some issues. But, if you switch to disk only mode, then allow, say 10k messages to build up (use MulticastMain) then switch back to ram mode, then it won't recover - the receive rate will stay very low, and rabbitmqctl list_queues will continue to grow insanely. This is very very odd, because querying the disk_queue directly for the queue length shows it drops to 0, but at least one CPU is maxed out at 100% use, messages continue to arrive, but the delivery rate never goes back up. Mysterious. 
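The pcall/pcast forms used in this commit are gen_server2 extensions that take
a numeric priority; as far as these diffs show, higher-priority requests
overtake the ordinary traffic already sitting in the mailbox, which is exactly
what a mode switch needs when a queue is drowning in publishes. A hypothetical
sketch of the convention (priority 10 is the value these commits settle on
for control traffic; QPid stands for any queue process):

throttle(QPid) ->
    %% a mode switch must not queue behind thousands of publishes:
    %% priority 10 jumps ahead of the default-priority backlog
    ok = gen_server2:pcast(QPid, 10, {constrain, true}),
    ok = gen_server2:pcall(rabbit_disk_queue, 10, to_disk_only_mode, infinity).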
--- src/rabbit_disk_queue.erl | 25 +++++++++++++++++++------ src/rabbit_mixed_queue.erl | 21 +++++++++++++-------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3fc208df..2b6f7b00 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -41,7 +41,7 @@ -export([publish/4, publish_with_seq/5, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, - dump_queue/1, delete_non_durable_queues/1 + dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). -export([length/1, is_empty/1, next_write_seq/1]). @@ -287,6 +287,9 @@ phantom_deliver(Q) -> ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {ack, Q, MsgSeqIds}). +auto_ack_next_message(Q) -> + gen_server2:cast(?SERVER, {auto_ack_next_message, Q}). + tx_publish(MsgId, Msg) when is_binary(Msg) -> gen_server2:cast(?SERVER, {tx_publish, MsgId, Msg}). @@ -315,7 +318,7 @@ delete_queue(Q) -> gen_server2:cast(?SERVER, {delete_queue, Q}). dump_queue(Q) -> - gen_server2:pcall(?SERVER, {dump_queue, Q}, infinity). + gen_server2:call(?SERVER, {dump_queue, Q}, infinity). delete_non_durable_queues(DurableQueues) -> gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). @@ -422,10 +425,10 @@ handle_call({publish_with_seq, Q, MsgId, SeqId, MsgBody}, _From, State) -> internal_publish(Q, MsgId, SeqId, MsgBody, true, State), {reply, MsgSeqId, State1}; handle_call({deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, true, State), + {ok, Result, State1} = internal_deliver(Q, true, false, State), {reply, Result, State1}; handle_call({phantom_deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, false, State), + {ok, Result, State1} = internal_deliver(Q, false, false, State), {reply, Result, State1}; handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> PubMsgSeqIds = zip_with_tail(PubMsgIds, {duplicate, next}), @@ -499,6 +502,9 @@ handle_cast({publish_with_seq, Q, MsgId, SeqId, MsgBody}, State) -> handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), {noreply, State1}; +handle_cast({auto_ack_next_message, Q}, State) -> + {ok, State1} = internal_auto_ack(Q, State), + {noreply, State1}; handle_cast({tx_publish, MsgId, MsgBody}, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), {noreply, State1}; @@ -696,14 +702,14 @@ sequence_lookup(Sequences, Q) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> +internal_deliver(Q, ReadMsg, FakeDeliver, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; [{Q, SeqId, SeqId, 0}] -> {ok, empty, State}; [{Q, ReadSeqId, WriteSeqId, Length}] when Length > 0 -> Remaining = Length - 1, {ok, Result, NextReadSeqId, State1} = - internal_read_message(Q, ReadSeqId, false, ReadMsg, State), + internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State), true = ets:insert(Sequences, {Q, NextReadSeqId, WriteSeqId, Remaining}), {ok, @@ -739,6 +745,13 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} end. 
+internal_auto_ack(Q, State) -> + case internal_deliver(Q, false, true, State) of + {ok, empty, State1} -> {ok, State1}; + {ok, {_MsgId, _Delivered, MsgSeqId, _Remaining}, State1} -> + remove_messages(Q, [MsgSeqId], true, State1) + end. + internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, true, State). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 8aedc9eb..4dce52e7 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -103,7 +103,6 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q }) -> %% load up a new queue with everything that's on disk. %% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), - rabbit_log:info("Queue length: ~p ~w~n", [Q, erlang:length(QList)]), {MsgBuf1, NextSeq1} = lists:foldl( fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, {Buf, NSeq}) @@ -111,8 +110,12 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q }) -> Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), {queue:in({SeqId, Msg, IsDelivered, true}, Buf), SeqId + 1} end, {queue:new(), 0}, QList), - {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1, - next_write_seq = NextSeq1 }}. + State1 = State #mqstate { mode = mixed, msg_buf = MsgBuf1, + next_write_seq = NextSeq1 }, + rabbit_log:info("Queue length: ~p ~w ~w~n", + [Q, rabbit_mixed_queue:length(State), + rabbit_mixed_queue:length(State1)]), + {ok, State1}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> @@ -222,11 +225,13 @@ deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, IsDelivered, OnDisk}} -> AckTag = if OnDisk -> - {MsgId, IsDelivered, AckTag2, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), - if IsPersistent andalso IsDurable -> AckTag2; - true -> ok = rabbit_disk_queue:ack(Q, [AckTag2]), - noack + if IsPersistent andalso IsDurable -> + {MsgId, IsDelivered, AckTag2, _PersistRem} = + rabbit_disk_queue:phantom_deliver(Q), + AckTag2; + true -> + ok = rabbit_disk_queue:auto_ack_next_message(Q), + noack end; true -> noack end, -- cgit v1.2.1 From 7ff061f10ee33832fba9864313a0191b20bea7b0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 11 Jun 2009 11:52:38 +0100 Subject: And suddenly it works. Testing showed that removing the crude limit UNSENT_MESSAGE_LIMIT made performance better. This then made me wonder if the unblock and notify_sent messages weren't getting through fast enough, and sure enough, using pcast is much better there. Also, turning on dbg:tpl showed that the common path in mixed_queue was to call publish_delivered (i.e. the message has been delivered to a consumer, we just need to record this fact). Making sure everything in there for the non-persistent, non-durable but disk-only mode is asynchronous also helped performance massively. --- src/rabbit_amqqueue.erl | 4 ++-- src/rabbit_amqqueue_process.erl | 6 +++--- src/rabbit_mixed_queue.erl | 21 ++++++++++++--------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index a5c58f23..01d40aa1 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -309,10 +309,10 @@ basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) -> infinity). notify_sent(QPid, ChPid) -> - gen_server2:cast(QPid, {notify_sent, ChPid}). + gen_server2:pcast(QPid, 10, {notify_sent, ChPid}). unblock(QPid, ChPid) -> - gen_server2:cast(QPid, {unblock, ChPid}). 
+    gen_server2:pcast(QPid, 10, {unblock, ChPid}).
 
 constrain_memory(QPid, Constrain) ->
     gen_server2:pcast(QPid, 10, {constrain, Constrain}).
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 6ad4e4e6..d325346c 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -208,7 +208,7 @@ deliver_queue(Fun, FunAcc0,
                        true -> deliver_queue(Fun, FunAcc1, State3)
                     end
             end;
-        %% if IsMsgReady then (AckRequired and we've hit the limiter)
+        %% if IsMsgReady then we've hit the limiter
        false when IsMsgReady ->
            store_ch_record(C#cr{is_limit_active = true}),
            NewConsumers = block_consumers(ChPid, RoundRobinTail),
@@ -245,8 +245,8 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) ->
             (AckRequired, false, State2) ->
                 {AckTag, State3} =
                     if AckRequired ->
-                            {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered(Msg,
-                                                                                     State2 #q.mixed_state),
+                            {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered(
+                                                  Msg, State2 #q.mixed_state),
                            {AckTag2, State2 #q { mixed_state = MS }};
                        true ->
                            {noack, State2}
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 4dce52e7..a950584a 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -187,16 +187,19 @@ publish_delivered(Msg =
                   State = #mqstate { mode = Mode, is_durable = IsDurable,
                                      next_write_seq = NextSeq, queue = Q })
   when Mode =:= disk orelse (IsDurable andalso IsPersistent) ->
-    true = rabbit_disk_queue:is_empty(Q),
     rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false),
-    %% must call phantom_deliver otherwise the msg remains at the head
-    %% of the queue
-    {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q),
-    State2 =
-        if Mode =:= mixed -> State #mqstate { next_write_seq = NextSeq + 1 };
-           true -> State
-        end,
-    {ok, AckTag, State2};
+    if IsDurable andalso IsPersistent ->
+            %% must call phantom_deliver otherwise the msg remains at
+            %% the head of the queue. This is synchronous, but
+            %% unavoidable as we need the AckTag
+            {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q),
+            {ok, AckTag, State};
+       true ->
+            %% in this case, we don't actually care about the ack, so
+            %% auto ack it (asynchronously).
+            ok = rabbit_disk_queue:auto_ack_next_message(Q),
+            {ok, noack, State #mqstate { next_write_seq = NextSeq + 1 }}
+    end;
 publish_delivered(_Msg, State = #mqstate { mode = mixed, msg_buf = MsgBuf }) ->
     true = queue:is_empty(MsgBuf),
     {ok, noack, State}.
--
cgit v1.2.1


From d9b1ef31f805efc1a9482c5eb7b7b0ac1a05d1c4 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 12 Jun 2009 00:54:48 +0100
Subject: Made mixed_queue track its length by itself. This avoids synchronous
 calls to the disk_queue when operating in disk only mode and seems to have
 substantially improved performance (in addition to avoiding a sync call,
 repeatedly asking for the length of a queue (erlang stdlib) with a million+
 items in it can't have been cheap). It now seems to be very much the case
 that when coming out of disk only mode, huge backlogs are recovered
 reliably. Also, added reduce_memory_footprint and increase_memory_footprint
 to rabbit_control. Both can be run twice, and alternate between changing the
 disk_queue's mode and changing the individual queues.
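
(Aside: the win described above comes from the cost model of Erlang's
queue module: queue:len/1 walks the whole structure, so it is O(N) on
every call, whereas carrying the length alongside the queue makes the
question O(1). A minimal sketch of the technique; the module and
function names are invented for illustration:

-module(len_sketch).
-export([demo/0, in/2, out/1]).

%% pair the queue with its length and keep the count up to date on
%% every operation, instead of recomputing it with queue:len/1
in(Item, {Q, Len}) -> {queue:in(Item, Q), Len + 1}.

out({Q, Len}) ->
    case queue:out(Q) of
        {empty, Q1}         -> {empty, {Q1, Len}};
        {{value, Item}, Q1} -> {{ok, Item}, {Q1, Len - 1}}
    end.

demo() ->
    QL0 = {queue:from_list(lists:seq(1, 1000000)), 1000000},
    QL1 = in(million_and_one, QL0),      %% O(1) length bookkeeping
    {{ok, _Head}, {_Q, Len}} = out(QL1),
    Len.                                 %% 1000000, with no traversal

With a million messages buffered, asking the queue its length once per
delivery is the difference between a constant-time lookup and a full
traversal per message.)
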
---
 src/rabbit_amqqueue_process.erl   |   2 +-
 src/rabbit_control.erl            |  11 +++
 src/rabbit_mixed_queue.erl        | 165 ++++++++++++++++++++------------------
 src/rabbit_queue_mode_manager.erl |   9 ++-
 4 files changed, 106 insertions(+), 81 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index ff0cc56b..a701fa4d 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -237,7 +237,7 @@ deliver_queue(Fun, FunAcc0,
     end.
 
 deliver_from_queue(is_message_ready, undefined, #q { mixed_state = MS }) ->
-    0 /= rabbit_mixed_queue:length(MS);
+    not rabbit_mixed_queue:is_empty(MS);
 deliver_from_queue(AckRequired, Acc = undefined, State = #q { mixed_state = MS }) ->
     {Res, MS2} = rabbit_mixed_queue:deliver(MS),
     MS3 = case {Res, AckRequired} of
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl
index 6649899a..0ead9533 100644
--- a/src/rabbit_control.erl
+++ b/src/rabbit_control.erl
@@ -137,6 +137,9 @@ Available commands:
     list_bindings [-p <vhostpath>]
     list_connections [<connectioninfoitem> ...]
 
+    reduce_memory_footprint
+    increase_memory_footprint
+
 Quiet output mode is selected with the \"-q\" flag. Informational messages
 are suppressed when quiet mode is in effect.
 
@@ -276,6 +279,14 @@ action(list_connections, Node, Args, Inform) ->
                       [ArgAtoms]),
             ArgAtoms);
 
+action(reduce_memory_footprint, Node, _Args, Inform) ->
+    Inform("Reducing memory footprint", []),
+    call(Node, {rabbit_queue_mode_manager, reduce_memory_usage, []});
+
+action(increase_memory_footprint, Node, _Args, Inform) ->
+    Inform("Increasing memory footprint", []),
+    call(Node, {rabbit_queue_mode_manager, increase_memory_usage, []});
+
 action(Command, Node, Args, Inform) ->
     {VHost, RemainingArgs} = parse_vhost_flag(Args),
     action(Command, Node, VHost, RemainingArgs, Inform).
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index a950584a..74e47a00 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -45,14 +45,15 @@
                    msg_buf,
                    next_write_seq,
                    queue,
-                   is_durable
+                   is_durable,
+                   length
                  }
        ).
 
 start_link(Queue, IsDurable, disk) ->
     purge_non_persistent_messages(
       #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue,
-                 next_write_seq = 0, is_durable = IsDurable });
+                 next_write_seq = 0, is_durable = IsDurable, length = 0 });
 start_link(Queue, IsDurable, mixed) ->
     {ok, State} = start_link(Queue, IsDurable, disk),
     to_mixed_mode(State).
@@ -98,23 +99,21 @@ to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf,
 
 to_mixed_mode(State = #mqstate { mode = mixed }) ->
     {ok, State};
-to_mixed_mode(State = #mqstate { mode = disk, queue = Q }) ->
+to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) ->
     rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]),
    %% load up a new queue with everything that's on disk.
%% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), - {MsgBuf1, NextSeq1} = + {MsgBuf1, NextSeq1, Length} = lists:foldl( - fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, {Buf, NSeq}) + fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, + {Buf, NSeq, L}) when SeqId >= NSeq -> Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), - {queue:in({SeqId, Msg, IsDelivered, true}, Buf), SeqId + 1} - end, {queue:new(), 0}, QList), + {queue:in({SeqId, Msg, IsDelivered, true}, Buf), SeqId+1, L+1} + end, {queue:new(), 0, 0}, QList), State1 = State #mqstate { mode = mixed, msg_buf = MsgBuf1, - next_write_seq = NextSeq1 }, - rabbit_log:info("Queue length: ~p ~w ~w~n", - [Q, rabbit_mixed_queue:length(State), - rabbit_mixed_queue:length(State1)]), + next_write_seq = NextSeq1 }, {ok, State1}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, @@ -131,7 +130,8 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, ok = if Acks == [] -> ok; true -> rabbit_disk_queue:ack(Q, lists:reverse(Acks)) end, - {ok, State #mqstate { next_write_seq = NextSeq2 }}. + Length = NextSeq2 - NextSeq, + {ok, State #mqstate { next_write_seq = NextSeq2, length = Length }}. deliver_all_messages(Q, IsDurable, Acks, Requeue, NextSeq) -> case rabbit_disk_queue:deliver(Q) of @@ -158,12 +158,13 @@ bin_to_msg(MsgBin) -> binary_to_term(MsgBin). publish(Msg = #basic_message { guid = MsgId }, - State = #mqstate { mode = disk, queue = Q }) -> + State = #mqstate { mode = disk, queue = Q, length = Length }) -> ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), - {ok, State}; + {ok, State #mqstate { length = Length + 1 }}; publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, - next_write_seq = NextSeq, msg_buf = MsgBuf }) -> + next_write_seq = NextSeq, msg_buf = MsgBuf, + length = Length }) -> OnDisk = IsDurable andalso IsPersistent, ok = if OnDisk -> rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, @@ -172,7 +173,8 @@ publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, end, {ok, State #mqstate { next_write_seq = NextSeq + 1, msg_buf = queue:in({NextSeq, Msg, false, OnDisk}, - MsgBuf) + MsgBuf), + length = Length + 1 }}. %% Assumption here is that the queue is empty already (only called via @@ -180,67 +182,69 @@ publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, %% the disk queue could well not be the same as the NextSeq (true = %% NextSeq >= disk_queue_write_seq_for_queue(Q)) , but this doesn't %% matter because the AckTag will still be correct (AckTags for -%% non-persistent messages don't exist). (next_write_seq is actually -%% only used to calculate how many messages are in the queue). +%% non-persistent messages don't exist). publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, State = #mqstate { mode = Mode, is_durable = IsDurable, - next_write_seq = NextSeq, queue = Q }) + next_write_seq = NextSeq, queue = Q, + length = 0 }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), + State1 = if Mode =:= disk -> State; + true -> State #mqstate { next_write_seq = NextSeq + 1 } + end, if IsDurable andalso IsPersistent -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. 
This is synchronous, but %% unavoidable as we need the AckTag {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), - {ok, AckTag, State}; + {ok, AckTag, State1}; true -> %% in this case, we don't actually care about the ack, so %% auto ack it (asynchronously). ok = rabbit_disk_queue:auto_ack_next_message(Q), - {ok, noack, State #mqstate { next_write_seq = NextSeq + 1 }} + {ok, noack, State1} end; -publish_delivered(_Msg, State = #mqstate { mode = mixed, msg_buf = MsgBuf }) -> - true = queue:is_empty(MsgBuf), +publish_delivered(_Msg, State = #mqstate { mode = mixed, length = 0 }) -> {ok, noack, State}. -deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> - case rabbit_disk_queue:deliver(Q) of - empty -> {empty, State}; - {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} -> - #basic_message { guid = MsgId, is_persistent = IsPersistent } = - Msg = bin_to_msg(MsgBin), - AckTag2 = if IsPersistent andalso IsDurable -> AckTag; - true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), - noack - end, - {{Msg, IsDelivered, AckTag2, Remaining}, State} - end; +deliver(State = #mqstate { length = 0 }) -> + {empty, State}; +deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, + length = Length }) -> + {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} + = rabbit_disk_queue:deliver(Q), + #basic_message { guid = MsgId, is_persistent = IsPersistent } = + Msg = bin_to_msg(MsgBin), + AckTag2 = if IsPersistent andalso IsDurable -> AckTag; + true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), + noack + end, + {{Msg, IsDelivered, AckTag2, Remaining}, + State #mqstate { length = Length - 1}}; deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, - next_write_seq = NextWrite, msg_buf = MsgBuf }) -> - {Result, MsgBuf2} = queue:out(MsgBuf), - case Result of - empty -> - {empty, State}; - {value, {Seq, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered, OnDisk}} -> - AckTag = - if OnDisk -> - if IsPersistent andalso IsDurable -> - {MsgId, IsDelivered, AckTag2, _PersistRem} = - rabbit_disk_queue:phantom_deliver(Q), - AckTag2; - true -> - ok = rabbit_disk_queue:auto_ack_next_message(Q), - noack - end; - true -> noack - end, - {{Msg, IsDelivered, AckTag, (NextWrite - 1 - Seq)}, - State #mqstate { msg_buf = MsgBuf2 }} - end. + next_write_seq = NextWrite, msg_buf = MsgBuf, + length = Length }) -> + {{value, {Seq, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered, OnDisk}}, MsgBuf2} + = queue:out(MsgBuf), + AckTag = + if OnDisk -> + if IsPersistent andalso IsDurable -> + {MsgId, IsDelivered, AckTag2, _PersistRem} = + rabbit_disk_queue:phantom_deliver(Q), + AckTag2; + true -> + ok = rabbit_disk_queue:auto_ack_next_message(Q), + noack + end; + true -> noack + end, + Rem = Length - 1, + {{Msg, IsDelivered, AckTag, Rem}, + State #mqstate { msg_buf = MsgBuf2, length = Rem }}. remove_noacks(Acks) -> lists:filter(fun (A) -> A /= noack end, Acks). @@ -268,17 +272,19 @@ tx_publish(_Msg, State = #mqstate { mode = mixed }) -> only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). 
-tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q }) -> +tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q, + length = Length }) -> RealAcks = remove_noacks(Acks), ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), RealAcks) end, - {ok, State}; + {ok, State #mqstate { length = Length + erlang:length(Publishes) }}; tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextSeq, - is_durable = IsDurable + is_durable = IsDurable, + length = Length }) -> {PersistentPubs, MsgBuf2, NextSeq2} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, @@ -302,7 +308,8 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, rabbit_disk_queue:tx_commit_with_seqs( Q, lists:reverse(PersistentPubs), RealAcks) end, - {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. + {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2, + length = Length + erlang:length(Publishes) }}. only_persistent_msg_ids(Pubs) -> lists:reverse( @@ -327,7 +334,8 @@ tx_cancel(Publishes, %% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable }) -> + is_durable = IsDurable, + length = Length }) -> %% here, we may have messages with no ack tags, because of the %% fact they are not persistent, but nevertheless we want to %% requeue them. This means publishing them delivered. @@ -346,11 +354,12 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, [] end, [], MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), - {ok, State}; + {ok, State #mqstate {length = Length + erlang:length(MessagesWithAckTags)}}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, next_write_seq = NextSeq, - is_durable = IsDurable + is_durable = IsDurable, + length = Length }) -> {PersistentPubs, MsgBuf2, NextSeq2} = lists:foldl( @@ -368,27 +377,25 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, true -> rabbit_disk_queue:requeue_with_seqs( Q, lists:reverse(PersistentPubs)) end, - {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2 }}. + {ok, State #mqstate {msg_buf = MsgBuf2, next_write_seq = NextSeq2, + length = Length + erlang:length(MessagesWithAckTags)}}. -purge(State = #mqstate { queue = Q, mode = disk }) -> +purge(State = #mqstate { queue = Q, mode = disk, length = Count }) -> Count = rabbit_disk_queue:purge(Q), - {Count, State}; -purge(State = #mqstate { queue = Q, msg_buf = MsgBuf, mode = mixed }) -> + {Count, State #mqstate { length = 0 }}; +purge(State = #mqstate { queue = Q, mode = mixed, length = Length }) -> rabbit_disk_queue:purge(Q), - Count = queue:len(MsgBuf), - {Count, State #mqstate { msg_buf = queue:new() }}. + {Length, State #mqstate { msg_buf = queue:new(), length = 0 }}. delete_queue(State = #mqstate { queue = Q, mode = disk }) -> rabbit_disk_queue:delete_queue(Q), - {ok, State}; + {ok, State #mqstate { length = 0 }}; delete_queue(State = #mqstate { queue = Q, mode = mixed }) -> rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { msg_buf = queue:new() }}. + {ok, State #mqstate { msg_buf = queue:new(), length = 0 }}. -length(#mqstate { queue = Q, mode = disk }) -> - rabbit_disk_queue:length(Q); -length(#mqstate { mode = mixed, msg_buf = MsgBuf }) -> - queue:len(MsgBuf). +length(#mqstate { length = Length }) -> + Length. 
-is_empty(State) -> - 0 == rabbit_mixed_queue:length(State). +is_empty(#mqstate { length = Length }) -> + 0 == Length. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 080607bb..32ad6b4c 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,7 +38,8 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/1, change_memory_usage/2]). +-export([register/1, change_memory_usage/2, + reduce_memory_usage/0, increase_memory_usage/0]). -define(SERVER, ?MODULE). @@ -54,6 +55,12 @@ register(Pid) -> change_memory_usage(_Pid, Conserve) -> gen_server2:cast(?SERVER, {change_memory_usage, Conserve}). + +reduce_memory_usage() -> + gen_server2:cast(?SERVER, {change_memory_usage, true}). + +increase_memory_usage() -> + gen_server2:cast(?SERVER, {change_memory_usage, false}). init([]) -> process_flag(trap_exit, true), -- cgit v1.2.1 From 4ac62980edaac85249818cb65fad616149c1c38c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 12 Jun 2009 12:51:45 +0100 Subject: Yep, as I'd thought, the next_seq_id field was totally unused for anything useful. The code is thus now a good bit simpler. --- src/rabbit_mixed_queue.erl | 135 ++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 81 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 74e47a00..31c0fb10 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -43,7 +43,6 @@ -record(mqstate, { mode, msg_buf, - next_write_seq, queue, is_durable, length @@ -53,15 +52,15 @@ start_link(Queue, IsDurable, disk) -> purge_non_persistent_messages( #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, - next_write_seq = 0, is_durable = IsDurable, length = 0 }); + is_durable = IsDurable, length = 0 }); start_link(Queue, IsDurable, mixed) -> {ok, State} = start_link(Queue, IsDurable, disk), to_mixed_mode(State). to_disk_only_mode(State = #mqstate { mode = disk }) -> {ok, State}; -to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - next_write_seq = NextSeq }) -> +to_disk_only_mode(State = + #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf }) -> rabbit_log:info("Converting queue to disk only mode: ~p~n", [Q]), %% We enqueue _everything_ here. This means that should a message %% already be in the disk queue we must remove it and add it back @@ -70,32 +69,30 @@ to_disk_only_mode(State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. 
Msgs = queue:to_list(MsgBuf), - {NextSeq1, Requeue} = + Requeue = lists:foldl( - fun ({_Seq, Msg = #basic_message { guid = MsgId }, - IsDelivered, OnDisk}, {NSeq, RQueueAcc}) -> + fun ({Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}, + RQueueAcc) -> if OnDisk -> {MsgId, IsDelivered, AckTag, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), - {NSeq + 1, - [ {AckTag, {NSeq, IsDelivered}} | RQueueAcc ]}; + [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; true -> ok = if [] == RQueueAcc -> ok; true -> rabbit_disk_queue:requeue_with_seqs( Q, lists:reverse(RQueueAcc)) end, - ok = rabbit_disk_queue:publish_with_seq( - Q, MsgId, NSeq, msg_to_bin(Msg), false), - {NSeq + 1, []} + ok = rabbit_disk_queue:publish( + Q, MsgId, msg_to_bin(Msg), false), + [] end - end, {NextSeq, []}, Msgs), + end, [], Msgs), ok = if [] == Requeue -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, - {ok, State #mqstate { mode = disk, msg_buf = queue:new(), - next_write_seq = NextSeq1 }}. + {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. to_mixed_mode(State = #mqstate { mode = mixed }) -> {ok, State}; @@ -104,50 +101,45 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) -> %% load up a new queue with everything that's on disk. %% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), - {MsgBuf1, NextSeq1, Length} = + {MsgBuf1, Length} = lists:foldl( - fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, SeqId}, - {Buf, NSeq, L}) - when SeqId >= NSeq -> + fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, _SeqId}, + {Buf, L}) -> Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), - {queue:in({SeqId, Msg, IsDelivered, true}, Buf), SeqId+1, L+1} - end, {queue:new(), 0, 0}, QList), - State1 = State #mqstate { mode = mixed, msg_buf = MsgBuf1, - next_write_seq = NextSeq1 }, - {ok, State1}. + {queue:in({Msg, IsDelivered, true}, Buf), L+1} + end, {queue:new(), 0}, QList), + {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> %% iterate through the content on disk, ack anything which isn't %% persistent, accumulate everything else that is persistent and %% requeue it - NextSeq = rabbit_disk_queue:next_write_seq(Q), - {Acks, Requeue, NextSeq2} = - deliver_all_messages(Q, IsDurable, [], [], NextSeq), + {Acks, Requeue, Length} = + deliver_all_messages(Q, IsDurable, [], [], 0), ok = if Requeue == [] -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, ok = if Acks == [] -> ok; true -> rabbit_disk_queue:ack(Q, lists:reverse(Acks)) end, - Length = NextSeq2 - NextSeq, - {ok, State #mqstate { next_write_seq = NextSeq2, length = Length }}. + {ok, State #mqstate { length = Length }}. 
-deliver_all_messages(Q, IsDurable, Acks, Requeue, NextSeq) -> +deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> case rabbit_disk_queue:deliver(Q) of - empty -> {Acks, Requeue, NextSeq}; + empty -> {Acks, Requeue, Length}; {MsgId, MsgBin, _Size, IsDelivered, AckTag, _Remaining} -> #basic_message { guid = MsgId, is_persistent = IsPersistent } = bin_to_msg(MsgBin), OnDisk = IsPersistent andalso IsDurable, - {Acks2, Requeue2, NextSeq2} = + {Acks2, Requeue2, Length2} = if OnDisk -> {Acks, - [{AckTag, {NextSeq, IsDelivered}} | Requeue], - NextSeq + 1 + [{AckTag, {next, IsDelivered}} | Requeue], + Length + 1 }; - true -> {[AckTag | Acks], Requeue, NextSeq} + true -> {[AckTag | Acks], Requeue, Length} end, - deliver_all_messages(Q, IsDurable, Acks2, Requeue2, NextSeq2) + deliver_all_messages(Q, IsDurable, Acks2, Requeue2, Length2) end. msg_to_bin(Msg = #basic_message { content = Content }) -> @@ -163,47 +155,34 @@ publish(Msg = #basic_message { guid = MsgId }, {ok, State #mqstate { length = Length + 1 }}; publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, - next_write_seq = NextSeq, msg_buf = MsgBuf, - length = Length }) -> + msg_buf = MsgBuf, length = Length }) -> OnDisk = IsDurable andalso IsPersistent, ok = if OnDisk -> - rabbit_disk_queue:publish_with_seq(Q, MsgId, NextSeq, - msg_to_bin(Msg), false); + rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false); true -> ok end, - {ok, State #mqstate { next_write_seq = NextSeq + 1, - msg_buf = queue:in({NextSeq, Msg, false, OnDisk}, - MsgBuf), - length = Length + 1 - }}. + {ok, State #mqstate { msg_buf = queue:in({Msg, false, OnDisk}, MsgBuf), + length = Length + 1 }}. %% Assumption here is that the queue is empty already (only called via -%% attempt_immediate_delivery). Also note that the seq id assigned by -%% the disk queue could well not be the same as the NextSeq (true = -%% NextSeq >= disk_queue_write_seq_for_queue(Q)) , but this doesn't -%% matter because the AckTag will still be correct (AckTags for -%% non-persistent messages don't exist). +%% attempt_immediate_delivery). publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, State = #mqstate { mode = Mode, is_durable = IsDurable, - next_write_seq = NextSeq, queue = Q, - length = 0 }) + queue = Q, length = 0 }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), - State1 = if Mode =:= disk -> State; - true -> State #mqstate { next_write_seq = NextSeq + 1 } - end, if IsDurable andalso IsPersistent -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), - {ok, AckTag, State1}; + {ok, AckTag, State}; true -> %% in this case, we don't actually care about the ack, so %% auto ack it (asynchronously). ok = rabbit_disk_queue:auto_ack_next_message(Q), - {ok, noack, State1} + {ok, noack, State} end; publish_delivered(_Msg, State = #mqstate { mode = mixed, length = 0 }) -> {ok, noack, State}. 
@@ -224,9 +203,8 @@ deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, State #mqstate { length = Length - 1}}; deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, - next_write_seq = NextWrite, msg_buf = MsgBuf, - length = Length }) -> - {{value, {Seq, Msg = #basic_message { guid = MsgId, + msg_buf = MsgBuf, length = Length }) -> + {{value, {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered, OnDisk}}, MsgBuf2} = queue:out(MsgBuf), @@ -282,33 +260,30 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q, {ok, State #mqstate { length = Length + erlang:length(Publishes) }}; tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - next_write_seq = NextSeq, is_durable = IsDurable, length = Length }) -> - {PersistentPubs, MsgBuf2, NextSeq2} = + {PersistentPubs, MsgBuf2} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, - {Acc, MsgBuf3, NextSeq3}) -> + {Acc, MsgBuf3}) -> OnDisk = IsPersistent andalso IsDurable, Acc2 = if OnDisk -> - [{Msg #basic_message.guid, NextSeq3} - | Acc]; + [Msg #basic_message.guid | Acc]; true -> Acc end, - MsgBuf4 = queue:in({NextSeq3, Msg, false, OnDisk}, - MsgBuf3), - {Acc2, MsgBuf4, NextSeq3 + 1} - end, {[], MsgBuf, NextSeq}, Publishes), + MsgBuf4 = queue:in({Msg, false, OnDisk}, MsgBuf3), + {Acc2, MsgBuf4} + end, {[], MsgBuf}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match %% requirements of rabbit_disk_queue (ascending SeqIds) RealAcks = remove_noacks(Acks), ok = if ([] == PersistentPubs) andalso ([] == RealAcks) -> ok; true -> - rabbit_disk_queue:tx_commit_with_seqs( + rabbit_disk_queue:tx_commit( Q, lists:reverse(PersistentPubs), RealAcks) end, - {ok, State #mqstate { msg_buf = MsgBuf2, next_write_seq = NextSeq2, + {ok, State #mqstate { msg_buf = MsgBuf2, length = Length + erlang:length(Publishes) }}. only_persistent_msg_ids(Pubs) -> @@ -357,27 +332,25 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, {ok, State #mqstate {length = Length + erlang:length(MessagesWithAckTags)}}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - next_write_seq = NextSeq, is_durable = IsDurable, length = Length }) -> - {PersistentPubs, MsgBuf2, NextSeq2} = + {PersistentPubs, MsgBuf2} = lists:foldl( fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, - {Acc, MsgBuf3, NextSeq3}) -> + {Acc, MsgBuf3}) -> OnDisk = IsDurable andalso IsPersistent, Acc2 = - if OnDisk -> [{AckTag, {NextSeq3, true}} | Acc]; + if OnDisk -> [AckTag | Acc]; true -> Acc end, - MsgBuf4 = queue:in({NextSeq3, Msg, true, OnDisk}, MsgBuf3), - {Acc2, MsgBuf4, NextSeq3 + 1} - end, {[], MsgBuf, NextSeq}, MessagesWithAckTags), + MsgBuf4 = queue:in({Msg, true, OnDisk}, MsgBuf3), + {Acc2, MsgBuf4} + end, {[], MsgBuf}, MessagesWithAckTags), ok = if [] == PersistentPubs -> ok; - true -> rabbit_disk_queue:requeue_with_seqs( - Q, lists:reverse(PersistentPubs)) + true -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) end, - {ok, State #mqstate {msg_buf = MsgBuf2, next_write_seq = NextSeq2, + {ok, State #mqstate {msg_buf = MsgBuf2, length = Length + erlang:length(MessagesWithAckTags)}}. 
purge(State = #mqstate { queue = Q, mode = disk, length = Count }) -> -- cgit v1.2.1 From e58f9b400068753405a308cf1d8269e9cb0331e4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 12:13:42 +0100 Subject: just removing tabs --- src/rabbit.erl | 4 +- src/rabbit_db_queue.erl | 290 ++++++++++++++++++++++----------------------- src/rabbit_disk_queue.erl | 52 ++++---- src/rabbit_mixed_queue.erl | 92 +++++++------- 4 files changed, 219 insertions(+), 219 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 44e4dc7f..2eecac5e 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -136,7 +136,7 @@ start(normal, []) -> ok = rabbit_amqqueue:start(), - {ok, MemoryAlarms} = application:get_env(memory_alarms), + {ok, MemoryAlarms} = application:get_env(memory_alarms), ok = rabbit_alarm:start(MemoryAlarms), ok = start_child(rabbit_queue_mode_manager), @@ -311,7 +311,7 @@ rotate_logs(File, Suffix, OldHandler, NewHandler) -> log_rotation_result({error, MainLogError}, {error, SaslLogError}) -> {error, {{cannot_rotate_main_logs, MainLogError}, - {cannot_rotate_sasl_logs, SaslLogError}}}; + {cannot_rotate_sasl_logs, SaslLogError}}}; log_rotation_result({error, MainLogError}, ok) -> {error, {cannot_rotate_main_logs, MainLogError}}; log_rotation_result(ok, {error, SaslLogError}) -> diff --git a/src/rabbit_db_queue.erl b/src/rabbit_db_queue.erl index 897a4a6f..7530892d 100644 --- a/src/rabbit_db_queue.erl +++ b/src/rabbit_db_queue.erl @@ -60,7 +60,7 @@ terminate/2, code_change/3]). -export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, - tx_commit/3, tx_cancel/1, requeue/2, purge/1]). + tx_commit/3, tx_cancel/1, requeue/2, purge/1]). -export([stop/0, stop_and_obliterate/0]). @@ -75,13 +75,13 @@ -type(seq_id() :: non_neg_integer()). -spec(start_link/1 :: (non_neg_integer()) -> - {'ok', pid()} | 'ignore' | {'error', any()}). + {'ok', pid()} | 'ignore' | {'error', any()}). -spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> - {'empty' | {msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}}). + {'empty' | {msg_id(), binary(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}}). -spec(phantom_deliver/1 :: (queue_name()) -> - { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). + { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). @@ -97,7 +97,7 @@ start_link(DSN) -> gen_server:start_link({local, ?SERVER}, ?MODULE, - [DSN], []). + [DSN], []). publish(Q, MsgId, Msg) when is_binary(Msg) -> gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). @@ -139,7 +139,7 @@ init([DSN]) -> process_flag(trap_exit, true), odbc:start(), {ok, Conn} = odbc:connect(DSN, [{auto_commit, off}, {tuple_row, on}, - {scrollable_cursors, off}, {trace_driver, off}]), + {scrollable_cursors, off}, {trace_driver, off}]), State = #dbstate { db_conn = Conn }, compact_already_delivered(State), {ok, State}. @@ -213,12 +213,12 @@ escape_byte(B) when B > 31 andalso B < 127 -> B; escape_byte(B) -> case io_lib:format("~.8B", [B]) of - O1 = [[_]] -> - "\\\\00" ++ O1; - O2 = [[_,_]] -> - "\\\\0" ++ O2; - O3 = [[_,_,_]] -> - "\\\\" ++ O3 + O1 = [[_]] -> + "\\\\00" ++ O1; + O2 = [[_,_]] -> + "\\\\0" ++ O2; + O3 = [[_,_,_]] -> + "\\\\" ++ O3 end. 
escaped_string_to_binary(Str) when is_list(Str) -> @@ -230,9 +230,9 @@ escaped_string_to_binary([$\\,$\\|Rest], Acc) -> escaped_string_to_binary(Rest, [$\\ | Acc]); escaped_string_to_binary([$\\,A,B,C|Rest], Acc) -> escaped_string_to_binary(Rest, [(list_to_integer([A])*64) + - (list_to_integer([B])*8) + - list_to_integer([C]) - | Acc]); + (list_to_integer([B])*8) + + list_to_integer([C]) + | Acc]); escaped_string_to_binary([C|Rest], Acc) -> escaped_string_to_binary(Rest, [C|Acc]). @@ -250,37 +250,37 @@ hex_string_to_binary([A,B|Rest], Acc) -> internal_deliver(Q, ReadMsg, State = #dbstate { db_conn = Conn }) -> QStr = binary_to_escaped_string(term_to_binary(Q)), case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of - {selected, _, []} -> - odbc:commit(Conn, commit), - {ok, empty, State}; - {selected, _, [{ReadSeqId}]} -> - case odbc:sql_query(Conn, "select is_delivered, msg_id from ledger where queue = " ++ QStr ++ - " and seq_id = " ++ integer_to_list(ReadSeqId)) of - {selected, _, []} -> - {ok, empty, State}; - {selected, _, [{IsDeliveredStr, MsgIdStr}]} -> - IsDelivered = IsDeliveredStr /= "0", - if IsDelivered -> ok; - true -> odbc:sql_query(Conn, "update ledger set is_delivered = true where queue = " ++ - QStr ++ " and seq_id = " ++ integer_to_list(ReadSeqId)) - end, - MsgId = binary_to_term(hex_string_to_binary(MsgIdStr)), - %% yeah, this is really necessary. sigh - MsgIdStr2 = binary_to_escaped_string(term_to_binary(MsgId)), - odbc:sql_query(Conn, "update sequence set next_read = " ++ integer_to_list(ReadSeqId + 1) ++ - " where queue = " ++ QStr), - if ReadMsg -> - {selected, _, [{MsgBodyStr}]} = - odbc:sql_query(Conn, "select msg from message where msg_id = " ++ MsgIdStr2), - odbc:commit(Conn, commit), - MsgBody = hex_string_to_binary(MsgBodyStr), - BodySize = size(MsgBody), - {ok, {MsgId, MsgBody, BodySize, IsDelivered, {MsgId, ReadSeqId}}, State}; - true -> - odbc:commit(Conn, commit), - {ok, {MsgId, IsDelivered, {MsgId, ReadSeqId}}, State} - end - end + {selected, _, []} -> + odbc:commit(Conn, commit), + {ok, empty, State}; + {selected, _, [{ReadSeqId}]} -> + case odbc:sql_query(Conn, "select is_delivered, msg_id from ledger where queue = " ++ QStr ++ + " and seq_id = " ++ integer_to_list(ReadSeqId)) of + {selected, _, []} -> + {ok, empty, State}; + {selected, _, [{IsDeliveredStr, MsgIdStr}]} -> + IsDelivered = IsDeliveredStr /= "0", + if IsDelivered -> ok; + true -> odbc:sql_query(Conn, "update ledger set is_delivered = true where queue = " ++ + QStr ++ " and seq_id = " ++ integer_to_list(ReadSeqId)) + end, + MsgId = binary_to_term(hex_string_to_binary(MsgIdStr)), + %% yeah, this is really necessary. sigh + MsgIdStr2 = binary_to_escaped_string(term_to_binary(MsgId)), + odbc:sql_query(Conn, "update sequence set next_read = " ++ integer_to_list(ReadSeqId + 1) ++ + " where queue = " ++ QStr), + if ReadMsg -> + {selected, _, [{MsgBodyStr}]} = + odbc:sql_query(Conn, "select msg from message where msg_id = " ++ MsgIdStr2), + odbc:commit(Conn, commit), + MsgBody = hex_string_to_binary(MsgBodyStr), + BodySize = size(MsgBody), + {ok, {MsgId, MsgBody, BodySize, IsDelivered, {MsgId, ReadSeqId}}, State}; + true -> + odbc:commit(Conn, commit), + {ok, {MsgId, IsDelivered, {MsgId, ReadSeqId}}, State} + end + end end. 
internal_ack(Q, MsgSeqIds, State) -> @@ -294,22 +294,22 @@ remove_messages(Q, MsgSeqIds, LedgerDelete, State = #dbstate { db_conn = Conn }) QStr = binary_to_escaped_string(term_to_binary(Q)), lists:foreach( fun ({MsgId, SeqId}) -> - MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), - {selected, _, [{RefCount}]} = - odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ - MsgIdStr), - case RefCount of - 1 -> odbc:sql_query(Conn, "delete from message where msg_id = " ++ - MsgIdStr); - _ -> odbc:sql_query(Conn, "update message set ref_count = " ++ - integer_to_list(RefCount - 1) ++ " where msg_id = " ++ - MsgIdStr) - end, - if LedgerDelete -> - odbc:sql_query(Conn, "delete from ledger where queue = " ++ - QStr ++ " and seq_id = " ++ integer_to_list(SeqId)); - true -> ok - end + MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), + {selected, _, [{RefCount}]} = + odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ + MsgIdStr), + case RefCount of + 1 -> odbc:sql_query(Conn, "delete from message where msg_id = " ++ + MsgIdStr); + _ -> odbc:sql_query(Conn, "update message set ref_count = " ++ + integer_to_list(RefCount - 1) ++ " where msg_id = " ++ + MsgIdStr) + end, + if LedgerDelete -> + odbc:sql_query(Conn, "delete from ledger where queue = " ++ + QStr ++ " and seq_id = " ++ integer_to_list(SeqId)); + true -> ok + end end, MsgSeqIds), odbc:commit(Conn, commit), {ok, State}. @@ -318,12 +318,12 @@ internal_tx_publish(MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), MsgStr = binary_to_escaped_string(MsgBody), case odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ MsgIdStr) of - {selected, _, []} -> - odbc:sql_query(Conn, "insert into message (msg_id, msg, ref_count) values (" ++ - MsgIdStr ++ ", " ++ MsgStr ++ ", 1)"); - {selected, _, [{RefCount}]} -> - odbc:sql_query(Conn, "update message set ref_count = " ++ - integer_to_list(RefCount + 1) ++ " where msg_id = " ++ MsgIdStr) + {selected, _, []} -> + odbc:sql_query(Conn, "insert into message (msg_id, msg, ref_count) values (" ++ + MsgIdStr ++ ", " ++ MsgStr ++ ", 1)"); + {selected, _, [{RefCount}]} -> + odbc:sql_query(Conn, "update message set ref_count = " ++ + integer_to_list(RefCount + 1) ++ " where msg_id = " ++ MsgIdStr) end, odbc:commit(Conn, commit), {ok, State}. 
@@ -331,24 +331,24 @@ internal_tx_publish(MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> internal_tx_commit(Q, PubMsgIds, AckSeqIds, State = #dbstate { db_conn = Conn }) -> QStr = binary_to_escaped_string(term_to_binary(Q)), {InsertOrUpdate, NextWrite} = - case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of - {selected, _, []} -> {insert, 0}; - {selected, _, [{NextWrite2}]} -> {update, NextWrite2} - end, + case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of + {selected, _, []} -> {insert, 0}; + {selected, _, [{NextWrite2}]} -> {update, NextWrite2} + end, NextWrite3 = - lists:foldl(fun (MsgId, WriteSeqInteger) -> - MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), - odbc:sql_query(Conn, - "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ - QStr ++ ", " ++ integer_to_list(WriteSeqInteger) ++ ", false, " ++ - MsgIdStr ++ ")"), - WriteSeqInteger + 1 - end, NextWrite, PubMsgIds), + lists:foldl(fun (MsgId, WriteSeqInteger) -> + MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), + odbc:sql_query(Conn, + "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ + QStr ++ ", " ++ integer_to_list(WriteSeqInteger) ++ ", false, " ++ + MsgIdStr ++ ")"), + WriteSeqInteger + 1 + end, NextWrite, PubMsgIds), case InsertOrUpdate of - update -> odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(NextWrite3) ++ - " where queue = " ++ QStr); - insert -> odbc:sql_query(Conn, "insert into sequence (queue, next_read, next_write) values (" ++ - QStr ++ ", 0, " ++ integer_to_list(NextWrite3) ++ ")") + update -> odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(NextWrite3) ++ + " where queue = " ++ QStr); + insert -> odbc:sql_query(Conn, "insert into sequence (queue, next_read, next_write) values (" ++ + QStr ++ ", 0, " ++ integer_to_list(NextWrite3) ++ ")") end, odbc:commit(Conn, commit), remove_messages(Q, AckSeqIds, true, State), @@ -359,19 +359,19 @@ internal_publish(Q, MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), QStr = binary_to_escaped_string(term_to_binary(Q)), NextWrite = - case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of - {selected, _, []} -> - odbc:sql_query(Conn, - "insert into sequence (queue, next_read, next_write) values (" ++ - QStr ++ ", 0, 1)"), - 0; - {selected, _, [{NextWrite2}]} -> - odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(1 + NextWrite2) ++ - " where queue = " ++ QStr), - NextWrite2 - end, + case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of + {selected, _, []} -> + odbc:sql_query(Conn, + "insert into sequence (queue, next_read, next_write) values (" ++ + QStr ++ ", 0, 1)"), + 0; + {selected, _, [{NextWrite2}]} -> + odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(1 + NextWrite2) ++ + " where queue = " ++ QStr), + NextWrite2 + end, odbc:sql_query(Conn, "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ - QStr ++ ", " ++ integer_to_list(NextWrite) ++ ", false, " ++ MsgIdStr ++ ")"), + QStr ++ ", " ++ integer_to_list(NextWrite) ++ ", false, " ++ MsgIdStr ++ ")"), odbc:commit(Conn, commit), {ok, State1}. 
@@ -382,36 +382,36 @@ internal_tx_cancel(MsgIds, State) -> internal_requeue(Q, MsgSeqIds, State = #dbstate { db_conn = Conn }) -> QStr = binary_to_escaped_string(term_to_binary(Q)), {selected, _, [{WriteSeqId}]} = - odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr), + odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr), WriteSeqId2 = - lists:foldl( - fun ({_MsgId, SeqId}, NextWriteSeqId) -> - odbc:sql_query(Conn, "update ledger set seq_id = " ++ integer_to_list(NextWriteSeqId) ++ - " where seq_id = " ++ integer_to_list(SeqId) ++ " and queue = " ++ QStr), - NextWriteSeqId + 1 - end, WriteSeqId, MsgSeqIds), + lists:foldl( + fun ({_MsgId, SeqId}, NextWriteSeqId) -> + odbc:sql_query(Conn, "update ledger set seq_id = " ++ integer_to_list(NextWriteSeqId) ++ + " where seq_id = " ++ integer_to_list(SeqId) ++ " and queue = " ++ QStr), + NextWriteSeqId + 1 + end, WriteSeqId, MsgSeqIds), odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(WriteSeqId2) ++ - " where queue = " ++ QStr), + " where queue = " ++ QStr), odbc:commit(Conn, commit), {ok, State}. - + compact_already_delivered(#dbstate { db_conn = Conn }) -> {selected, _, Seqs} = odbc:sql_query(Conn, "select queue, next_read from sequence"), lists:foreach( fun ({QHexStr, ReadSeqId}) -> - Q = binary_to_term(hex_string_to_binary(QHexStr)), - QStr = binary_to_escaped_string(term_to_binary(Q)), - case odbc:sql_query(Conn, "select min(seq_id) from ledger where queue = " - ++ QStr) of - {selected, _, []} -> ok; - {selected, _, [{null}]} -> ok; %% AGH! - {selected, _, [{Min}]} -> - Gap = shuffle_up(Conn, QStr, Min - 1, ReadSeqId - 1, 0), - odbc:sql_query(Conn, "update sequence set next_read = " ++ - integer_to_list(Min + Gap) ++ - " where queue = " ++ QStr) - end + Q = binary_to_term(hex_string_to_binary(QHexStr)), + QStr = binary_to_escaped_string(term_to_binary(Q)), + case odbc:sql_query(Conn, "select min(seq_id) from ledger where queue = " + ++ QStr) of + {selected, _, []} -> ok; + {selected, _, [{null}]} -> ok; %% AGH! + {selected, _, [{Min}]} -> + Gap = shuffle_up(Conn, QStr, Min - 1, ReadSeqId - 1, 0), + odbc:sql_query(Conn, "update sequence set next_read = " ++ + integer_to_list(Min + Gap) ++ + " where queue = " ++ QStr) + end end, Seqs), odbc:commit(Conn, commit). @@ -419,36 +419,36 @@ shuffle_up(_Conn, _QStr, SeqId, SeqId, Gap) -> Gap; shuffle_up(Conn, QStr, BaseSeqId, SeqId, Gap) -> GapInc = - case odbc:sql_query(Conn, "select count(1) from ledger where queue = " ++ - QStr ++ " and seq_id = " ++ integer_to_list(SeqId)) of - {selected, _, [{"0"}]} -> - 1; - {selected, _, [{"1"}]} -> - if Gap =:= 0 -> ok; - true -> odbc:sql_query(Conn, "update ledger set seq_id = " ++ - integer_to_list(SeqId + Gap) ++ " where seq_id = " ++ - integer_to_list(SeqId) ++ " and queue = " ++ QStr) - end, - 0 - end, + case odbc:sql_query(Conn, "select count(1) from ledger where queue = " ++ + QStr ++ " and seq_id = " ++ integer_to_list(SeqId)) of + {selected, _, [{"0"}]} -> + 1; + {selected, _, [{"1"}]} -> + if Gap =:= 0 -> ok; + true -> odbc:sql_query(Conn, "update ledger set seq_id = " ++ + integer_to_list(SeqId + Gap) ++ " where seq_id = " ++ + integer_to_list(SeqId) ++ " and queue = " ++ QStr) + end, + 0 + end, shuffle_up(Conn, QStr, BaseSeqId, SeqId - 1, Gap + GapInc). 
internal_purge(Q, State = #dbstate { db_conn = Conn }) -> QStr = binary_to_escaped_string(term_to_binary(Q)), case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of - {selected, _, []} -> - odbc:commit(Conn, commit), - {ok, 0, State}; - {selected, _, [{ReadSeqId}]} -> - odbc:sql_query(Conn, "update sequence set next_read = next_write where queue = " ++ QStr), - {selected, _, MsgSeqIds} = - odbc:sql_query(Conn, "select msg_id, seq_id from ledger where queue = " ++ - QStr ++ " and seq_id >= " ++ ReadSeqId), - MsgSeqIds2 = lists:map( - fun ({MsgIdStr, SeqIdStr}) -> - { binary_to_term(hex_string_to_binary(MsgIdStr)), - list_to_integer(SeqIdStr) } - end, MsgSeqIds), - {ok, State2} = remove_messages(Q, MsgSeqIds2, true, State), - {ok, length(MsgSeqIds2), State2} + {selected, _, []} -> + odbc:commit(Conn, commit), + {ok, 0, State}; + {selected, _, [{ReadSeqId}]} -> + odbc:sql_query(Conn, "update sequence set next_read = next_write where queue = " ++ QStr), + {selected, _, MsgSeqIds} = + odbc:sql_query(Conn, "select msg_id, seq_id from ledger where queue = " ++ + QStr ++ " and seq_id >= " ++ ReadSeqId), + MsgSeqIds2 = lists:map( + fun ({MsgIdStr, SeqIdStr}) -> + { binary_to_term(hex_string_to_binary(MsgIdStr)), + list_to_integer(SeqIdStr) } + end, MsgSeqIds), + {ok, State2} = remove_messages(Q, MsgSeqIds2, true, State), + {ok, length(MsgSeqIds2), State2} end. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2b6f7b00..3b30a0da 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,7 +40,7 @@ -export([publish/4, publish_with_seq/5, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1, - requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, + requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). @@ -68,19 +68,19 @@ -define(FILE_SIZE_LIMIT, (256*1024*1024)). -record(dqstate, {msg_location_dets, %% where are messages? - msg_location_ets, %% as above, but for ets version + msg_location_ets, %% as above, but for ets version operation_mode, %% ram_disk | disk_only - file_summary, %% what's in the files? - sequences, %% next read and write for each q - current_file_num, %% current file name as number - current_file_name, %% current file name - current_file_handle, %% current file handle - current_offset, %% current offset within current file + file_summary, %% what's in the files? + sequences, %% next read and write for each q + current_file_num, %% current file name as number + current_file_name, %% current file name + current_file_handle, %% current file handle + current_offset, %% current offset within current file current_dirty, %% has the current file been written to since the last fsync? - file_size_limit, %% how big can our files get? - read_file_handles, %% file handles for reading (LRU) - read_file_handles_limit %% how many file handles can we open? - }). + file_size_limit, %% how big can our files get? + read_file_handles, %% file handles for reading (LRU) + read_file_handles_limit %% how many file handles can we open? + }). 
%% The components: %% @@ -92,10 +92,10 @@ %% {Q, ReadSeqId, WriteSeqId, QueueLength} %% rabbit_disk_queue: this is an mnesia table which contains: %% #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, -%% is_delivered = IsDelivered, -%% msg_id = MsgId, +%% is_delivered = IsDelivered, +%% msg_id = MsgId, %% next_seq_id = SeqId -%% } +%% } %% %% The basic idea is that messages are appended to the current file up @@ -190,18 +190,18 @@ %% variable. Judicious use of a mirror is required). %% %% +-------+ +-------+ +-------+ -%% | X | | G | | G | -%% +-------+ +-------+ +-------+ -%% | D | | X | | F | -%% +-------+ +-------+ +-------+ -%% | X | | X | | E | -%% +-------+ +-------+ +-------+ +%% | X | | G | | G | +%% +-------+ +-------+ +-------+ +%% | D | | X | | F | +%% +-------+ +-------+ +-------+ +%% | X | | X | | E | +%% +-------+ +-------+ +-------+ %% | C | | F | ===> | D | -%% +-------+ +-------+ +-------+ -%% | X | | X | | C | -%% +-------+ +-------+ +-------+ -%% | B | | X | | B | -%% +-------+ +-------+ +-------+ +%% +-------+ +-------+ +-------+ +%% | X | | X | | C | +%% +-------+ +-------+ +-------+ +%% | B | | X | | B | +%% +-------+ +-------+ +-------+ %% | A | | E | | A | %% +-------+ +-------+ +-------+ %% left right left diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 31c0fb10..5082fe55 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -52,7 +52,7 @@ start_link(Queue, IsDurable, disk) -> purge_non_persistent_messages( #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, - is_durable = IsDurable, length = 0 }); + is_durable = IsDurable, length = 0 }); start_link(Queue, IsDurable, mixed) -> {ok, State} = start_link(Queue, IsDurable, disk), to_mixed_mode(State). @@ -111,12 +111,12 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) -> {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable }) -> + is_durable = IsDurable }) -> %% iterate through the content on disk, ack anything which isn't %% persistent, accumulate everything else that is persistent and %% requeue it {Acks, Requeue, Length} = - deliver_all_messages(Q, IsDurable, [], [], 0), + deliver_all_messages(Q, IsDurable, [], [], 0), ok = if Requeue == [] -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, @@ -127,19 +127,19 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> case rabbit_disk_queue:deliver(Q) of - empty -> {Acks, Requeue, Length}; - {MsgId, MsgBin, _Size, IsDelivered, AckTag, _Remaining} -> - #basic_message { guid = MsgId, is_persistent = IsPersistent } = - bin_to_msg(MsgBin), - OnDisk = IsPersistent andalso IsDurable, - {Acks2, Requeue2, Length2} = - if OnDisk -> {Acks, - [{AckTag, {next, IsDelivered}} | Requeue], - Length + 1 - }; - true -> {[AckTag | Acks], Requeue, Length} - end, - deliver_all_messages(Q, IsDurable, Acks2, Requeue2, Length2) + empty -> {Acks, Requeue, Length}; + {MsgId, MsgBin, _Size, IsDelivered, AckTag, _Remaining} -> + #basic_message { guid = MsgId, is_persistent = IsPersistent } = + bin_to_msg(MsgBin), + OnDisk = IsPersistent andalso IsDurable, + {Acks2, Requeue2, Length2} = + if OnDisk -> {Acks, + [{AckTag, {next, IsDelivered}} | Requeue], + Length + 1 + }; + true -> {[AckTag | Acks], Requeue, Length} + end, + deliver_all_messages(Q, IsDurable, Acks2, Requeue2, Length2) end. 
msg_to_bin(Msg = #basic_message { content = Content }) -> @@ -167,7 +167,7 @@ publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, %% Assumption here is that the queue is empty already (only called via %% attempt_immediate_delivery). publish_delivered(Msg = - #basic_message { guid = MsgId, is_persistent = IsPersistent}, + #basic_message { guid = MsgId, is_persistent = IsPersistent}, State = #mqstate { mode = Mode, is_durable = IsDurable, queue = Q, length = 0 }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> @@ -235,7 +235,7 @@ ack(Acks, State = #mqstate { queue = Q }) -> end. tx_publish(Msg = #basic_message { guid = MsgId }, - State = #mqstate { mode = disk }) -> + State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), {ok, State}; tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -254,9 +254,9 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q, length = Length }) -> RealAcks = remove_noacks(Acks), ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; - true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), - RealAcks) - end, + true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), + RealAcks) + end, {ok, State #mqstate { length = Length + erlang:length(Publishes) }}; tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, @@ -266,7 +266,7 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, {PersistentPubs, MsgBuf2} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, {Acc, MsgBuf3}) -> - OnDisk = IsPersistent andalso IsDurable, + OnDisk = IsPersistent andalso IsDurable, Acc2 = if OnDisk -> [Msg #basic_message.guid | Acc]; @@ -279,32 +279,32 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, %% requirements of rabbit_disk_queue (ascending SeqIds) RealAcks = remove_noacks(Acks), ok = if ([] == PersistentPubs) andalso ([] == RealAcks) -> ok; - true -> - rabbit_disk_queue:tx_commit( - Q, lists:reverse(PersistentPubs), RealAcks) - end, + true -> + rabbit_disk_queue:tx_commit( + Q, lists:reverse(PersistentPubs), RealAcks) + end, {ok, State #mqstate { msg_buf = MsgBuf2, length = Length + erlang:length(Publishes) }}. only_persistent_msg_ids(Pubs) -> lists:reverse( lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> - if IsPersistent -> [Msg #basic_message.guid | Acc]; - true -> Acc - end - end, [], Pubs)). + fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> + if IsPersistent -> [Msg #basic_message.guid | Acc]; + true -> Acc + end + end, [], Pubs)). tx_cancel(Publishes, State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_cancel(only_msg_ids(Publishes)), {ok, State}; tx_cancel(Publishes, - State = #mqstate { mode = mixed, is_durable = IsDurable }) -> + State = #mqstate { mode = mixed, is_durable = IsDurable }) -> ok = - if IsDurable -> - rabbit_disk_queue:tx_cancel(only_persistent_msg_ids(Publishes)); - true -> ok - end, + if IsDurable -> + rabbit_disk_queue:tx_cancel(only_persistent_msg_ids(Publishes)); + true -> ok + end, {ok, State}. 
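%% only_persistent_msg_ids/1 above could equally be a list comprehension,
%% since a generator pattern that fails to match just skips the element;
%% an equivalent sketch:

only_persistent_msg_ids(Pubs) ->
    [MsgId || #basic_message { guid = MsgId,
                               is_persistent = true } <- Pubs].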
%% [{Msg, AckTag}] @@ -337,16 +337,16 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, }) -> {PersistentPubs, MsgBuf2} = lists:foldl( - fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, - {Acc, MsgBuf3}) -> - OnDisk = IsDurable andalso IsPersistent, - Acc2 = - if OnDisk -> [AckTag | Acc]; - true -> Acc - end, - MsgBuf4 = queue:in({Msg, true, OnDisk}, MsgBuf3), - {Acc2, MsgBuf4} - end, {[], MsgBuf}, MessagesWithAckTags), + fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, + {Acc, MsgBuf3}) -> + OnDisk = IsDurable andalso IsPersistent, + Acc2 = + if OnDisk -> [AckTag | Acc]; + true -> Acc + end, + MsgBuf4 = queue:in({Msg, true, OnDisk}, MsgBuf3), + {Acc2, MsgBuf4} + end, {[], MsgBuf}, MessagesWithAckTags), ok = if [] == PersistentPubs -> ok; true -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) end, -- cgit v1.2.1 From b66011c960b52c6234153f18c4d3893e460f04fe Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 12:14:09 +0100 Subject: Removing the failed experiment that was the odbc db queue --- src/rabbit_db_queue.erl | 454 ----------------------------------------- src/rabbit_db_queue_schema.sql | 22 -- 2 files changed, 476 deletions(-) delete mode 100644 src/rabbit_db_queue.erl delete mode 100644 src/rabbit_db_queue_schema.sql diff --git a/src/rabbit_db_queue.erl b/src/rabbit_db_queue.erl deleted file mode 100644 index 7530892d..00000000 --- a/src/rabbit_db_queue.erl +++ /dev/null @@ -1,454 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - -%% So, assuming you're on some debian linux type system, -%% apt-get install postgresql odbc-postgresql unixodbc unixodbc-bin -%% sudo odbcinst -i -d -f /usr/share/psqlodbc/odbcinst.ini.template - -%% Now set up in postgresql a user and a database that user can -%% access. For example, the database could be called rabbit_db_queue -%% and the username could be rabbit and the password could be rabbit. - -%% sudo ODBCConfig -%% set up a system wide dsn with the above settings in it. -%% now drop into the erlang shell, and you should not get an error after: - -%% > odbc:start(). -%% < ok. -%% > odbc:connect("DSN=rabbit_db_queue", []). 
-%% < {ok,<0.325.0>} -%% ( replace rabbit_db_queue with the name of your DSN that you configured ) - -%% the connection string (eg "DSN=rabbit_db_queue") is what you pass -%% to start_link. Don't just pass the DSN name. - --module(rabbit_db_queue). - --behaviour(gen_server). - --export([start_link/1]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/2, - tx_commit/3, tx_cancel/1, requeue/2, purge/1]). - --export([stop/0, stop_and_obliterate/0]). - --include("rabbit.hrl"). - --define(SERVER, ?MODULE). - -%% ---- SPECS ---- - --ifdef(use_specs). - --type(seq_id() :: non_neg_integer()). - --spec(start_link/1 :: (non_neg_integer()) -> - {'ok', pid()} | 'ignore' | {'error', any()}). --spec(publish/3 :: (queue_name(), msg_id(), binary()) -> 'ok'). --spec(deliver/1 :: (queue_name()) -> - {'empty' | {msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}}). --spec(phantom_deliver/1 :: (queue_name()) -> - { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}}}). --spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). --spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [msg_id()], [seq_id()]) -> 'ok'). --spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). --spec(requeue/2 :: (queue_name(), [seq_id()]) -> 'ok'). --spec(purge/1 :: (queue_name()) -> non_neg_integer()). --spec(stop/0 :: () -> 'ok'). --spec(stop_and_obliterate/0 :: () -> 'ok'). - --endif. - -%% ---- PUBLIC API ---- - -start_link(DSN) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, - [DSN], []). - -publish(Q, MsgId, Msg) when is_binary(Msg) -> - gen_server:cast(?SERVER, {publish, Q, MsgId, Msg}). - -deliver(Q) -> - gen_server:call(?SERVER, {deliver, Q}, infinity). - -phantom_deliver(Q) -> - gen_server:call(?SERVER, {phantom_deliver, Q}). - -ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> - gen_server:cast(?SERVER, {ack, Q, MsgSeqIds}). - -tx_publish(MsgId, Msg) when is_binary(Msg) -> - gen_server:cast(?SERVER, {tx_publish, MsgId, Msg}). - -tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> - gen_server:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). - -tx_cancel(MsgIds) when is_list(MsgIds) -> - gen_server:cast(?SERVER, {tx_cancel, MsgIds}). - -requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> - gen_server:cast(?SERVER, {requeue, Q, MsgSeqIds}). - -purge(Q) -> - gen_server:call(?SERVER, {purge, Q}). - -stop() -> - gen_server:call(?SERVER, stop, infinity). - -stop_and_obliterate() -> - gen_server:call(?SERVER, stop_vaporise, infinity). - -%% ---- GEN-SERVER INTERNAL API ---- --record(dbstate, { db_conn }). - -init([DSN]) -> - process_flag(trap_exit, true), - odbc:start(), - {ok, Conn} = odbc:connect(DSN, [{auto_commit, off}, {tuple_row, on}, - {scrollable_cursors, off}, {trace_driver, off}]), - State = #dbstate { db_conn = Conn }, - compact_already_delivered(State), - {ok, State}. 
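%% An illustrative smoke test for the DSN and connection options used in
%% init/1 above (function name invented; assumes the schema from
%% rabbit_db_queue_schema.sql has been loaded):

test_dsn(DSN) ->
    odbc:start(),
    {ok, Conn} = odbc:connect(DSN, [{auto_commit, off}, {tuple_row, on},
                                    {scrollable_cursors, off}]),
    {selected, _Cols, [{Count}]} =
        odbc:sql_query(Conn, "select count(1) from message"),
    ok = odbc:commit(Conn, commit),
    ok = odbc:disconnect(Conn),
    Count.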
- -handle_call({deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, true, State), - {reply, Result, State1}; -handle_call({phantom_deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, false, State), - {reply, Result, State1}; -handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> - {ok, State1} = internal_tx_commit(Q, PubMsgIds, AckSeqIds, State), - {reply, ok, State1}; -handle_call({purge, Q}, _From, State) -> - {ok, Count, State1} = internal_purge(Q, State), - {reply, Count, State1}; -handle_call(stop, _From, State) -> - {stop, normal, ok, State}; %% gen_server now calls terminate -handle_call(stop_vaporise, _From, State = #dbstate { db_conn = Conn }) -> - odbc:sql_query(Conn, "delete from ledger"), - odbc:sql_query(Conn, "delete from sequence"), - odbc:sql_query(Conn, "delete from message"), - odbc:commit(Conn, commit), - {stop, normal, ok, State}. - %% gen_server now calls terminate, which then calls shutdown - -handle_cast({publish, Q, MsgId, MsgBody}, State) -> - {ok, State1} = internal_publish(Q, MsgId, MsgBody, State), - {noreply, State1}; -handle_cast({ack, Q, MsgSeqIds}, State) -> - {ok, State1} = internal_ack(Q, MsgSeqIds, State), - {noreply, State1}; -handle_cast({tx_publish, MsgId, MsgBody}, State) -> - {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - {noreply, State1}; -handle_cast({tx_cancel, MsgIds}, State) -> - {ok, State1} = internal_tx_cancel(MsgIds, State), - {noreply, State1}; -handle_cast({requeue, Q, MsgSeqIds}, State) -> - {ok, State1} = internal_requeue(Q, MsgSeqIds, State), - {noreply, State1}. - -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, State) -> - shutdown(State). - -shutdown(State = #dbstate { db_conn = Conn }) -> - odbc:disconnect(Conn), - State. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%% ---- UTILITY FUNCTIONS ---- - -binary_to_escaped_string(Bin) when is_binary(Bin) -> - "E'" ++ lists:flatten(lists:reverse(binary_to_escaped_string(Bin, []))) ++ "'". - -binary_to_escaped_string(<<>>, Acc) -> - Acc; -binary_to_escaped_string(<<Byte:8, Rest/binary>>, Acc) -> - binary_to_escaped_string(Rest, [escape_byte(Byte) | Acc]). - -escape_byte(39) -> - "\\\\047"; -escape_byte(92) -> - "\\\\134"; -escape_byte(B) when B > 31 andalso B < 127 -> - B; -escape_byte(B) -> - case io_lib:format("~.8B", [B]) of - O1 = [[_]] -> - "\\\\00" ++ O1; - O2 = [[_,_]] -> - "\\\\0" ++ O2; - O3 = [[_,_,_]] -> - "\\\\" ++ O3 - end. - -escaped_string_to_binary(Str) when is_list(Str) -> - list_to_binary(lists:reverse(escaped_string_to_binary(Str, []))). - -escaped_string_to_binary([], Acc) -> - Acc; -escaped_string_to_binary([$\\,$\\|Rest], Acc) -> - escaped_string_to_binary(Rest, [$\\ | Acc]); -escaped_string_to_binary([$\\,A,B,C|Rest], Acc) -> - escaped_string_to_binary(Rest, [(list_to_integer([A])*64) + - (list_to_integer([B])*8) + - list_to_integer([C]) - | Acc]); -escaped_string_to_binary([C|Rest], Acc) -> - escaped_string_to_binary(Rest, [C|Acc]). - -hex_string_to_binary(Str) when is_list(Str) -> - list_to_binary(lists:reverse(hex_string_to_binary(Str, []))). - -hex_string_to_binary([], Acc) -> - Acc; -hex_string_to_binary([A,B|Rest], Acc) -> - {ok, [N], []} = io_lib:fread("~16u", [A,B]), - hex_string_to_binary(Rest, [N | Acc]).
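%% For reference, an inverse of hex_string_to_binary/1 (not needed by the
%% module, which sends values in escaped rather than hex form); sketch
%% only:

binary_to_hex_string(Bin) when is_binary(Bin) ->
    lists:flatten([io_lib:format("~2.16.0b", [B])
                   || B <- binary_to_list(Bin)]).

%% e.g. binary_to_hex_string(<<222,173,190,239>>) gives "deadbeef", and
%% hex_string_to_binary("deadbeef") gives back <<222,173,190,239>>.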
- -%% ---- INTERNAL RAW FUNCTIONS ---- - -internal_deliver(Q, ReadMsg, State = #dbstate { db_conn = Conn }) -> - QStr = binary_to_escaped_string(term_to_binary(Q)), - case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of - {selected, _, []} -> - odbc:commit(Conn, commit), - {ok, empty, State}; - {selected, _, [{ReadSeqId}]} -> - case odbc:sql_query(Conn, "select is_delivered, msg_id from ledger where queue = " ++ QStr ++ - " and seq_id = " ++ integer_to_list(ReadSeqId)) of - {selected, _, []} -> - {ok, empty, State}; - {selected, _, [{IsDeliveredStr, MsgIdStr}]} -> - IsDelivered = IsDeliveredStr /= "0", - if IsDelivered -> ok; - true -> odbc:sql_query(Conn, "update ledger set is_delivered = true where queue = " ++ - QStr ++ " and seq_id = " ++ integer_to_list(ReadSeqId)) - end, - MsgId = binary_to_term(hex_string_to_binary(MsgIdStr)), - %% yeah, this is really necessary. sigh - MsgIdStr2 = binary_to_escaped_string(term_to_binary(MsgId)), - odbc:sql_query(Conn, "update sequence set next_read = " ++ integer_to_list(ReadSeqId + 1) ++ - " where queue = " ++ QStr), - if ReadMsg -> - {selected, _, [{MsgBodyStr}]} = - odbc:sql_query(Conn, "select msg from message where msg_id = " ++ MsgIdStr2), - odbc:commit(Conn, commit), - MsgBody = hex_string_to_binary(MsgBodyStr), - BodySize = size(MsgBody), - {ok, {MsgId, MsgBody, BodySize, IsDelivered, {MsgId, ReadSeqId}}, State}; - true -> - odbc:commit(Conn, commit), - {ok, {MsgId, IsDelivered, {MsgId, ReadSeqId}}, State} - end - end - end. - -internal_ack(Q, MsgSeqIds, State) -> - remove_messages(Q, MsgSeqIds, true, State). - -%% Q is only needed if LedgerDelete /= false -%% called from tx_cancel with LedgerDelete = false -%% called from internal_tx_cancel with LedgerDelete = true -%% called from ack with LedgerDelete = true -remove_messages(Q, MsgSeqIds, LedgerDelete, State = #dbstate { db_conn = Conn }) -> - QStr = binary_to_escaped_string(term_to_binary(Q)), - lists:foreach( - fun ({MsgId, SeqId}) -> - MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), - {selected, _, [{RefCount}]} = - odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ - MsgIdStr), - case RefCount of - 1 -> odbc:sql_query(Conn, "delete from message where msg_id = " ++ - MsgIdStr); - _ -> odbc:sql_query(Conn, "update message set ref_count = " ++ - integer_to_list(RefCount - 1) ++ " where msg_id = " ++ - MsgIdStr) - end, - if LedgerDelete -> - odbc:sql_query(Conn, "delete from ledger where queue = " ++ - QStr ++ " and seq_id = " ++ integer_to_list(SeqId)); - true -> ok - end - end, MsgSeqIds), - odbc:commit(Conn, commit), - {ok, State}. - -internal_tx_publish(MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> - MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), - MsgStr = binary_to_escaped_string(MsgBody), - case odbc:sql_query(Conn, "select ref_count from message where msg_id = " ++ MsgIdStr) of - {selected, _, []} -> - odbc:sql_query(Conn, "insert into message (msg_id, msg, ref_count) values (" ++ - MsgIdStr ++ ", " ++ MsgStr ++ ", 1)"); - {selected, _, [{RefCount}]} -> - odbc:sql_query(Conn, "update message set ref_count = " ++ - integer_to_list(RefCount + 1) ++ " where msg_id = " ++ MsgIdStr) - end, - odbc:commit(Conn, commit), - {ok, State}. 
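%% The select-then-insert-or-update in internal_tx_publish/3 above is only
%% race-free because a single gen_server owns the connection. For
%% comparison, the same ref-count bump as one statement; this uses
%% PostgreSQL's "on conflict" clause, which postdates this code, so it is
%% purely an illustrative sketch:

upsert_message(Conn, MsgIdStr, MsgStr) ->
    odbc:sql_query(
      Conn, "insert into message (msg_id, msg, ref_count) values (" ++
          MsgIdStr ++ ", " ++ MsgStr ++ ", 1) on conflict (msg_id) " ++
          "do update set ref_count = message.ref_count + 1").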
- -internal_tx_commit(Q, PubMsgIds, AckSeqIds, State = #dbstate { db_conn = Conn }) -> - QStr = binary_to_escaped_string(term_to_binary(Q)), - {InsertOrUpdate, NextWrite} = - case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of - {selected, _, []} -> {insert, 0}; - {selected, _, [{NextWrite2}]} -> {update, NextWrite2} - end, - NextWrite3 = - lists:foldl(fun (MsgId, WriteSeqInteger) -> - MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), - odbc:sql_query(Conn, - "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ - QStr ++ ", " ++ integer_to_list(WriteSeqInteger) ++ ", false, " ++ - MsgIdStr ++ ")"), - WriteSeqInteger + 1 - end, NextWrite, PubMsgIds), - case InsertOrUpdate of - update -> odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(NextWrite3) ++ - " where queue = " ++ QStr); - insert -> odbc:sql_query(Conn, "insert into sequence (queue, next_read, next_write) values (" ++ - QStr ++ ", 0, " ++ integer_to_list(NextWrite3) ++ ")") - end, - odbc:commit(Conn, commit), - remove_messages(Q, AckSeqIds, true, State), - {ok, State}. - -internal_publish(Q, MsgId, MsgBody, State = #dbstate { db_conn = Conn }) -> - {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - MsgIdStr = binary_to_escaped_string(term_to_binary(MsgId)), - QStr = binary_to_escaped_string(term_to_binary(Q)), - NextWrite = - case odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr) of - {selected, _, []} -> - odbc:sql_query(Conn, - "insert into sequence (queue, next_read, next_write) values (" ++ - QStr ++ ", 0, 1)"), - 0; - {selected, _, [{NextWrite2}]} -> - odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(1 + NextWrite2) ++ - " where queue = " ++ QStr), - NextWrite2 - end, - odbc:sql_query(Conn, "insert into ledger (queue, seq_id, is_delivered, msg_id) values (" ++ - QStr ++ ", " ++ integer_to_list(NextWrite) ++ ", false, " ++ MsgIdStr ++ ")"), - odbc:commit(Conn, commit), - {ok, State1}. - -internal_tx_cancel(MsgIds, State) -> - MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), - remove_messages(undefined, MsgSeqIds, false, State). - -internal_requeue(Q, MsgSeqIds, State = #dbstate { db_conn = Conn }) -> - QStr = binary_to_escaped_string(term_to_binary(Q)), - {selected, _, [{WriteSeqId}]} = - odbc:sql_query(Conn, "select next_write from sequence where queue = " ++ QStr), - WriteSeqId2 = - lists:foldl( - fun ({_MsgId, SeqId}, NextWriteSeqId) -> - odbc:sql_query(Conn, "update ledger set seq_id = " ++ integer_to_list(NextWriteSeqId) ++ - " where seq_id = " ++ integer_to_list(SeqId) ++ " and queue = " ++ QStr), - NextWriteSeqId + 1 - end, WriteSeqId, MsgSeqIds), - odbc:sql_query(Conn, "update sequence set next_write = " ++ integer_to_list(WriteSeqId2) ++ - " where queue = " ++ QStr), - odbc:commit(Conn, commit), - {ok, State}. - - -compact_already_delivered(#dbstate { db_conn = Conn }) -> - {selected, _, Seqs} = odbc:sql_query(Conn, "select queue, next_read from sequence"), - lists:foreach( - fun ({QHexStr, ReadSeqId}) -> - Q = binary_to_term(hex_string_to_binary(QHexStr)), - QStr = binary_to_escaped_string(term_to_binary(Q)), - case odbc:sql_query(Conn, "select min(seq_id) from ledger where queue = " - ++ QStr) of - {selected, _, []} -> ok; - {selected, _, [{null}]} -> ok; %% AGH! 
- {selected, _, [{Min}]} -> - Gap = shuffle_up(Conn, QStr, Min - 1, ReadSeqId - 1, 0), - odbc:sql_query(Conn, "update sequence set next_read = " ++ - integer_to_list(Min + Gap) ++ - " where queue = " ++ QStr) - end - end, Seqs), - odbc:commit(Conn, commit). - -shuffle_up(_Conn, _QStr, SeqId, SeqId, Gap) -> - Gap; -shuffle_up(Conn, QStr, BaseSeqId, SeqId, Gap) -> - GapInc = - case odbc:sql_query(Conn, "select count(1) from ledger where queue = " ++ - QStr ++ " and seq_id = " ++ integer_to_list(SeqId)) of - {selected, _, [{"0"}]} -> - 1; - {selected, _, [{"1"}]} -> - if Gap =:= 0 -> ok; - true -> odbc:sql_query(Conn, "update ledger set seq_id = " ++ - integer_to_list(SeqId + Gap) ++ " where seq_id = " ++ - integer_to_list(SeqId) ++ " and queue = " ++ QStr) - end, - 0 - end, - shuffle_up(Conn, QStr, BaseSeqId, SeqId - 1, Gap + GapInc). - -internal_purge(Q, State = #dbstate { db_conn = Conn }) -> - QStr = binary_to_escaped_string(term_to_binary(Q)), - case odbc:sql_query(Conn, "select next_read from sequence where queue = " ++ QStr) of - {selected, _, []} -> - odbc:commit(Conn, commit), - {ok, 0, State}; - {selected, _, [{ReadSeqId}]} -> - odbc:sql_query(Conn, "update sequence set next_read = next_write where queue = " ++ QStr), - {selected, _, MsgSeqIds} = - odbc:sql_query(Conn, "select msg_id, seq_id from ledger where queue = " ++ - QStr ++ " and seq_id >= " ++ ReadSeqId), - MsgSeqIds2 = lists:map( - fun ({MsgIdStr, SeqIdStr}) -> - { binary_to_term(hex_string_to_binary(MsgIdStr)), - list_to_integer(SeqIdStr) } - end, MsgSeqIds), - {ok, State2} = remove_messages(Q, MsgSeqIds2, true, State), - {ok, length(MsgSeqIds2), State2} - end. diff --git a/src/rabbit_db_queue_schema.sql b/src/rabbit_db_queue_schema.sql deleted file mode 100644 index f5c49e8d..00000000 --- a/src/rabbit_db_queue_schema.sql +++ /dev/null @@ -1,22 +0,0 @@ -create table message ( - msg_id bytea PRIMARY KEY, - msg bytea, - ref_count integer NOT NULL -); -create index message_msg_id_index on message (msg_id); - -create table sequence ( - queue bytea PRIMARY KEY, - next_read integer NOT NULL, - next_write integer NOT NULL -); -create index sequence_queue_index on sequence (queue); - -create table ledger ( - queue bytea NOT NULL, - seq_id integer NOT NULL, - is_delivered boolean NOT NULL, - msg_id bytea NOT NULL -); -create index ledger_queue_seq_id_index on ledger (queue, seq_id); - -- cgit v1.2.1 From c0d066cb692606aed377912a4cf2e384a12fd9a3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 12:52:35 +0100 Subject: fixed line lengths --- src/rabbit.erl | 6 +-- src/rabbit_amqqueue.erl | 15 ++++--- src/rabbit_amqqueue_process.erl | 72 +++++++++++++++++++----------- src/rabbit_disk_queue.erl | 71 ++++++++++++++++++------------ src/rabbit_limiter.erl | 6 ++- src/rabbit_misc.erl | 3 +- src/rabbit_mixed_queue.erl | 3 +- src/rabbit_mnesia.erl | 9 ++-- src/rabbit_queue_mode_manager.erl | 15 ++++--- src/rabbit_tests.erl | 93 +++++++++++++++++++++++---------------- 10 files changed, 177 insertions(+), 116 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 2eecac5e..fbadc5f2 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -158,9 +158,9 @@ start(normal, []) -> ok = rabbit_exchange:recover(), {ok, DurableQueues} = rabbit_amqqueue:recover(), DurableQueueNames = - sets:from_list(lists:map( - fun(Q) -> Q #amqqueue.name end, DurableQueues)), - ok = rabbit_disk_queue:delete_non_durable_queues(DurableQueueNames) + sets:from_list([ Q #amqqueue.name || Q <- DurableQueues ]), + ok = 
rabbit_disk_queue:delete_non_durable_queues( + DurableQueueNames) end}, {"guid generator", fun () -> diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 01d40aa1..a1f36f31 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -136,12 +136,15 @@ recover_durable_queues() -> %% another node has deleted the queue (and possibly %% re-created it). case rabbit_misc:execute_mnesia_transaction( - fun () -> case mnesia:match_object( - rabbit_durable_queue, RecoveredQ, read) of - [_] -> ok = store_queue(Q), - true; - [] -> false - end + fun () -> + Match = + mnesia:match_object( + rabbit_durable_queue, RecoveredQ, read), + case Match of + [_] -> ok = store_queue(Q), + true; + [] -> false + end end) of true -> [Q|Acc]; false -> exit(Q#amqqueue.pid, shutdown), diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a701fa4d..6fc31664 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -40,7 +40,8 @@ -export([start_link/1]). --export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). +-export([init/1, terminate/2, code_change/3, + handle_call/3, handle_cast/2, handle_info/2]). -import(queue). -import(erlang). @@ -191,10 +192,11 @@ deliver_queue(Fun, FunAcc0, rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, {QName, self(), NextId, IsDelivered, Msg}), - NewUAM = case AckRequired of - true -> dict:store(NextId, {Msg, AckTag}, UAM); - false -> UAM - end, + NewUAM = + case AckRequired of + true -> dict:store(NextId, {Msg, AckTag}, UAM); + false -> UAM + end, NewC = C#cr{unsent_message_count = Count + 1, unacked_messages = NewUAM}, store_ch_record(NewC), @@ -210,9 +212,10 @@ deliver_queue(Fun, FunAcc0, {ActiveConsumers1, queue:in(QEntry, BlockedConsumers1)} end, - State3 = State2 #q { active_consumers = NewActiveConsumers, - blocked_consumers = NewBlockedConsumers, - next_msg_id = NextId + 1 + State3 = State2 #q { + active_consumers = NewActiveConsumers, + blocked_consumers = NewBlockedConsumers, + next_msg_id = NextId + 1 }, if Remaining == 0 -> {FunAcc1, State3}; true -> deliver_queue(Fun, FunAcc1, State3) @@ -238,7 +241,8 @@ deliver_queue(Fun, FunAcc0, deliver_from_queue(is_message_ready, undefined, #q { mixed_state = MS }) -> not rabbit_mixed_queue:is_empty(MS); -deliver_from_queue(AckRequired, Acc = undefined, State = #q { mixed_state = MS }) -> +deliver_from_queue(AckRequired, Acc = undefined, + State = #q { mixed_state = MS }) -> {Res, MS2} = rabbit_mixed_queue:deliver(MS), MS3 = case {Res, AckRequired} of {_, true} -> MS2; @@ -250,7 +254,8 @@ deliver_from_queue(AckRequired, Acc = undefined, State = #q { mixed_state = MS } {Res, Acc, State #q { mixed_state = MS3 }}. run_message_queue(State) -> - {undefined, State2} = deliver_queue(fun deliver_from_queue/3, undefined, State), + {undefined, State2} = + deliver_queue(fun deliver_from_queue/3, undefined, State), State2. 
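%% The Fun threaded through deliver_queue/3 above is called in two modes:
%% with the atom is_message_ready (plus accumulator and state) it answers
%% a boolean, so deliver_queue never asks for a message when none is
%% ready; with an AckRequired boolean it returns {Result, NewAcc,
%% NewState} where Result carries {Msg, IsDelivered, AckTag, Remaining}.
%% An illustrative instance that feeds messages from a plain list held in
%% the accumulator (names invented):

list_source(is_message_ready, Msgs, _State) ->
    Msgs =/= [];
list_source(_AckRequired, [Msg | Rest], State) ->
    {{Msg, false, noack, length(Rest)}, Rest, State}.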
attempt_immediate_delivery(none, _ChPid, Msg, State) -> @@ -260,8 +265,9 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> (AckRequired, false, State2) -> {AckTag, State3} = if AckRequired -> - {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered( - Msg, State2 #q.mixed_state), + {ok, AckTag2, MS} = + rabbit_mixed_queue:publish_delivered( + Msg, State2 #q.mixed_state), {AckTag2, State2 #q { mixed_state = MS }}; true -> {noack, State2} @@ -290,19 +296,24 @@ deliver_or_requeue_n([], State) -> run_message_queue(State); deliver_or_requeue_n(MsgsWithAcks, State) -> {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = - deliver_queue(fun deliver_or_requeue_msgs/3, {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, State), - {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), NewState #q.mixed_state), + deliver_queue(fun deliver_or_requeue_msgs/3, + {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, State), + {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), + NewState #q.mixed_state), case OutstandingMsgs of [] -> run_message_queue(NewState #q { mixed_state = MS }); _ -> {ok, MS2} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), NewState #q { mixed_state = MS2 } end. -deliver_or_requeue_msgs(is_message_ready, {Len, _AcksAcc, _MsgsWithAcks}, _State) -> +deliver_or_requeue_msgs(is_message_ready, {Len, _AcksAcc, _MsgsWithAcks}, + _State) -> -1 < Len; -deliver_or_requeue_msgs(false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> +deliver_or_requeue_msgs(false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, + State) -> {{Msg, true, noack, Len}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; -deliver_or_requeue_msgs(true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> +deliver_or_requeue_msgs(true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, + State) -> {{Msg, true, AckTag, Len}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. add_consumer(ChPid, Consumer, Queue) -> queue:in({ChPid, Consumer}, Queue). @@ -426,8 +437,10 @@ all_tx_record() -> all_tx() -> [Txn || {{txn, Txn}, _} <- get()]. -record_pending_message(Txn, ChPid, Message = #basic_message { is_persistent = IsPersistent }) -> - Tx = #tx{pending_messages = Pending, is_persistent = IsPersistentTxn } = lookup_tx(Txn), +record_pending_message(Txn, ChPid, Message = + #basic_message { is_persistent = IsPersistent }) -> + Tx = #tx{pending_messages = Pending, is_persistent = IsPersistentTxn } = + lookup_tx(Txn), record_current_channel_tx(ChPid, Txn), store_tx(Txn, Tx #tx { pending_messages = [Message | Pending], is_persistent = IsPersistentTxn orelse IsPersistent @@ -465,7 +478,8 @@ commit_transaction(Txn, State) -> rollback_transaction(Txn, State) -> #tx { pending_messages = PendingMessages } = lookup_tx(Txn), - {ok, MS} = rabbit_mixed_queue:tx_cancel(lists:reverse(PendingMessages), State #q.mixed_state), + {ok, MS} = rabbit_mixed_queue:tx_cancel(lists:reverse(PendingMessages), + State #q.mixed_state), erase_tx(Txn), State #q { mixed_state = MS }. @@ -534,7 +548,8 @@ handle_call({deliver_immediately, Txn, Message, ChPid}, _From, State) -> %% just all ready-to-consume queues get the message, with unready %% queues discarding the message? 
%% - {Delivered, NewState} = attempt_immediate_delivery(Txn, ChPid, Message, State), + {Delivered, NewState} = + attempt_immediate_delivery(Txn, ChPid, Message, State), reply(Delivered, NewState); handle_call({deliver, Txn, Message, ChPid}, _From, State) -> @@ -682,8 +697,8 @@ handle_call(purge, _From, State) -> reply({ok, Count}, State #q { mixed_state = MS }); -handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, - exclusive_consumer = Holder}) -> +handle_call({claim_queue, ReaderPid}, _From, + State = #q{owner = Owner, exclusive_consumer = Holder}) -> case Owner of none -> case check_exclusive_access(Holder, true, State) of @@ -696,7 +711,10 @@ handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, %% pid... reply(locked, State); ok -> - reply(ok, State#q{owner = {ReaderPid, erlang:monitor(process, ReaderPid)}}) + reply(ok, State #q { owner = + {ReaderPid, + erlang:monitor(process, ReaderPid)} }) + end; {ReaderPid, _MonitorRef} -> reply(ok, State); @@ -717,8 +735,10 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), case Txn of none -> - Acks = lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks), - {ok, MS} = rabbit_mixed_queue:ack(Acks, State #q.mixed_state), + Acks = lists:map(fun ({_Msg, AckTag}) -> AckTag end, + MsgWithAcks), + {ok, MS} = + rabbit_mixed_queue:ack(Acks, State #q.mixed_state), store_ch_record(C#cr{unacked_messages = Remaining}), noreply(State #q { mixed_state = MS }); _ -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3b30a0da..f6090634 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -46,7 +46,8 @@ -export([length/1, is_empty/1, next_write_seq/1]). --export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). +-export([stop/0, stop_and_obliterate/0, + to_disk_only_mode/0, to_ram_disk_mode/0]). -include("rabbit.hrl"). @@ -67,20 +68,22 @@ -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). --record(dqstate, {msg_location_dets, %% where are messages? - msg_location_ets, %% as above, but for ets version - operation_mode, %% ram_disk | disk_only - file_summary, %% what's in the files? - sequences, %% next read and write for each q - current_file_num, %% current file name as number - current_file_name, %% current file name - current_file_handle, %% current file handle - current_offset, %% current offset within current file - current_dirty, %% has the current file been written to since the last fsync? - file_size_limit, %% how big can our files get? - read_file_handles, %% file handles for reading (LRU) - read_file_handles_limit %% how many file handles can we open? - }). +-record(dqstate, + {msg_location_dets, %% where are messages? + msg_location_ets, %% as above, but for ets version + operation_mode, %% ram_disk | disk_only + file_summary, %% what's in the files? + sequences, %% next read and write for each q + current_file_num, %% current file name as number + current_file_name, %% current file name + current_file_handle, %% current file handle + current_offset, %% current offset within current file + current_dirty, %% has the current file been written to + %% since the last fsync? + file_size_limit, %% how big can our files get? + read_file_handles, %% file handles for reading (LRU) + read_file_handles_limit %% how many file handles can we open? + }). %% The components: %% @@ -233,23 +236,28 @@ -spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). 
-spec(publish/4 :: (queue_name(), msg_id(), binary(), bool()) -> 'ok'). --spec(publish_with_seq/5 :: (queue_name(), msg_id(), seq_id_or_next(), binary(), bool()) -> 'ok'). +-spec(publish_with_seq/5 :: (queue_name(), msg_id(), seq_id_or_next(), binary(), + bool()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> {'empty' | {msg_id(), binary(), non_neg_integer(), bool(), {msg_id(), seq_id()}, non_neg_integer()}}). -spec(phantom_deliver/1 :: (queue_name()) -> - { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, non_neg_integer()}}). + { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, + non_neg_integer()}}). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). +-spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> + 'ok'). -spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id_or_next()}], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). --spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, seq_id_or_next()}]) -> 'ok'). +-spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, + seq_id_or_next()}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). --spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}, seq_id()}]). +-spec(dump_queue/1 :: (queue_name()) -> + [{msg_id(), binary(), non_neg_integer(), bool(), + {msg_id(), seq_id()}, seq_id()}]). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -321,7 +329,8 @@ dump_queue(Q) -> gen_server2:call(?SERVER, {dump_queue, Q}, infinity). delete_non_durable_queues(DurableQueues) -> - gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). + gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, + infinity). stop() -> gen_server2:call(?SERVER, stop, infinity). @@ -483,7 +492,8 @@ handle_call(to_ram_disk_mode, _From, handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), {reply, Length, State}; -handle_call({next_write_seq, Q}, _From, State = #dqstate { sequences = Sequences }) -> +handle_call({next_write_seq, Q}, _From, + State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, WriteSeqId, _Length} = sequence_lookup(Sequences, Q), {reply, WriteSeqId, State}; handle_call({dump_queue, Q}, _From, State) -> @@ -494,10 +504,12 @@ handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {reply, ok, State1}. 
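%% In the specs above, seq_id_or_next() is either an explicit sequence
%% number or the atom 'next', meaning "assign the next write position for
%% this queue". Resolving it is essentially (illustrative sketch, name
%% invented):

resolve_seq_id(next, NextWriteSeqId) -> NextWriteSeqId;
resolve_seq_id(SeqId, _NextWriteSeqId)
  when is_integer(SeqId), SeqId >= 0 -> SeqId.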
handle_cast({publish, Q, MsgId, MsgBody}, State) -> - {ok, _MsgSeqId, State1} = internal_publish(Q, MsgId, next, MsgBody, false, State), + {ok, _MsgSeqId, State1} = + internal_publish(Q, MsgId, next, MsgBody, false, State), {noreply, State1}; handle_cast({publish_with_seq, Q, MsgId, SeqId, MsgBody}, State) -> - {ok, _MsgSeqId, State1} = internal_publish(Q, MsgId, SeqId, MsgBody, false, State), + {ok, _MsgSeqId, State1} = + internal_publish(Q, MsgId, SeqId, MsgBody, false, State), {noreply, State1}; handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), @@ -702,14 +714,16 @@ sequence_lookup(Sequences, Q) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, ReadMsg, FakeDeliver, State = #dqstate { sequences = Sequences }) -> +internal_deliver(Q, ReadMsg, FakeDeliver, + State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; [{Q, SeqId, SeqId, 0}] -> {ok, empty, State}; [{Q, ReadSeqId, WriteSeqId, Length}] when Length > 0 -> Remaining = Length - 1, {ok, Result, NextReadSeqId, State1} = - internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State), + internal_read_message( + Q, ReadSeqId, FakeDeliver, ReadMsg, State), true = ets:insert(Sequences, {Q, NextReadSeqId, WriteSeqId, Remaining}), {ok, @@ -873,7 +887,8 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, _TotalSize}] = dets_ets_lookup(State, MsgId), SeqId2 = adjust_last_msg_seq_id( Q, ExpectedSeqId, SeqId, write), - NextSeqId2 = find_next_seq_id(SeqId2, NextSeqId), + NextSeqId2 = + find_next_seq_id(SeqId2, NextSeqId), ok = mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 9f3dcbd0..92078acd 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -101,8 +101,10 @@ ack(LimiterPid, Count) -> gen_server2:cast(LimiterPid, {ack, Count}). register(undefined, _QPid) -> ok; register(LimiterPid, QPid) -> gen_server2:cast(LimiterPid, {register, QPid}). -unregister(undefined, _QPid) -> ok; -unregister(LimiterPid, QPid) -> gen_server2:cast(LimiterPid, {unregister, QPid}). +unregister(undefined, _QPid) -> + ok; +unregister(LimiterPid, QPid) -> + gen_server2:cast(LimiterPid, {unregister, QPid}). %%---------------------------------------------------------------------------- %% gen_server callbacks diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index c965c693..bf4a69db 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -349,7 +349,8 @@ dirty_foreach_key1(F, TableName, K) -> end. dirty_dump_log(FileName) -> - {ok, LH} = disk_log:open([{name, dirty_dump_log}, {mode, read_only}, {file, FileName}]), + {ok, LH} = disk_log:open([{name, dirty_dump_log}, {mode, read_only}, + {file, FileName}]), dirty_dump_log1(LH, disk_log:chunk(LH, start)), disk_log:close(LH). 
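%% dirty_dump_log1/2 above walks the log by feeding each continuation
%% back to disk_log:chunk/2 until eof. The general shape of such a loop,
%% as an illustrative sketch:

fold_log(Log, Fun, Acc) ->
    fold_log1(Log, Fun, Acc, disk_log:chunk(Log, start)).

fold_log1(_Log, _Fun, Acc, eof) ->
    Acc;
fold_log1(Log, Fun, Acc, {Cont, Terms}) ->
    fold_log1(Log, Fun, lists:foldl(Fun, Acc, Terms),
              disk_log:chunk(Log, Cont)).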
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 5082fe55..5933357c 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -118,7 +118,8 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, {Acks, Requeue, Length} = deliver_all_messages(Q, IsDurable, [], [], 0), ok = if Requeue == [] -> ok; - true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) + true -> + rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, ok = if Acks == [] -> ok; true -> rabbit_disk_queue:ack(Q, lists:reverse(Acks)) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 77e309fe..9e341584 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -188,7 +188,8 @@ ensure_mnesia_not_running() -> check_schema_integrity() -> %%TODO: more thorough checks - case catch [mnesia:table_info(Tab, version) || Tab <- replicated_table_names()] of + case catch [mnesia:table_info(Tab, version) + || Tab <- replicated_table_names()] of {'EXIT', Reason} -> {error, Reason}; _ -> ok end. @@ -353,12 +354,14 @@ create_local_table_copies(Type) -> HasDiscCopies = case lists:keysearch(disc_copies, 1, TabDef) of false -> false; - {value, {disc_copies, List1}} -> lists:member(node(), List1) + {value, {disc_copies, List1}} -> + lists:member(node(), List1) end, HasDiscOnlyCopies = case lists:keysearch(disc_only_copies, 1, TabDef) of false -> false; - {value, {disc_only_copies, List2}} -> lists:member(node(), List2) + {value, {disc_only_copies, List2}} -> + lists:member(node(), List2) end, StorageType = case Type of diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 32ad6b4c..c37cb842 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -69,28 +69,29 @@ init([]) -> queues = [] }}. -handle_call({register, Pid}, _From, State = #state { queues = Qs, mode = Mode }) -> +handle_call({register, Pid}, _From, + State = #state { queues = Qs, mode = Mode }) -> Result = case Mode of unlimited -> mixed; _ -> disk end, {reply, {ok, Result}, State #state { queues = [Pid | Qs] }}. -handle_cast({change_memory_usage, true}, State = #state { mode = disk_only }) -> +handle_cast({change_memory_usage, true}, State = #state { mode=disk_only }) -> {noreply, State}; -handle_cast({change_memory_usage, true}, State = #state { mode = ram_disk }) -> +handle_cast({change_memory_usage, true}, State = #state { mode=ram_disk }) -> constrain_queues(true, State #state.queues), {noreply, State #state { mode = disk_only }}; -handle_cast({change_memory_usage, true}, State = #state { mode = unlimited }) -> +handle_cast({change_memory_usage, true}, State = #state { mode=unlimited }) -> ok = rabbit_disk_queue:to_disk_only_mode(), {noreply, State #state { mode = ram_disk }}; -handle_cast({change_memory_usage, false}, State = #state { mode = unlimited }) -> +handle_cast({change_memory_usage, false}, State = #state { mode=unlimited }) -> {noreply, State}; -handle_cast({change_memory_usage, false}, State = #state { mode = ram_disk }) -> +handle_cast({change_memory_usage, false}, State = #state { mode=ram_disk }) -> ok = rabbit_disk_queue:to_ram_disk_mode(), {noreply, State #state { mode = unlimited }}; -handle_cast({change_memory_usage, false}, State = #state { mode = disk_only }) -> +handle_cast({change_memory_usage, false}, State = #state { mode=disk_only }) -> constrain_queues(false, State #state.queues), {noreply, State #state { mode = ram_disk }}. 
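%% The handle_cast clauses above form a three-rung ladder driven by the
%% memory alarm: unlimited <-> ram_disk <-> disk_only, each step applying
%% its side effect (switching the disk queue's mode, or constraining and
%% unconstraining the queues) before moving. The pure transition table,
%% as an illustrative sketch:

next_mode(true,  unlimited) -> ram_disk;   %% disk queue to disk_only mode
next_mode(true,  ram_disk)  -> disk_only;  %% constrain the queues
next_mode(true,  disk_only) -> disk_only;  %% already fully constrained
next_mode(false, unlimited) -> unlimited;  %% nothing to relax
next_mode(false, ram_disk)  -> unlimited;  %% disk queue to ram_disk mode
next_mode(false, disk_only) -> ram_disk.   %% unconstrain the queues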
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a2a31a18..62d5c03a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -730,17 +730,18 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> || Q <- Qs] end ]]), {Deliver, ok} = - timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [begin SeqIds = - [begin - Remaining = MsgCount - N, - {N, Msg, MsgSizeBytes, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(Q), - SeqId - end || N <- List], - ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) - end || Q <- Qs] - end]]), + timer:tc( + ?MODULE, rdq_time_commands, + [[fun() -> [begin SeqIds = + [begin + Remaining = MsgCount - N, + {N, Msg, MsgSizeBytes, false, SeqId, + Remaining} = rabbit_disk_queue:deliver(Q), + SeqId + end || N <- List], + ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) + end || Q <- Qs] + end]]), io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| ~14.10f~n", [MsgCount, MsgSizeBytes, QCount, float(Startup), float(Publish), (Publish / (MsgCount * QCount)), @@ -749,8 +750,9 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> (Deliver / (MsgCount * QCount * MsgSizeBytes))]), rdq_stop(). -% we know each file is going to be 1024*1024*10 bytes in size (10MB), so make sure we have -% several files, and then keep punching holes in a reasonably sensible way. +% we know each file is going to be 1024*1024*10 bytes in size (10MB), +% so make sure we have several files, and then keep punching holes in +% a reasonably sensible way. rdq_stress_gc(MsgCount) -> rdq_virgin(), rdq_start(), @@ -804,7 +806,8 @@ rdq_test_startup_with_queue_gaps() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), SeqId + {N, Msg, 256, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), %% ack every other message we have delivered (starting at the _first_) @@ -819,10 +822,12 @@ rdq_test_startup_with_queue_gaps() -> rdq_stop(), rdq_start(), io:format("Startup (with shuffle) done~n", []), - %% should have shuffled up. So we should now get lists:seq(2,500,2) already delivered + %% should have shuffled up. 
So we should now get + %% lists:seq(2,500,2) already delivered Seqs2 = [begin Remaining = round(Total - ((Half + N)/2)), - {N, Msg, 256, true, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + {N, Msg, 256, true, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(2,Half,2)], rabbit_disk_queue:tx_commit(q, [], Seqs2), @@ -830,7 +835,8 @@ rdq_test_startup_with_queue_gaps() -> %% and now fetch the rest Seqs3 = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + {N, Msg, 256, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(1 + Half,Total)], rabbit_disk_queue:tx_commit(q, [], Seqs3), @@ -852,7 +858,8 @@ rdq_test_redeliver() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + {N, Msg, 256, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), @@ -867,16 +874,19 @@ rdq_test_redeliver() -> end, true, Seqs), rabbit_disk_queue:tx_commit(q, [], []), io:format("Redeliver and acking done~n", []), - %% we should now get the 2nd half in order, followed by every-other-from-the-first-half + %% we should now get the 2nd half in order, followed by + %% every-other-from-the-first-half Seqs2 = [begin Remaining = round(Total - N + (Half/2)), - {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + {N, Msg, 256, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(1+Half, Total)], rabbit_disk_queue:tx_commit(q, [], Seqs2), Seqs3 = [begin Remaining = round((Half - N) / 2) - 1, - {N, Msg, 256, true, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + {N, Msg, 256, true, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(1, Half, 2)], rabbit_disk_queue:tx_commit(q, [], Seqs3), @@ -897,7 +907,8 @@ rdq_test_purge() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + {N, Msg, 256, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), SeqId end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), @@ -926,7 +937,8 @@ rdq_test_dump_queue() -> lists:foreach( fun (N) -> Remaining = Total - N, - {N, Msg, 256, false, _SeqId, Remaining} = rabbit_disk_queue:deliver(q) + {N, Msg, 256, false, _SeqId, Remaining} = + rabbit_disk_queue:deliver(q) end, All), [] = rabbit_disk_queue:dump_queue(q), rdq_stop(), @@ -943,22 +955,25 @@ rdq_test_mixed_queue_modes() -> rdq_start(), Payload = <<0:(8*256)>>, {ok, MS} = rabbit_mixed_queue:start_link(q, true, mixed), - MS2 = lists:foldl(fun (_N, MS1) -> - Msg = rabbit_basic:message(x, <<>>, <<>>, Payload), - {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1), - MS1a - end, MS, lists:seq(1,10)), - MS4 = lists:foldl(fun (_N, MS3) -> - Msg = (rabbit_basic:message(x, <<>>, <<>>, Payload)) - #basic_message { is_persistent = true }, - {ok, MS3a} = rabbit_mixed_queue:publish(Msg, MS3), - MS3a - end, MS2, lists:seq(1,10)), - MS6 = lists:foldl(fun (_N, MS5) -> - Msg = rabbit_basic:message(x, <<>>, <<>>, Payload), - {ok, MS5a} = rabbit_mixed_queue:publish(Msg, MS5), - MS5a - end, MS4, lists:seq(1,10)), + MS2 = lists:foldl( + fun (_N, MS1) -> + Msg = rabbit_basic:message(x, <<>>, <<>>, Payload), + {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1), + MS1a + end, MS, lists:seq(1,10)), + MS4 = lists:foldl( + fun (_N, MS3) -> + Msg = 
(rabbit_basic:message(x, <<>>, <<>>, Payload)) + #basic_message { is_persistent = true }, + {ok, MS3a} = rabbit_mixed_queue:publish(Msg, MS3), + MS3a + end, MS2, lists:seq(1,10)), + MS6 = lists:foldl( + fun (_N, MS5) -> + Msg = rabbit_basic:message(x, <<>>, <<>>, Payload), + {ok, MS5a} = rabbit_mixed_queue:publish(Msg, MS5), + MS5a + end, MS4, lists:seq(1,10)), 30 = rabbit_mixed_queue:length(MS6), io:format("Published a mixture of messages~n"), {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode(MS6), -- cgit v1.2.1 From cabc5ab67fc2bc5925a0eec1b10d6fac409b2b7b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 12:53:27 +0100 Subject: tabs and line length --- include/rabbit.hrl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index a2840931..b8425baf 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -62,7 +62,8 @@ -record(listener, {node, protocol, host, port}). --record(basic_message, {exchange_name, routing_key, content, guid, is_persistent}). +-record(basic_message, {exchange_name, routing_key, content, + guid, is_persistent}). -record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id, next_seq_id}). @@ -136,7 +137,7 @@ #basic_message{exchange_name :: exchange_name(), routing_key :: routing_key(), content :: content(), - guid :: guid(), + guid :: guid(), is_persistent :: bool()}). -type(message() :: basic_message()). -type(delivery() :: -- cgit v1.2.1 From cd3312a50493218168fa26349023c9935d52292e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 13:09:57 +0100 Subject: mainly if ==> case in suitable places, but also some formatting --- src/rabbit_amqqueue_process.erl | 44 ++++++++++---------- src/rabbit_disk_queue.erl | 84 ++++++++++++++++++++++----------------- src/rabbit_limiter.erl | 6 +-- src/rabbit_mnesia.erl | 5 ++- src/rabbit_queue_mode_manager.erl | 18 ++++++--- 5 files changed, 88 insertions(+), 69 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6fc31664..e2a99d19 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -217,8 +217,9 @@ deliver_queue(Fun, FunAcc0, blocked_consumers = NewBlockedConsumers, next_msg_id = NextId + 1 }, - if Remaining == 0 -> {FunAcc1, State3}; - true -> deliver_queue(Fun, FunAcc1, State3) + case Remaining of + 0 -> {FunAcc1, State3}; + _ -> deliver_queue(Fun, FunAcc1, State3) end; %% if IsMsgReady then we've hit the limiter false when IsMsgReady -> @@ -264,12 +265,13 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> true; (AckRequired, false, State2) -> {AckTag, State3} = - if AckRequired -> + case AckRequired of + true -> {ok, AckTag2, MS} = rabbit_mixed_queue:publish_delivered( Msg, State2 #q.mixed_state), {AckTag2, State2 #q { mixed_state = MS }}; - true -> + false -> {noack, State2} end, {{Msg, false, AckTag, 0}, true, State3} @@ -579,12 +581,13 @@ handle_call({basic_get, ChPid, NoAck}, _From, {{Msg, IsDelivered, AckTag, Remaining}, MS2} -> AckRequired = not(NoAck), {ok, MS3} = - if AckRequired -> + case AckRequired of + true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), {ok, MS2}; - true -> + false -> rabbit_mixed_queue:ack([AckTag], MS2) end, Message = {QName, self(), NextId, IsDelivered, Msg}, @@ -612,15 +615,14 @@ handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, ack_required = not(NoAck)}, store_ch_record(C#cr{consumer_count 
= ConsumerCount +1, limiter_pid = LimiterPid}), - if ConsumerCount == 0 -> - ok = rabbit_limiter:register(LimiterPid, self()); - true -> - ok + case ConsumerCount of + 0 -> ok = rabbit_limiter:register(LimiterPid, self()); + _ -> ok end, - ExclusiveConsumer = - if ExclusiveConsume -> {ChPid, ConsumerTag}; - true -> ExistingHolder - end, + ExclusiveConsumer = case ExclusiveConsume of + true -> {ChPid, ConsumerTag}; + false -> ExistingHolder + end, State1 = State#q{has_had_consumers = true, exclusive_consumer = ExclusiveConsumer}, ok = maybe_send_reply(ChPid, OkMsg), @@ -650,11 +652,10 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, reply(ok, State); C = #cr{consumer_count = ConsumerCount, limiter_pid = LimiterPid} -> store_ch_record(C#cr{consumer_count = ConsumerCount - 1}), - if ConsumerCount == 1 -> - ok = rabbit_limiter:unregister(LimiterPid, self()); - true -> - ok - end, + ok = case ConsumerCount of + 1 -> rabbit_limiter:unregister(LimiterPid, self()); + _ -> ok + end, ok = maybe_send_reply(ChPid, OkMsg), NewState = State#q{exclusive_consumer = cancel_holder(ChPid, @@ -791,8 +792,9 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end)); handle_cast({constrain, Constrain}, State = #q { mixed_state = MS }) -> - {ok, MS2} = if Constrain -> rabbit_mixed_queue:to_disk_only_mode(MS); - true -> rabbit_mixed_queue:to_mixed_mode(MS) + {ok, MS2} = case Constrain of + true -> rabbit_mixed_queue:to_disk_only_mode(MS); + false -> rabbit_mixed_queue:to_mixed_mode(MS) end, noreply(State #q { mixed_state = MS2 }). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index f6090634..87c7abc9 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -419,8 +419,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> end, %% read is only needed so that we can seek {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), - if Exists -> {ok, Offset} = file:position(FileHdl, {bof, Offset}); - true -> %% new file, so preallocate + case Exists of + true -> {ok, Offset} = file:position(FileHdl, {bof, Offset}); + false -> %% new file, so preallocate ok = preallocate(FileHdl, FileSizeLimit, Offset) end, {ok, State1 #dqstate { current_file_handle = FileHdl }}. @@ -552,9 +553,10 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)), true = ets:delete_all_objects(MsgLocationEts), - if FileHdl =:= undefined -> ok; - true -> file:sync(FileHdl), - file:close(FileHdl) + case FileHdl of + undefined -> ok; + _ -> file:sync(FileHdl), + file:close(FileHdl) end, dict:fold(fun (_File, Hdl, _Acc) -> file:close(Hdl) @@ -652,10 +654,11 @@ get_read_handle(File, State = current_file_handle = CurHdl, current_dirty = IsDirty }) -> - IsDirty2 = if CurName == File andalso IsDirty -> + IsDirty2 = case CurName of + File when IsDirty -> file:sync(CurHdl), false; - true -> IsDirty + _ -> IsDirty end, Now = now(), {FileHdl, ReadHdls1, ReadHdlsAge1} = @@ -749,13 +752,14 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) end, - if ReadMsg -> + case ReadMsg of + true -> {FileHdl, State1} = get_read_handle(File, State), {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State1}; - true -> + false -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} end. 
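%% preallocate/3, called above when a new file is created, extends the
%% file to its full size up front so appends never have to grow it. One
%% way to write it (illustrative sketch; the actual implementation may
%% differ):

preallocate(FileHdl, FileSizeLimit, FinalPos) ->
    {ok, FileSizeLimit} = file:position(FileHdl, {bof, FileSizeLimit}),
    ok = file:truncate(FileHdl), %% truncating past eof extends the file
    {ok, FinalPos} = file:position(FileHdl, {bof, FinalPos}),
    ok.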
@@ -785,7 +789,8 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, [{MsgId, RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), Files3 = - if 1 =:= RefCount -> + case RefCount of + 1 -> ok = dets_ets_delete(State, MsgId), [{File, ValidTotalSize, ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), @@ -799,19 +804,20 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, if CurName =:= File -> Files2; true -> sets:add_element(File, Files2) end; - 1 < RefCount -> + _ when 1 < RefCount -> ok = dets_ets_insert( State, {MsgId, RefCount - 1, File, Offset, TotalSize}), Files2 end, - ok = if MnesiaDelete -> + ok = case MnesiaDelete of + true -> mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); - MnesiaDelete =:= txn -> + txn -> mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); - true -> ok + _ -> ok end, Files3 end, sets:new(), MsgSeqIds), @@ -834,10 +840,11 @@ internal_tx_publish(MsgId, MsgBody, ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, - ContiguousTop1 = if CurOffset =:= ContiguousTop -> + ContiguousTop1 = case CurOffset of + ContiguousTop -> %% can't be any holes in this file ValidTotalSize1; - true -> ContiguousTop + _ -> ContiguousTop end, true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}), @@ -904,9 +911,10 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), {Sync2, WriteSeqId3, State3} end), - true = if PubList =:= [] -> true; - true -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, - Length + erlang:length(PubList)}) + true = case PubList of + [] -> true; + _ -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, + Length + erlang:length(PubList)}) end, IsDirty2 = if IsDirty andalso Sync -> ok = file:sync(CurHdl), @@ -1089,8 +1097,9 @@ maybe_roll_to_new_file(Offset, file_summary = FileSummary } ) when Offset >= FileSizeLimit -> - ok = if IsDirty -> file:sync(CurHdl); - true -> ok + ok = case IsDirty of + true -> file:sync(CurHdl); + false -> ok end, ok = file:close(CurHdl), NextNum = CurNum + 1, @@ -1181,8 +1190,9 @@ adjust_meta_and_combine( end. 
sort_msg_locations_by_offset(Asc, List) -> - Comp = if Asc -> fun erlang:'<'/2; - true -> fun erlang:'>'/2 + Comp = case Asc of + true -> fun erlang:'<'/2; + false -> fun erlang:'>'/2 end, lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> Comp(OffA, OffB) @@ -1212,10 +1222,11 @@ combine_files({Source, SourceValid, _SourceContiguousTop, %% the DestinationContiguousTop to a tmp file then truncate, %% copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source - if DestinationContiguousTop =:= DestinationValid -> + case DestinationContiguousTop of + DestinationValid -> ok = truncate_and_extend_file(DestinationHdl, DestinationValid, ExpectedSize); - true -> + _ -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = file:open(form_filename(Tmp), @@ -1413,9 +1424,9 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> %% subtraction to get the %% right length lists:max([Write, NextWrite]), Length}, - if Orig /= Repl -> - true = ets:insert(Sequences, Repl); - true -> true + case Orig of + Repl -> true; + _ -> true = ets:insert(Sequences, Repl) end end end, true, rabbit_disk_queue) @@ -1455,14 +1466,15 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of [] -> 1; [Obj] -> - if Gap =:= 0 -> ok; - true -> mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { - queue_and_seq_id = {Q, SeqId + Gap }, - next_seq_id = SeqId + Gap + 1 - }, - write), - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) + case Gap of + 0 -> ok; + _ -> mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { + queue_and_seq_id = {Q, SeqId + Gap }, + next_seq_id = SeqId + Gap + 1 + }, + write), + mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) end, 0 end, diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 92078acd..9f3dcbd0 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -101,10 +101,8 @@ ack(LimiterPid, Count) -> gen_server2:cast(LimiterPid, {ack, Count}). register(undefined, _QPid) -> ok; register(LimiterPid, QPid) -> gen_server2:cast(LimiterPid, {register, QPid}). -unregister(undefined, _QPid) -> - ok; -unregister(LimiterPid, QPid) -> - gen_server2:cast(LimiterPid, {unregister, QPid}). +unregister(undefined, _QPid) -> ok; +unregister(LimiterPid, QPid) -> gen_server2:cast(LimiterPid, {unregister, QPid}). %%---------------------------------------------------------------------------- %% gen_server callbacks diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 9e341584..6c583cb4 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -272,7 +272,8 @@ init_db(ClusterNodes) -> ExtraNodes = ClusterNodes -- [node()], case mnesia:change_config(extra_db_nodes, ExtraNodes) of {ok, []} -> - if WasDiskNode -> + case WasDiskNode of + true -> case check_schema_integrity() of ok -> ok; @@ -287,7 +288,7 @@ init_db(ClusterNodes) -> ok = move_db(), ok = create_schema() end; - true -> + false -> ok = create_schema() end; {ok, [_|_]} -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index c37cb842..c905d99b 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -77,21 +77,27 @@ handle_call({register, Pid}, _From, end, {reply, {ok, Result}, State #state { queues = [Pid | Qs] }}. 
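sort_msg_locations_by_offset above picks its comparison fun once, outside the sort, rather than branching on Asc for every element pair. The same technique in a reduced, self-contained form (names and tuple shape are illustrative):

    -module(sort_by_offset).
    -export([sort/2]).

    %% Sort {Id, Offset} pairs ascending or descending by offset,
    %% choosing the comparator a single time.
    sort(Asc, List) ->
        Comp = case Asc of
                   true  -> fun erlang:'<'/2;
                   false -> fun erlang:'>'/2
               end,
        lists:sort(fun ({_IdA, OffA}, {_IdB, OffB}) -> Comp(OffA, OffB) end,
                   List).

    %% sort_by_offset:sort(true, [{a,3},{b,1}]) returns [{b,1},{a,3}].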
-handle_cast({change_memory_usage, true}, State = #state { mode=disk_only }) -> +handle_cast({change_memory_usage, true}, + State = #state { mode = disk_only }) -> {noreply, State}; -handle_cast({change_memory_usage, true}, State = #state { mode=ram_disk }) -> +handle_cast({change_memory_usage, true}, + State = #state { mode = ram_disk }) -> constrain_queues(true, State #state.queues), {noreply, State #state { mode = disk_only }}; -handle_cast({change_memory_usage, true}, State = #state { mode=unlimited }) -> +handle_cast({change_memory_usage, true}, + State = #state { mode = unlimited }) -> ok = rabbit_disk_queue:to_disk_only_mode(), {noreply, State #state { mode = ram_disk }}; -handle_cast({change_memory_usage, false}, State = #state { mode=unlimited }) -> +handle_cast({change_memory_usage, false}, + State = #state { mode = unlimited }) -> {noreply, State}; -handle_cast({change_memory_usage, false}, State = #state { mode=ram_disk }) -> +handle_cast({change_memory_usage, false}, + State = #state { mode = ram_disk }) -> ok = rabbit_disk_queue:to_ram_disk_mode(), {noreply, State #state { mode = unlimited }}; -handle_cast({change_memory_usage, false}, State = #state { mode=disk_only }) -> +handle_cast({change_memory_usage, false}, + State = #state { mode = disk_only }) -> constrain_queues(false, State #state.queues), {noreply, State #state { mode = ram_disk }}. -- cgit v1.2.1 From 6a6356582c923c3b408bbd34a73a33e72b76f328 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 13:18:15 +0100 Subject: post case/if discussion --- src/rabbit_disk_queue.erl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 87c7abc9..eeb6896a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -654,11 +654,10 @@ get_read_handle(File, State = current_file_handle = CurHdl, current_dirty = IsDirty }) -> - IsDirty2 = case CurName of - File when IsDirty -> + IsDirty2 = if CurName =:= File andalso IsDirty -> file:sync(CurHdl), false; - _ -> IsDirty + true -> IsDirty end, Now = now(), {FileHdl, ReadHdls1, ReadHdlsAge1} = @@ -840,11 +839,10 @@ internal_tx_publish(MsgId, MsgBody, ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + ?FILE_PACKING_ADJUSTMENT, - ContiguousTop1 = case CurOffset of - ContiguousTop -> + ContiguousTop1 = if CurOffset =:= ContiguousTop -> %% can't be any holes in this file ValidTotalSize1; - _ -> ContiguousTop + true -> ContiguousTop end, true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}), @@ -1222,11 +1220,10 @@ combine_files({Source, SourceValid, _SourceContiguousTop, %% the DestinationContiguousTop to a tmp file then truncate, %% copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source - case DestinationContiguousTop of - DestinationValid -> + if DestinationContiguousTop =:= DestinationValid -> ok = truncate_and_extend_file(DestinationHdl, DestinationValid, ExpectedSize); - _ -> + true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = file:open(form_filename(Tmp), -- cgit v1.2.1 From bf5ba32265dea67d4191937edf1ed6cc524089ab Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 13:20:25 +0100 Subject: further discussion --- src/rabbit_disk_queue.erl | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/rabbit_disk_queue.erl 
b/src/rabbit_disk_queue.erl index eeb6896a..f3e63127 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1407,25 +1407,26 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> mnesia:foldl( fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> NextWrite = SeqId + 1, - case ets:lookup(Sequences, Q) of - [] -> true = - ets:insert_new(Sequences, + true = + case ets:lookup(Sequences, Q) of + [] -> ets:insert_new(Sequences, {Q, SeqId, NextWrite, -1}); - [Orig = {Q, Read, Write, Length}] -> - Repl = {Q, lists:min([Read, SeqId]), - %% Length is wrong here, but - %% it doesn't matter because - %% we'll pull out the gaps in - %% remove_gaps_in_sequences in - %% then do a straight - %% subtraction to get the - %% right length - lists:max([Write, NextWrite]), Length}, - case Orig of - Repl -> true; - _ -> true = ets:insert(Sequences, Repl) - end - end + [Orig = {Q, Read, Write, Length}] -> + Repl = {Q, lists:min([Read, SeqId]), + %% Length is wrong here, + %% but it doesn't matter + %% because we'll pull out + %% the gaps in + %% remove_gaps_in_sequences + %% in then do a straight + %% subtraction to get the + %% right length + lists:max([Write, NextWrite]), + Length}, + if Orig =:= Repl -> true; + true -> ets:insert(Sequences, Repl) + end + end end, true, rabbit_disk_queue) end), remove_gaps_in_sequences(State), -- cgit v1.2.1 From 67af217a39218ee8cf5e344786e8166c821adcdb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 13:33:38 +0100 Subject: More tidying --- src/rabbit_amqqueue_process.erl | 26 +++++++++++--------------- src/rabbit_disk_queue.erl | 2 +- src/rabbit_mixed_queue.erl | 8 ++++---- src/rabbit_tests.erl | 6 +++--- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index e2a99d19..620b497b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -96,7 +96,7 @@ start_link(Q) -> init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), {ok, Mode} = rabbit_queue_mode_manager:register(self()), - {ok, MS} = rabbit_mixed_queue:start_link(QName, Durable, Mode), + {ok, MS} = rabbit_mixed_queue:init(QName, Durable, Mode), {ok, #q{q = Q, owner = none, exclusive_consumer = none, @@ -461,20 +461,17 @@ commit_transaction(Txn, State) -> } = lookup_tx(Txn), PendingMessagesOrdered = lists:reverse(PendingMessages), PendingAcksOrdered = lists:append(lists:reverse(PendingAcks)), - {ok, MS} = + Acks = case lookup_ch(ChPid) of - not_found -> - rabbit_mixed_queue:tx_commit( - PendingMessagesOrdered, [], State #q.mixed_state); + not_found -> []; C = #cr { unacked_messages = UAM } -> {MsgWithAcks, Remaining} = collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), - rabbit_mixed_queue:tx_commit( - PendingMessagesOrdered, - lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks), - State #q.mixed_state) + [ AckTag || {_Msg, AckTag} <- MsgWithAcks ] end, + {ok, MS} = rabbit_mixed_queue:tx_commit( + PendingMessagesOrdered, Acks, State #q.mixed_state), State #q { mixed_state = MS }. 
rollback_transaction(Txn, State) -> @@ -736,8 +733,7 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), case Txn of none -> - Acks = lists:map(fun ({_Msg, AckTag}) -> AckTag end, - MsgWithAcks), + Acks = [ AckTag || {_Msg, AckTag} <- MsgWithAcks ], {ok, MS} = rabbit_mixed_queue:ack(Acks, State #q.mixed_state), store_ch_record(C#cr{unacked_messages = Remaining}), @@ -792,10 +788,10 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end)); handle_cast({constrain, Constrain}, State = #q { mixed_state = MS }) -> - {ok, MS2} = case Constrain of - true -> rabbit_mixed_queue:to_disk_only_mode(MS); - false -> rabbit_mixed_queue:to_mixed_mode(MS) - end, + {ok, MS2} = (case Constrain of + true -> fun rabbit_mixed_queue:to_disk_only_mode/1; + false -> fun rabbit_mixed_queue:to_mixed_mode/1 + end)(MS), noreply(State #q { mixed_state = MS2 }). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index f3e63127..e82feb99 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1413,6 +1413,7 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> {Q, SeqId, NextWrite, -1}); [Orig = {Q, Read, Write, Length}] -> Repl = {Q, lists:min([Read, SeqId]), + lists:max([Write, NextWrite]), %% Length is wrong here, %% but it doesn't matter %% because we'll pull out @@ -1421,7 +1422,6 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> %% in then do a straight %% subtraction to get the %% right length - lists:max([Write, NextWrite]), Length}, if Orig =:= Repl -> true; true -> ets:insert(Sequences, Repl) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 5933357c..a2e01bda 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -33,7 +33,7 @@ -include("rabbit.hrl"). --export([start_link/3]). +-export([init/3]). -export([publish/2, publish_delivered/2, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, @@ -49,12 +49,12 @@ } ). -start_link(Queue, IsDurable, disk) -> +init(Queue, IsDurable, disk) -> purge_non_persistent_messages( #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, is_durable = IsDurable, length = 0 }); -start_link(Queue, IsDurable, mixed) -> - {ok, State} = start_link(Queue, IsDurable, disk), +init(Queue, IsDurable, mixed) -> + {ok, State} = init(Queue, IsDurable, disk), to_mixed_mode(State). 
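The tidying above replaces lists:map with a list comprehension wherever all that is wanted is one element of each tuple. A shell transcript showing the equivalence (the data is made up):

    1> MsgWithAcks = [{msg1, tag1}, {msg2, tag2}].
    [{msg1,tag1},{msg2,tag2}]
    2> lists:map(fun ({_Msg, AckTag}) -> AckTag end, MsgWithAcks).
    [tag1,tag2]
    3> [AckTag || {_Msg, AckTag} <- MsgWithAcks].
    [tag1,tag2]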
to_disk_only_mode(State = #mqstate { mode = disk }) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 62d5c03a..f45a36bb 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -954,7 +954,7 @@ rdq_test_mixed_queue_modes() -> rdq_virgin(), rdq_start(), Payload = <<0:(8*256)>>, - {ok, MS} = rabbit_mixed_queue:start_link(q, true, mixed), + {ok, MS} = rabbit_mixed_queue:init(q, true, mixed), MS2 = lists:foldl( fun (_N, MS1) -> Msg = rabbit_basic:message(x, <<>>, <<>>, Payload), @@ -998,7 +998,7 @@ rdq_test_mixed_queue_modes() -> io:format("Converted to disk only mode~n"), rdq_stop(), rdq_start(), - {ok, MS12} = rabbit_mixed_queue:start_link(q, true, mixed), + {ok, MS12} = rabbit_mixed_queue:init(q, true, mixed), 10 = rabbit_mixed_queue:length(MS12), io:format("Recovered queue~n"), {MS14, AckTags} = @@ -1018,7 +1018,7 @@ rdq_test_mixed_queue_modes() -> io:format("Converted to disk only mode~n"), rdq_stop(), rdq_start(), - {ok, MS17} = rabbit_mixed_queue:start_link(q, true, mixed), + {ok, MS17} = rabbit_mixed_queue:init(q, true, mixed), 0 = rabbit_mixed_queue:length(MS17), io:format("Recovered queue~n"), rdq_stop(), -- cgit v1.2.1 From b2722146d4e3eb4b80f952a2dc01f631e78a3e2d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 14:10:17 +0100 Subject: adjusted HO-ness in deliver queue beautifully. Thus in the deliver_from_queue case, we now reduce n calls to mixed_queue:is_empty to 1 call and pass around the remaining count as the acc. l33t --- src/rabbit_amqqueue_process.erl | 87 +++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 620b497b..0ab44a53 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -170,7 +170,7 @@ record_current_channel_tx(ChPid, Txn) -> %% that wasn't happening already) store_ch_record((ch_record(ChPid))#cr{txn = Txn}). -deliver_queue(Fun, FunAcc0, +deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, State = #q{q = #amqqueue{name = QName}, active_consumers = ActiveConsumers, blocked_consumers = BlockedConsumers, @@ -182,12 +182,12 @@ deliver_queue(Fun, FunAcc0, C = #cr{limiter_pid = LimiterPid, unsent_message_count = Count, unacked_messages = UAM} = ch_record(ChPid), - IsMsgReady = Fun(is_message_ready, FunAcc0, State), + IsMsgReady = PredFun(FunAcc0, State), case (IsMsgReady andalso rabbit_limiter:can_send( LimiterPid, self(), AckRequired )) of true -> - {{Msg, IsDelivered, AckTag, Remaining}, FunAcc1, State2} = - Fun(AckRequired, FunAcc0, State), + {{Msg, IsDelivered, AckTag}, FunAcc1, State2} = + DeliverFun(AckRequired, FunAcc0, State), ?LOGDEBUG("AMQQUEUE ~p DELIVERY:~n~p~n", [QName, Msg]), rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, @@ -217,10 +217,7 @@ deliver_queue(Fun, FunAcc0, blocked_consumers = NewBlockedConsumers, next_msg_id = NextId + 1 }, - case Remaining of - 0 -> {FunAcc1, State3}; - _ -> deliver_queue(Fun, FunAcc1, State3) - end; + deliver_queue(Funs, FunAcc1, State3); %% if IsMsgReady then we've hit the limiter false when IsMsgReady -> store_ch_record(C#cr{is_limit_active = true}), @@ -229,7 +226,7 @@ deliver_queue(Fun, FunAcc0, ActiveConsumers, BlockedConsumers), deliver_queue( - Fun, FunAcc0, + Funs, FunAcc0, State#q{active_consumers = NewActiveConsumers, blocked_consumers = NewBlockedConsumers}); false -> @@ -240,30 +237,35 @@ deliver_queue(Fun, FunAcc0, {FunAcc0, State} end. 
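deliver_queue/3 now takes a {PredFun, DeliverFun} pair and an opaque accumulator: PredFun(FunAcc, State) says whether another message can be delivered, and DeliverFun(AckRequired, FunAcc, State) produces one. A stripped-down sketch of that calling convention with the channel and consumer machinery removed (everything here is illustrative):

    -module(deliver_funs).
    -export([drain/2]).

    %% PredFun(Acc, State) -> boolean().
    %% DeliverFun(AckRequired, Acc, State) -> {Msg, Acc1, State1}.
    drain({PredFun, DeliverFun} = Funs, {Acc, State}) ->
        case PredFun(Acc, State) of
            true  -> {_Msg, Acc1, State1} = DeliverFun(false, Acc, State),
                     drain(Funs, {Acc1, State1});
            false -> {Acc, State}
        end.

    %% Example: drain a plain list standing in for the queue.
    %% deliver_funs:drain({fun (L, _) -> L =/= [] end,
    %%                     fun (_Ack, [H|T], S) -> {H, T, S} end},
    %%                    {[a,b,c], state}) returns {[], state}.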
-deliver_from_queue(is_message_ready, undefined, #q { mixed_state = MS }) -> - not rabbit_mixed_queue:is_empty(MS); -deliver_from_queue(AckRequired, Acc = undefined, - State = #q { mixed_state = MS }) -> +deliver_from_queue_pred(IsEmpty, _State) -> + not IsEmpty. +deliver_from_queue_deliver(AckRequired, false, + State = #q { mixed_state = MS }) -> {Res, MS2} = rabbit_mixed_queue:deliver(MS), - MS3 = case {Res, AckRequired} of - {_, true} -> MS2; - {empty, _} -> MS2; - {{_Msg, _IsDelivered, AckTag, _Remaining}, false} -> - {ok, MS4} = rabbit_mixed_queue:ack([AckTag], MS2), - MS4 - end, - {Res, Acc, State #q { mixed_state = MS3 }}. - -run_message_queue(State) -> - {undefined, State2} = - deliver_queue(fun deliver_from_queue/3, undefined, State), + {Res2, MS3, IsEmpty} = + case Res of + empty -> {empty, MS2, true}; + {Msg, IsDelivered, AckTag, Remaining} -> + {ok, MS4} = case AckRequired of + true -> {ok, MS2}; + false -> rabbit_mixed_queue:ack([AckTag], MS2) + end, + {{Msg, IsDelivered, AckTag}, MS4, 0 == Remaining} + end, + {Res2, IsEmpty, State #q { mixed_state = MS3 }}. + +run_message_queue(State = #q { mixed_state = MS }) -> + Funs = { fun deliver_from_queue_pred/2, + fun deliver_from_queue_deliver/3 }, + IsEmpty = rabbit_mixed_queue:is_empty(MS), + {_IsEmpty2, State2} = + deliver_queue(Funs, IsEmpty, State), State2. attempt_immediate_delivery(none, _ChPid, Msg, State) -> - Fun = - fun (is_message_ready, false, _State) -> - true; - (AckRequired, false, State2) -> + PredFun = fun (IsEmpty, _State) -> not IsEmpty end, + DeliverFun = + fun (AckRequired, false, State2) -> {AckTag, State3} = case AckRequired of true -> @@ -274,9 +276,9 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> false -> {noack, State2} end, - {{Msg, false, AckTag, 0}, true, State3} + {{Msg, false, AckTag}, true, State3} end, - deliver_queue(Fun, false, State); + deliver_queue({ PredFun, DeliverFun }, false, State); attempt_immediate_delivery(Txn, ChPid, Msg, State) -> {ok, MS} = rabbit_mixed_queue:tx_publish(Msg, State #q.mixed_state), record_pending_message(Txn, ChPid, Msg), @@ -297,9 +299,11 @@ deliver_or_enqueue(Txn, ChPid, Msg, State) -> deliver_or_requeue_n([], State) -> run_message_queue(State); deliver_or_requeue_n(MsgsWithAcks, State) -> + Funs = { fun deliver_or_requeue_msgs_pred/2, + fun deliver_or_requeue_msgs_deliver/3 }, {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = - deliver_queue(fun deliver_or_requeue_msgs/3, - {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, State), + deliver_queue(Funs, {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, + State), {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), NewState #q.mixed_state), case OutstandingMsgs of @@ -308,15 +312,14 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> NewState #q { mixed_state = MS2 } end. -deliver_or_requeue_msgs(is_message_ready, {Len, _AcksAcc, _MsgsWithAcks}, - _State) -> - -1 < Len; -deliver_or_requeue_msgs(false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, - State) -> - {{Msg, true, noack, Len}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; -deliver_or_requeue_msgs(true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, - State) -> - {{Msg, true, AckTag, Len}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. +deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> + -1 < Len. 
+deliver_or_requeue_msgs_deliver( + false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> + {{Msg, true, noack}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; +deliver_or_requeue_msgs_deliver( + true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> + {{Msg, true, AckTag}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. add_consumer(ChPid, Consumer, Queue) -> queue:in({ChPid, Consumer}, Queue). -- cgit v1.2.1 From a8d81857f25962ff5af6ad1d14d1345c400a67a4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 14:54:46 +0100 Subject: added batching for autoacks for general run_message_queue --- src/rabbit_amqqueue_process.erl | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 0ab44a53..593746a7 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -237,30 +237,29 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, {FunAcc0, State} end. -deliver_from_queue_pred(IsEmpty, _State) -> +deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> not IsEmpty. -deliver_from_queue_deliver(AckRequired, false, +deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, State = #q { mixed_state = MS }) -> - {Res, MS2} = rabbit_mixed_queue:deliver(MS), - {Res2, MS3, IsEmpty} = - case Res of - empty -> {empty, MS2, true}; - {Msg, IsDelivered, AckTag, Remaining} -> - {ok, MS4} = case AckRequired of - true -> {ok, MS2}; - false -> rabbit_mixed_queue:ack([AckTag], MS2) - end, - {{Msg, IsDelivered, AckTag}, MS4, 0 == Remaining} + {{Msg, IsDelivered, AckTag, Remaining}, MS2} = + rabbit_mixed_queue:deliver(MS), + AutoAcks2 = + case AckRequired of + true -> AutoAcks; + false -> [AckTag | AutoAcks] end, - {Res2, IsEmpty, State #q { mixed_state = MS3 }}. + {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks2}, + State #q { mixed_state = MS2 }}. run_message_queue(State = #q { mixed_state = MS }) -> Funs = { fun deliver_from_queue_pred/2, fun deliver_from_queue_deliver/3 }, IsEmpty = rabbit_mixed_queue:is_empty(MS), - {_IsEmpty2, State2} = - deliver_queue(Funs, IsEmpty, State), - State2. + {{_IsEmpty2, AutoAcks}, State2} = + deliver_queue(Funs, {IsEmpty, []}, State), + {ok, MS2} = + rabbit_mixed_queue:ack(lists:reverse(AutoAcks), State2 #q.mixed_state), + State2 #q { mixed_state = MS2 }. attempt_immediate_delivery(none, _ChPid, Msg, State) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, -- cgit v1.2.1 From 4e61d413e034b213472024c42873e34db6e1a22e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 15:44:55 +0100 Subject: Renaming variables. 
All tests still pass --- src/rabbit_amqqueue_process.erl | 54 +++++++++++----------- src/rabbit_disk_queue.erl | 100 ++++++++++++++++++++-------------------- src/rabbit_mixed_queue.erl | 40 ++++++++-------- 3 files changed, 95 insertions(+), 99 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 593746a7..6dbd95c2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -186,7 +186,7 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, case (IsMsgReady andalso rabbit_limiter:can_send( LimiterPid, self(), AckRequired )) of true -> - {{Msg, IsDelivered, AckTag}, FunAcc1, State2} = + {{Msg, IsDelivered, AckTag}, FunAcc1, State1} = DeliverFun(AckRequired, FunAcc0, State), ?LOGDEBUG("AMQQUEUE ~p DELIVERY:~n~p~n", [QName, Msg]), rabbit_channel:deliver( @@ -212,12 +212,12 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, {ActiveConsumers1, queue:in(QEntry, BlockedConsumers1)} end, - State3 = State2 #q { + State2 = State1 #q { active_consumers = NewActiveConsumers, blocked_consumers = NewBlockedConsumers, next_msg_id = NextId + 1 }, - deliver_queue(Funs, FunAcc1, State3); + deliver_queue(Funs, FunAcc1, State2); %% if IsMsgReady then we've hit the limiter false when IsMsgReady -> store_ch_record(C#cr{is_limit_active = true}), @@ -241,41 +241,41 @@ deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> not IsEmpty. deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, State = #q { mixed_state = MS }) -> - {{Msg, IsDelivered, AckTag, Remaining}, MS2} = + {{Msg, IsDelivered, AckTag, Remaining}, MS1} = rabbit_mixed_queue:deliver(MS), - AutoAcks2 = + AutoAcks1 = case AckRequired of true -> AutoAcks; false -> [AckTag | AutoAcks] end, - {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks2}, - State #q { mixed_state = MS2 }}. + {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, + State #q { mixed_state = MS1 }}. run_message_queue(State = #q { mixed_state = MS }) -> Funs = { fun deliver_from_queue_pred/2, fun deliver_from_queue_deliver/3 }, IsEmpty = rabbit_mixed_queue:is_empty(MS), - {{_IsEmpty2, AutoAcks}, State2} = + {{_IsEmpty1, AutoAcks}, State1} = deliver_queue(Funs, {IsEmpty, []}, State), - {ok, MS2} = - rabbit_mixed_queue:ack(lists:reverse(AutoAcks), State2 #q.mixed_state), - State2 #q { mixed_state = MS2 }. + {ok, MS1} = + rabbit_mixed_queue:ack(lists:reverse(AutoAcks), State1 #q.mixed_state), + State1 #q { mixed_state = MS1 }. 
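The batching change threads the auto-ack tags through the fold accumulator so that rabbit_mixed_queue:ack/2 runs once per drain rather than once per message. A sketch of the shape with delivery and acking abstracted into funs (hypothetical, not the real API):

    -module(ack_batch).
    -export([drain_with_acks/3]).

    %% DeliverFun(Acc) -> {AckTag, Acc1} | empty.
    %% AckFun([AckTag]) -> ok, and is called exactly once, with the
    %% tags in delivery order (they are accumulated newest-first).
    drain_with_acks(DeliverFun, AckFun, Acc0) ->
        {Acc, AutoAcks} = drain(DeliverFun, Acc0, []),
        ok = AckFun(lists:reverse(AutoAcks)),
        Acc.

    drain(DeliverFun, Acc, AutoAcks) ->
        case DeliverFun(Acc) of
            {AckTag, Acc1} -> drain(DeliverFun, Acc1, [AckTag | AutoAcks]);
            empty          -> {Acc, AutoAcks}
        end.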
attempt_immediate_delivery(none, _ChPid, Msg, State) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = - fun (AckRequired, false, State2) -> - {AckTag, State3} = + fun (AckRequired, false, State1) -> + {AckTag, State2} = case AckRequired of true -> - {ok, AckTag2, MS} = + {ok, AckTag1, MS} = rabbit_mixed_queue:publish_delivered( - Msg, State2 #q.mixed_state), - {AckTag2, State2 #q { mixed_state = MS }}; + Msg, State1 #q.mixed_state), + {AckTag1, State1 #q { mixed_state = MS }}; false -> - {noack, State2} + {noack, State1} end, - {{Msg, false, AckTag}, true, State3} + {{Msg, false, AckTag}, true, State2} end, deliver_queue({ PredFun, DeliverFun }, false, State); attempt_immediate_delivery(Txn, ChPid, Msg, State) -> @@ -307,8 +307,8 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> NewState #q.mixed_state), case OutstandingMsgs of [] -> run_message_queue(NewState #q { mixed_state = MS }); - _ -> {ok, MS2} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), - NewState #q { mixed_state = MS2 } + _ -> {ok, MS1} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), + NewState #q { mixed_state = MS1 } end. deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> @@ -378,7 +378,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> deliver_or_requeue_n( [MsgWithAck || {_MsgId, MsgWithAck} <- dict:to_list(UAM)], - State1 # q { + State1 #q { exclusive_consumer = case Holder of {ChPid, _} -> none; Other -> Other @@ -576,8 +576,8 @@ handle_call({basic_get, ChPid, NoAck}, _From, mixed_state = MS }) -> case rabbit_mixed_queue:deliver(MS) of - {empty, MS2} -> reply(empty, State #q { mixed_state = MS2 }); - {{Msg, IsDelivered, AckTag, Remaining}, MS2} -> + {empty, MS1} -> reply(empty, State #q { mixed_state = MS1 }); + {{Msg, IsDelivered, AckTag, Remaining}, MS1} -> AckRequired = not(NoAck), {ok, MS3} = case AckRequired of @@ -585,9 +585,9 @@ handle_call({basic_get, ChPid, NoAck}, _From, C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - {ok, MS2}; + {ok, MS1}; false -> - rabbit_mixed_queue:ack([AckTag], MS2) + rabbit_mixed_queue:ack([AckTag], MS1) end, Message = {QName, self(), NextId, IsDelivered, Msg}, reply({ok, Remaining, Message}, @@ -790,11 +790,11 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end)); handle_cast({constrain, Constrain}, State = #q { mixed_state = MS }) -> - {ok, MS2} = (case Constrain of + {ok, MS1} = (case Constrain of true -> fun rabbit_mixed_queue:to_disk_only_mode/1; false -> fun rabbit_mixed_queue:to_mixed_mode/1 end)(MS), - noreply(State #q { mixed_state = MS2 }). + noreply(State #q { mixed_state = MS1 }). 
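The constrain clause above selects a fun with a case and applies it directly, instead of duplicating the call in both branches. The same shape in a self-contained module (functions are illustrative):

    -module(select_fun).
    -export([scale/2]).

    double(X) -> X * 2.
    halve(X)  -> X div 2.

    %% The case expression evaluates to a fun which is applied
    %% immediately, keeping a single call site.
    scale(Up, X) ->
        (case Up of
             true  -> fun double/1;
             false -> fun halve/1
         end)(X).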
handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e82feb99..a33a4b28 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -654,7 +654,7 @@ get_read_handle(File, State = current_file_handle = CurHdl, current_dirty = IsDirty }) -> - IsDirty2 = if CurName =:= File andalso IsDirty -> + IsDirty1 = if CurName =:= File andalso IsDirty -> file:sync(CurHdl), false; true -> IsDirty @@ -680,10 +680,10 @@ get_read_handle(File, State = {ok, {Hdl, Then}} -> {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} end, - ReadHdls3 = dict:store(File, {FileHdl, Now}, ReadHdls1), + ReadHdls2 = dict:store(File, {FileHdl, Now}, ReadHdls1), ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), - {FileHdl, State #dqstate { read_file_handles = {ReadHdls3, ReadHdlsAge3}, - current_dirty = IsDirty2 + {FileHdl, State #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge3}, + current_dirty = IsDirty1 }}. adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> @@ -784,10 +784,10 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, }) -> Files = lists:foldl( - fun ({MsgId, SeqId}, Files2) -> + fun ({MsgId, SeqId}, Files1) -> [{MsgId, RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), - Files3 = + Files2 = case RefCount of 1 -> ok = dets_ets_delete(State, MsgId), @@ -800,28 +800,26 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, {File, (ValidTotalSize-TotalSize- ?FILE_PACKING_ADJUSTMENT), ContiguousTop1, Left, Right}), - if CurName =:= File -> Files2; - true -> sets:add_element(File, Files2) + if CurName =:= File -> Files1; + true -> sets:add_element(File, Files1) end; _ when 1 < RefCount -> ok = dets_ets_insert( State, {MsgId, RefCount - 1, File, Offset, TotalSize}), - Files2 + Files1 end, ok = case MnesiaDelete of - true -> - mnesia:dirty_delete(rabbit_disk_queue, - {Q, SeqId}); - txn -> - mnesia:delete(rabbit_disk_queue, - {Q, SeqId}, write); + true -> mnesia:dirty_delete(rabbit_disk_queue, + {Q, SeqId}); + txn -> mnesia:delete(rabbit_disk_queue, + {Q, SeqId}, write); _ -> ok end, - Files3 + Files2 end, sets:new(), MsgSeqIds), - State2 = compact(Files, State), - {ok, State2}. + State1 = compact(Files, State), + {ok, State1}. internal_tx_publish(MsgId, MsgBody, State = #dqstate { current_file_handle = CurHdl, @@ -870,12 +868,12 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, [{_, FirstSeqIdTo}|_] -> {InitReadSeqId, InitWriteSeqId, InitLength} = sequence_lookup(Sequences, Q), - InitReadSeqId2 = determine_next_read_id( + InitReadSeqId1 = determine_next_read_id( InitReadSeqId, InitWriteSeqId, FirstSeqIdTo), { zip_with_tail(PubMsgSeqIds, {last, {next, next}}), - InitWriteSeqId, InitReadSeqId2, InitLength} + InitWriteSeqId, InitReadSeqId1, InitLength} end, - {atomic, {Sync, WriteSeqId, State2}} = + {atomic, {Sync, WriteSeqId, State1}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), @@ -884,42 +882,42 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, %% it's been published, which is clearly %% nonsense. I.e. in commit, do not do things in an %% order which _could_not_ have happened. 
- {Sync2, WriteSeqId3} = + {Sync1, WriteSeqId1} = lists:foldl( fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, {Acc, ExpectedSeqId}) -> [{MsgId, _RefCount, File, _Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), - SeqId2 = adjust_last_msg_seq_id( + SeqId1 = adjust_last_msg_seq_id( Q, ExpectedSeqId, SeqId, write), - NextSeqId2 = - find_next_seq_id(SeqId2, NextSeqId), + NextSeqId1 = + find_next_seq_id(SeqId1, NextSeqId), ok = mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = - {Q, SeqId2}, + {Q, SeqId1}, msg_id = MsgId, is_delivered = false, - next_seq_id = NextSeqId2 + next_seq_id = NextSeqId1 }, write), - {Acc orelse (CurName =:= File), NextSeqId2} + {Acc orelse (CurName =:= File), NextSeqId1} end, {false, PubAcc}, PubList), - {ok, State3} = remove_messages(Q, AckSeqIds, txn, State), - {Sync2, WriteSeqId3, State3} + {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), + {Sync1, WriteSeqId1, State2} end), true = case PubList of [] -> true; _ -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, Length + erlang:length(PubList)}) end, - IsDirty2 = if IsDirty andalso Sync -> + IsDirty1 = if IsDirty andalso Sync -> ok = file:sync(CurHdl), false; true -> IsDirty end, - {ok, State2 #dqstate { current_dirty = IsDirty2 }}. + {ok, State1 #dqstate { current_dirty = IsDirty1 }}. %% SeqId can be 'next' internal_publish(Q, MsgId, SeqId, MsgBody, IsDelivered, State) -> @@ -971,44 +969,44 @@ internal_requeue(Q, MsgSeqIds = [{_, {FirstSeqIdTo, _}}|_], %% as they have no concept of sequence id anyway). {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), - ReadSeqId2 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo), + ReadSeqId1 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo), MsgSeqIdsZipped = zip_with_tail(MsgSeqIds, {last, {next, {next, true}}}), - {atomic, {WriteSeqId2, Q}} = + {atomic, {WriteSeqId1, Q}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), lists:foldl(fun requeue_message/2, {WriteSeqId, Q}, MsgSeqIdsZipped) end), - true = ets:insert(Sequences, {Q, ReadSeqId2, WriteSeqId2, + true = ets:insert(Sequences, {Q, ReadSeqId1, WriteSeqId1, Length + erlang:length(MsgSeqIds)}), {ok, State}. requeue_message({{{MsgId, SeqIdOrig}, {SeqIdTo, NewIsDelivered}}, {_NextMsgSeqId, {NextSeqIdTo, _NextNewIsDelivered}}}, {ExpectedSeqIdTo, Q}) -> - SeqIdTo2 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), - NextSeqIdTo2 = find_next_seq_id(SeqIdTo2, NextSeqIdTo), + SeqIdTo1 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), + NextSeqIdTo1 = find_next_seq_id(SeqIdTo1, NextSeqIdTo), [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId, next_seq_id = NextSeqIdOrig }] = mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), - if SeqIdTo2 == SeqIdOrig andalso NextSeqIdTo2 == NextSeqIdOrig -> ok; + if SeqIdTo1 == SeqIdOrig andalso NextSeqIdTo1 == NextSeqIdOrig -> ok; true -> ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc {queue_and_seq_id = {Q, SeqIdTo2}, - next_seq_id = NextSeqIdTo2, + Obj #dq_msg_loc {queue_and_seq_id = {Q, SeqIdTo1}, + next_seq_id = NextSeqIdTo1, is_delivered = NewIsDelivered }, write), ok = mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write) end, - {NextSeqIdTo2, Q}. + {NextSeqIdTo1, Q}. 
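internal_tx_commit above folds over the publishes with an {Acc, ExpectedSeqId} pair, so each message sees the sequence id the previous one established. The counter-threading idiom in isolation (a toy version, not the real bookkeeping):

    -module(fold_acc).
    -export([number/1]).

    %% Pair each element with a running sequence id, then restore the
    %% input order (foldl builds the result reversed).
    number(Items) ->
        {Numbered, _NextSeqId} =
            lists:foldl(fun (Item, {Acc, SeqId}) ->
                                {[{SeqId, Item} | Acc], SeqId + 1}
                        end, {[], 0}, Items),
        lists:reverse(Numbered).

    %% fold_acc:number([a,b,c]) returns [{0,a},{1,b},{2,c}].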
internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, 0, State}; [{Q, ReadSeqId, WriteSeqId, _Length}] -> - {atomic, {ok, State2}} = + {atomic, {ok, State1}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), @@ -1025,7 +1023,7 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> remove_messages(Q, MsgSeqIds, txn, State) end), true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId, 0}), - {ok, WriteSeqId - ReadSeqId, State2} + {ok, WriteSeqId - ReadSeqId, State1} end. internal_delete_queue(Q, State) -> @@ -1279,7 +1277,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, State) -> - {FinalOffset, BlockStart2, BlockEnd2} = + {FinalOffset, BlockStart1, BlockEnd1} = lists:foldl( fun ({MsgId, RefCount, _Source, Offset, TotalSize}, {CurOffset, BlockStart, BlockEnd}) -> @@ -1309,9 +1307,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, end end, {InitOffset, undefined, undefined}, WorkList), %% do the last remaining block - BSize2 = BlockEnd2 - BlockStart2, - {ok, BlockStart2} = file:position(SourceHdl, {bof, BlockStart2}), - {ok, BSize2} = file:copy(SourceHdl, DestinationHdl, BSize2), + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = file:position(SourceHdl, {bof, BlockStart1}), + {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), ok. close_file(File, State = #dqstate { read_file_handles = @@ -1366,7 +1364,7 @@ del_index() -> %% hmm, something weird must be going on, but it's probably %% not the end of the world {aborted, {no_exists, rabbit_disk_queue,_}} -> ok; - E2 -> E2 + E1 -> E1 end. load_from_disk(State) -> @@ -1449,11 +1447,11 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> lists:foreach( fun ({Q, ReadSeqId, WriteSeqId, _Length}) -> Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), - ReadSeqId2 = ReadSeqId + Gap, - Length = WriteSeqId - ReadSeqId2, + ReadSeqId1 = ReadSeqId + Gap, + Length = WriteSeqId - ReadSeqId1, true = ets:insert(Sequences, - {Q, ReadSeqId2, WriteSeqId, Length}) + {Q, ReadSeqId1, WriteSeqId, Length}) end, ets:match_object(Sequences, '_')) end). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index a2e01bda..6caea55d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -133,14 +133,14 @@ deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> #basic_message { guid = MsgId, is_persistent = IsPersistent } = bin_to_msg(MsgBin), OnDisk = IsPersistent andalso IsDurable, - {Acks2, Requeue2, Length2} = + {Acks1, Requeue1, Length1} = if OnDisk -> {Acks, [{AckTag, {next, IsDelivered}} | Requeue], Length + 1 }; true -> {[AckTag | Acks], Requeue, Length} end, - deliver_all_messages(Q, IsDurable, Acks2, Requeue2, Length2) + deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1) end. 
msg_to_bin(Msg = #basic_message { content = Content }) -> @@ -196,25 +196,25 @@ deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, = rabbit_disk_queue:deliver(Q), #basic_message { guid = MsgId, is_persistent = IsPersistent } = Msg = bin_to_msg(MsgBin), - AckTag2 = if IsPersistent andalso IsDurable -> AckTag; + AckTag1 = if IsPersistent andalso IsDurable -> AckTag; true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), noack end, - {{Msg, IsDelivered, AckTag2, Remaining}, + {{Msg, IsDelivered, AckTag1, Remaining}, State #mqstate { length = Length - 1}}; deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> {{value, {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - IsDelivered, OnDisk}}, MsgBuf2} + IsDelivered, OnDisk}}, MsgBuf1} = queue:out(MsgBuf), AckTag = if OnDisk -> if IsPersistent andalso IsDurable -> - {MsgId, IsDelivered, AckTag2, _PersistRem} = + {MsgId, IsDelivered, AckTag1, _PersistRem} = rabbit_disk_queue:phantom_deliver(Q), - AckTag2; + AckTag1; true -> ok = rabbit_disk_queue:auto_ack_next_message(Q), noack @@ -223,7 +223,7 @@ deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, end, Rem = Length - 1, {{Msg, IsDelivered, AckTag, Rem}, - State #mqstate { msg_buf = MsgBuf2, length = Rem }}. + State #mqstate { msg_buf = MsgBuf1, length = Rem }}. remove_noacks(Acks) -> lists:filter(fun (A) -> A /= noack end, Acks). @@ -264,17 +264,16 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, length = Length }) -> - {PersistentPubs, MsgBuf2} = + {PersistentPubs, MsgBuf1} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, - {Acc, MsgBuf3}) -> + {Acc, MsgBuf2}) -> OnDisk = IsPersistent andalso IsDurable, - Acc2 = + Acc1 = if OnDisk -> [Msg #basic_message.guid | Acc]; true -> Acc end, - MsgBuf4 = queue:in({Msg, false, OnDisk}, MsgBuf3), - {Acc2, MsgBuf4} + {Acc1, queue:in({Msg, false, OnDisk}, MsgBuf2)} end, {[], MsgBuf}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match %% requirements of rabbit_disk_queue (ascending SeqIds) @@ -284,7 +283,7 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, rabbit_disk_queue:tx_commit( Q, lists:reverse(PersistentPubs), RealAcks) end, - {ok, State #mqstate { msg_buf = MsgBuf2, + {ok, State #mqstate { msg_buf = MsgBuf1, length = Length + erlang:length(Publishes) }}. 
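tx_commit in mixed mode makes one pass over the publishes, simultaneously collecting the ids destined for the disk queue and growing the in-memory buffer, then re-reverses the ids because the disk queue wants ascending SeqIds. A self-contained sketch of that single-pass split (the tuple shape is simplified):

    -module(pub_split).
    -export([split/1]).

    %% Each publish is {MsgId, IsPersistent}.
    split(Publishes) ->
        {PersistentIds, MsgBuf} =
            lists:foldl(fun ({MsgId, IsPersistent}, {Acc, Buf}) ->
                                Acc1 = case IsPersistent of
                                           true  -> [MsgId | Acc];
                                           false -> Acc
                                       end,
                                {Acc1, queue:in({MsgId, IsPersistent}, Buf)}
                        end, {[], queue:new()}, Publishes),
        %% foldl reverses, so restore ascending order for the disk queue
        {lists:reverse(PersistentIds), MsgBuf}.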
only_persistent_msg_ids(Pubs) ->
@@ -325,7 +324,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q,
                        true -> rabbit_disk_queue:requeue(
                                  Q, lists:reverse(RQ))
                     end,
-                    _AckTag2 = rabbit_disk_queue:publish(
+                    _AckTag1 = rabbit_disk_queue:publish(
                                  Q, MsgId, msg_to_bin(Msg), true),
                    []
            end, [], MessagesWithAckTags),
@@ -336,22 +335,21 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q,
                                    is_durable = IsDurable,
                                    length = Length
                                  }) ->
-    {PersistentPubs, MsgBuf2} =
+    {PersistentPubs, MsgBuf1} =
        lists:foldl(
          fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag},
-              {Acc, MsgBuf3}) ->
+              {Acc, MsgBuf2}) ->
                  OnDisk = IsDurable andalso IsPersistent,
-                 Acc2 =
+                 Acc1 =
                      if OnDisk -> [AckTag | Acc];
                         true -> Acc
                      end,
-                 MsgBuf4 = queue:in({Msg, true, OnDisk}, MsgBuf3),
-                 {Acc2, MsgBuf4}
+                 {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2)}
          end, {[], MsgBuf}, MessagesWithAckTags),
     ok = if [] == PersistentPubs -> ok;
             true -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs))
          end,
-    {ok, State #mqstate {msg_buf = MsgBuf2,
+    {ok, State #mqstate {msg_buf = MsgBuf1,
                          length = Length + erlang:length(MessagesWithAckTags)}}.
 
 purge(State = #mqstate { queue = Q, mode = disk, length = Count }) ->
-- 
cgit v1.2.1

From 3583826f205304df7cf5d6f82b96ef7c6605f5eb Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 17 Jun 2009 16:07:02 +0100
Subject: more renaming and mnesia change to bat file

---
 scripts/rabbitmq-server.bat       |  1 +
 src/rabbit_control.erl            |  4 ++--
 src/rabbit_queue_mode_manager.erl | 30 +++++++++++++++---------------
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat
index 9915727b..1cf6c6ba 100755
--- a/scripts/rabbitmq-server.bat
+++ b/scripts/rabbitmq-server.bat
@@ -127,6 +127,7 @@ if "%RABBITMQ_MNESIA_DIR%"=="" (
 -os_mon memsup_system_only true ^
 -os_mon system_memory_high_watermark 0.95 ^
 -mnesia dir \""%RABBITMQ_MNESIA_DIR%"\" ^
+-mnesia dump_log_write_threshold 10000 ^
 %CLUSTER_CONFIG% ^
 %RABBITMQ_SERVER_START_ARGS% ^
 %*

diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl
index 0ead9533..586c06c0 100644
--- a/src/rabbit_control.erl
+++ b/src/rabbit_control.erl
@@ -281,11 +281,11 @@ action(list_connections, Node, Args, Inform) ->
 
 action(reduce_memory_footprint, Node, _Args, Inform) ->
     Inform("Reducing memory footprint", []),
-    call(Node, {rabbit_queue_mode_manager, reduce_memory_usage, []});
+    call(Node, {rabbit_queue_mode_manager, reduce_memory_footprint, []});
 
 action(increase_memory_footprint, Node, _Args, Inform) ->
     Inform("Increasing memory footprint", []),
-    call(Node, {rabbit_queue_mode_manager, increase_memory_usage, []});
+    call(Node, {rabbit_queue_mode_manager, increase_memory_footprint, []});
 
 action(Command, Node, Args, Inform) ->
     {VHost, RemainingArgs} = parse_vhost_flag(Args),

diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl
index c905d99b..6bb197ef 100644
--- a/src/rabbit_queue_mode_manager.erl
+++ b/src/rabbit_queue_mode_manager.erl
@@ -38,8 +38,8 @@
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
 
--export([register/1, change_memory_usage/2,
-         reduce_memory_usage/0, increase_memory_usage/0]).
+-export([register/1, change_memory_footprint/2,
+         reduce_memory_footprint/0, increase_memory_footprint/0]).
 
 -define(SERVER, ?MODULE).
 
@@ -53,18 +53,18 @@ start_link() ->
 register(Pid) ->
     gen_server2:call(?SERVER, {register, Pid}).
-change_memory_usage(_Pid, Conserve) -> - gen_server2:cast(?SERVER, {change_memory_usage, Conserve}). +change_memory_footprint(_Pid, Conserve) -> + gen_server2:cast(?SERVER, {change_memory_footprint, Conserve}). -reduce_memory_usage() -> - gen_server2:cast(?SERVER, {change_memory_usage, true}). +reduce_memory_footprint() -> + gen_server2:cast(?SERVER, {change_memory_footprint, true}). -increase_memory_usage() -> - gen_server2:cast(?SERVER, {change_memory_usage, false}). +increase_memory_footprint() -> + gen_server2:cast(?SERVER, {change_memory_footprint, false}). init([]) -> process_flag(trap_exit, true), - ok = rabbit_alarm:register(self(), {?MODULE, change_memory_usage, []}), + ok = rabbit_alarm:register(self(), {?MODULE, change_memory_footprint, []}), {ok, #state { mode = unlimited, queues = [] }}. @@ -77,26 +77,26 @@ handle_call({register, Pid}, _From, end, {reply, {ok, Result}, State #state { queues = [Pid | Qs] }}. -handle_cast({change_memory_usage, true}, +handle_cast({change_memory_footprint, true}, State = #state { mode = disk_only }) -> {noreply, State}; -handle_cast({change_memory_usage, true}, +handle_cast({change_memory_footprint, true}, State = #state { mode = ram_disk }) -> constrain_queues(true, State #state.queues), {noreply, State #state { mode = disk_only }}; -handle_cast({change_memory_usage, true}, +handle_cast({change_memory_footprint, true}, State = #state { mode = unlimited }) -> ok = rabbit_disk_queue:to_disk_only_mode(), {noreply, State #state { mode = ram_disk }}; -handle_cast({change_memory_usage, false}, +handle_cast({change_memory_footprint, false}, State = #state { mode = unlimited }) -> {noreply, State}; -handle_cast({change_memory_usage, false}, +handle_cast({change_memory_footprint, false}, State = #state { mode = ram_disk }) -> ok = rabbit_disk_queue:to_ram_disk_mode(), {noreply, State #state { mode = unlimited }}; -handle_cast({change_memory_usage, false}, +handle_cast({change_memory_footprint, false}, State = #state { mode = disk_only }) -> constrain_queues(false, State #state.queues), {noreply, State #state { mode = ram_disk }}. -- cgit v1.2.1 From a3215a0525acc7e3de7b07b12d316b9c5b98fe6b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 16:31:01 +0100 Subject: FunAcc0 ==> FunAcc --- src/rabbit_amqqueue_process.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6dbd95c2..6869846d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -170,7 +170,7 @@ record_current_channel_tx(ChPid, Txn) -> %% that wasn't happening already) store_ch_record((ch_record(ChPid))#cr{txn = Txn}). 
-deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, +deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc, State = #q{q = #amqqueue{name = QName}, active_consumers = ActiveConsumers, blocked_consumers = BlockedConsumers, @@ -182,12 +182,12 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, C = #cr{limiter_pid = LimiterPid, unsent_message_count = Count, unacked_messages = UAM} = ch_record(ChPid), - IsMsgReady = PredFun(FunAcc0, State), + IsMsgReady = PredFun(FunAcc, State), case (IsMsgReady andalso rabbit_limiter:can_send( LimiterPid, self(), AckRequired )) of true -> {{Msg, IsDelivered, AckTag}, FunAcc1, State1} = - DeliverFun(AckRequired, FunAcc0, State), + DeliverFun(AckRequired, FunAcc, State), ?LOGDEBUG("AMQQUEUE ~p DELIVERY:~n~p~n", [QName, Msg]), rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, @@ -226,15 +226,15 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc0, ActiveConsumers, BlockedConsumers), deliver_queue( - Funs, FunAcc0, + Funs, FunAcc, State#q{active_consumers = NewActiveConsumers, blocked_consumers = NewBlockedConsumers}); false -> %% no message was ready, so we don't need to block anyone - {FunAcc0, State} + {FunAcc, State} end; {empty, _} -> - {FunAcc0, State} + {FunAcc, State} end. deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> -- cgit v1.2.1 From 3045dac32d73412b19181647e1f150bf2f88b3a5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 16:37:51 +0100 Subject: comment --- src/rabbit.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index fbadc5f2..7d5e2a79 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -150,7 +150,8 @@ start(normal, []) -> {"disk queue", fun () -> ok = start_child(rabbit_disk_queue), - ok = rabbit_disk_queue:to_ram_disk_mode() %% TODO, CHANGE ME + %% TODO, CHANGE ME, waiting on bug 20980 + ok = rabbit_disk_queue:to_ram_disk_mode() end}, {"recovery", fun () -> -- cgit v1.2.1 From 2f9346bac74b8a91d42b13d225f5add815e97e73 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 17:31:47 +0100 Subject: sorted out specs. --- src/rabbit_amqqueue.erl | 2 +- src/rabbit_disk_queue.erl | 12 ++++++------ src/rabbit_guid.erl | 2 +- src/rabbit_misc.erl | 2 +- src/rabbit_mixed_queue.erl | 39 +++++++++++++++++++++++++++++++++++++++ src/rabbit_queue_mode_manager.erl | 14 ++++++++++++++ 6 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index a1f36f31..9d3cead6 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -65,7 +65,7 @@ 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). -spec(start/0 :: () -> 'ok'). --spec(recover/0 :: () -> 'ok'). +-spec(recover/0 :: () -> {'ok', [amqqueue()]}). -spec(declare/4 :: (queue_name(), bool(), bool(), amqp_table()) -> amqqueue()). -spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a33a4b28..2f8fd223 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -231,19 +231,19 @@ -ifdef(use_specs). -type(seq_id() :: non_neg_integer()). --type(seq_id_or_next() :: { seq_id() | 'next' }). +-type(seq_id_or_next() :: ( seq_id() | 'next' )). -spec(start_link/0 :: () -> - {'ok', pid()} | 'ignore' | {'error', any()}). + ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(publish/4 :: (queue_name(), msg_id(), binary(), bool()) -> 'ok'). -spec(publish_with_seq/5 :: (queue_name(), msg_id(), seq_id_or_next(), binary(), bool()) -> 'ok'). 
-spec(deliver/1 :: (queue_name()) -> - {'empty' | {msg_id(), binary(), non_neg_integer(), - bool(), {msg_id(), seq_id()}, non_neg_integer()}}). + ('empty' | {msg_id(), binary(), non_neg_integer(), + bool(), {msg_id(), seq_id()}, non_neg_integer()})). -spec(phantom_deliver/1 :: (queue_name()) -> - { 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, - non_neg_integer()}}). + ( 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, + non_neg_integer()})). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index fe5acc83..3aa2989a 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -42,7 +42,7 @@ terminate/2, code_change/3]). -define(SERVER, ?MODULE). --define(SERIAL_FILENAME, rabbit_guid). +-define(SERIAL_FILENAME, "rabbit_guid"). -record(state, {serial}). diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index bf4a69db..2971e332 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -114,7 +114,7 @@ -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). -spec(start_applications/1 :: ([atom()]) -> 'ok'). -spec(stop_applications/1 :: ([atom()]) -> 'ok'). --spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> [B]). +-spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -endif. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 6caea55d..9aa290a0 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -49,6 +49,45 @@ } ). +-ifdef(use_specs). + +-type(mode() :: ( 'disk' | 'mixed' )). +-type(mqstate() :: #mqstate { mode :: mode(), + msg_buf :: queue(), + queue :: queue_name(), + is_durable :: bool(), + length :: non_neg_integer() + }). +-type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). + +-spec(init/3 :: (queue_name(), bool(), mode()) -> + {'ok', mqstate()}). +-spec(publish/2 :: (message(), mqstate()) -> + {'ok', mqstate()}). +-spec(publish_delivered/2 :: (message(), mqstate()) -> + {'ok', acktag(), mqstate()}). +-spec(deliver/1 :: (mqstate()) -> + {('empty' | {message(), bool(), acktag(), non_neg_integer()}), + mqstate()}). +-spec(ack/2 :: ([acktag()], mqstate()) -> + {'ok', mqstate()}). +-spec(tx_publish/2 :: (message(), mqstate()) -> + {'ok', mqstate()}). +-spec(tx_commit/3 :: ([message()], [acktag()], mqstate()) -> + {'ok', mqstate()}). +-spec(tx_cancel/2 :: ([message()], mqstate()) -> + {'ok', mqstate()}). +-spec(requeue/2 :: ([{message(), acktag()}], mqstate()) -> + {'ok', mqstate()}). +-spec(purge/1 :: (mqstate()) -> + {'ok', mqstate()}). +-spec(delete_queue/1 :: (mqstate()) -> + {'ok', mqstate()}). +-spec(length/1 :: (mqstate()) -> non_neg_integer()). +-spec(is_empty/1 :: (mqstate()) -> bool()). + +-endif. + init(Queue, IsDurable, disk) -> purge_non_persistent_messages( #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 6bb197ef..b36bb8be 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -43,6 +43,20 @@ -define(SERVER, ?MODULE). +-ifdef(use_specs). + +-type(mode() :: ( 'unlimited' | 'ram_disk' | 'disk_only' )). +-type(queue_mode() :: ( 'mixed' | 'disk' )). + +-spec(start_link/0 :: () -> + ({'ok', pid()} | 'ignore' | {'error', any()})). +-spec(register/1 :: (pid()) -> {'ok', queue_mode()}). 
+-spec(change_memory_footprint/2 :: (pid(), bool()) -> 'ok'). +-spec(reduce_memory_footprint/0 :: () -> 'ok'). +-spec(increase_memory_footprint/0 :: () -> 'ok'). + +-endif. + -record(state, { mode, queues }). -- cgit v1.2.1 From 1b770570c4288b7be7ef79a3fc223fe37b235452 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 17:40:23 +0100 Subject: preemptive tidying --- src/rabbit_mixed_queue.erl | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9aa290a0..e7ac171c 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -59,30 +59,24 @@ length :: non_neg_integer() }). -type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). +-type(okmqs() :: {'ok', mqstate()}). --spec(init/3 :: (queue_name(), bool(), mode()) -> - {'ok', mqstate()}). --spec(publish/2 :: (message(), mqstate()) -> - {'ok', mqstate()}). +-spec(init/3 :: (queue_name(), bool(), mode()) -> okmqs()). +-spec(publish/2 :: (message(), mqstate()) -> okmqs()). -spec(publish_delivered/2 :: (message(), mqstate()) -> {'ok', acktag(), mqstate()}). -spec(deliver/1 :: (mqstate()) -> {('empty' | {message(), bool(), acktag(), non_neg_integer()}), mqstate()}). --spec(ack/2 :: ([acktag()], mqstate()) -> - {'ok', mqstate()}). --spec(tx_publish/2 :: (message(), mqstate()) -> - {'ok', mqstate()}). --spec(tx_commit/3 :: ([message()], [acktag()], mqstate()) -> - {'ok', mqstate()}). --spec(tx_cancel/2 :: ([message()], mqstate()) -> - {'ok', mqstate()}). --spec(requeue/2 :: ([{message(), acktag()}], mqstate()) -> - {'ok', mqstate()}). --spec(purge/1 :: (mqstate()) -> - {'ok', mqstate()}). --spec(delete_queue/1 :: (mqstate()) -> - {'ok', mqstate()}). +-spec(ack/2 :: ([acktag()], mqstate()) -> okmqs()). +-spec(tx_publish/2 :: (message(), mqstate()) -> okmqs()). +-spec(tx_commit/3 :: ([message()], [acktag()], mqstate()) -> okmqs()). +-spec(tx_cancel/2 :: ([message()], mqstate()) -> okmqs()). +-spec(requeue/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). +-spec(purge/1 :: (mqstate()) -> okmqs()). + +-spec(delete_queue/1 :: (mqstate()) -> {'ok', mqstate()}). + -spec(length/1 :: (mqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (mqstate()) -> bool()). -- cgit v1.2.1 From d67534ad96b7ce9b788b10de7783fe41c6561797 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Jun 2009 23:44:25 +0100 Subject: removal of two unused functions from disk_queue. There are two more unused functions which I can't work out what to do about... Also cosmetic --- Makefile | 11 ++++++----- src/rabbit_disk_queue.erl | 16 ++-------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 367f153a..14ed50ff 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RABBITMQ_NODENAME=rabbit RABBITMQ_SERVER_START_ARGS= -RABBITMQ_MNESIA_DIR=/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia -RABBITMQ_LOG_BASE=/tmp +RABBITMQ_MNESIA_DIR=~/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia +RABBITMQ_LOG_BASE=~/tmp SOURCE_DIR=src EBIN_DIR=ebin @@ -94,10 +94,11 @@ run-node: all run-tests: all echo "rabbit_tests:all_tests()." | $(ERL_CALL) -start-background-node: +start-background-node: stop-node $(BASIC_SCRIPT_ENVIRONMENT_SETTINGS) \ RABBITMQ_NODE_ONLY=true \ - ./scripts/rabbitmq-server -detached; sleep 1 + RABBITMQ_SERVER_START_ARGS="$(RABBITMQ_SERVER_START_ARGS) -detached" \ + ./scripts/rabbitmq-server ; sleep 1 start-rabbit-on-node: all echo "rabbit:start()." 
| $(ERL_CALL) @@ -129,7 +130,7 @@ srcdist: distclean cp README.in $(TARGET_SRC_DIR)/README elinks -dump -no-references -no-numbering $(WEB_URL)build-server.html \ >> $(TARGET_SRC_DIR)/BUILD - sed -i.save 's/%%VERSION%%/$(VERSION)/' $(TARGET_SRC_DIR)/ebin/rabbit_app.in && rm -f $(TARGET_SRC_DIR)/ebin/rabbit_app.in.save + sed -i 's/%%VERSION%%/$(VERSION)/' $(TARGET_SRC_DIR)/ebin/rabbit_app.in cp -r $(AMQP_CODEGEN_DIR)/* $(TARGET_SRC_DIR)/codegen/ cp codegen.py Makefile generate_app $(TARGET_SRC_DIR) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2f8fd223..e0720620 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -44,7 +44,7 @@ dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). --export([length/1, is_empty/1, next_write_seq/1]). +-export([length/1]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -205,7 +205,7 @@ %% +-------+ +-------+ +-------+ %% | B | | X | | B | %% +-------+ +-------+ +-------+ -%% | A | | E | | A | +%% | A | | E | | A | %% +-------+ +-------+ +-------+ %% left right left %% @@ -264,8 +264,6 @@ -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). -spec(length/1 :: (queue_name()) -> non_neg_integer()). --spec(next_write_seq/1 :: (queue_name()) -> non_neg_integer()). --spec(is_empty/1 :: (queue_name()) -> bool()). -endif. @@ -347,12 +345,6 @@ to_ram_disk_mode() -> length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). -next_write_seq(Q) -> - gen_server2:call(?SERVER, {next_write_seq, Q}, infinity). - -is_empty(Q) -> - 0 == rabbit_disk_queue:length(Q). - %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -493,10 +485,6 @@ handle_call(to_ram_disk_mode, _From, handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), {reply, Length, State}; -handle_call({next_write_seq, Q}, _From, - State = #dqstate { sequences = Sequences }) -> - {_ReadSeqId, WriteSeqId, _Length} = sequence_lookup(Sequences, Q), - {reply, WriteSeqId, State}; handle_call({dump_queue, Q}, _From, State) -> {Result, State1} = internal_dump_queue(Q, State), {reply, Result, State1}; -- cgit v1.2.1 From 7f010b5db4338b2646c698f638a9ef97d009edb0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 18 Jun 2009 10:25:22 +0100 Subject: fixing up my issues with tmp dir, hopefully once and for all. 
TMPDIR is a standard Unix variable which should be honoured
---
 Makefile | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 14ed50ff..fb1853ba 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,11 @@
+ifndef TMPDIR
+TMPDIR := /tmp
+endif
+
 RABBITMQ_NODENAME=rabbit
 RABBITMQ_SERVER_START_ARGS=
-RABBITMQ_MNESIA_DIR=~/tmp/rabbitmq-$(RABBITMQ_NODENAME)-mnesia
-RABBITMQ_LOG_BASE=~/tmp
+RABBITMQ_MNESIA_DIR=$(TMPDIR)/rabbitmq-$(RABBITMQ_NODENAME)-mnesia
+RABBITMQ_LOG_BASE=$(TMPDIR)
 
 SOURCE_DIR=src
 EBIN_DIR=ebin
-- cgit v1.2.1


From dcc60acba1f22da0497534f5227677c7cb4b8228 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 18 Jun 2009 10:54:39 +0100
Subject: removing two unused functions: publish_with_seq and
 tx_commit_with_seqs

---
 src/rabbit_disk_queue.erl | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index e0720620..3370ef84 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -38,8 +38,8 @@
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
 
--export([publish/4, publish_with_seq/5, deliver/1, phantom_deliver/1, ack/2,
-         tx_publish/2, tx_commit/3, tx_commit_with_seqs/3, tx_cancel/1,
+-export([publish/4, deliver/1, phantom_deliver/1, ack/2,
+         tx_publish/2, tx_commit/3, tx_cancel/1,
          requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1,
          dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1
         ]).
@@ -236,8 +236,6 @@
 -spec(start_link/0 :: () ->
               ({'ok', pid()} | 'ignore' | {'error', any()})).
 -spec(publish/4 :: (queue_name(), msg_id(), binary(), bool()) -> 'ok').
--spec(publish_with_seq/5 :: (queue_name(), msg_id(), seq_id_or_next(), binary(),
-                             bool()) -> 'ok').
 -spec(deliver/1 :: (queue_name()) ->
              ('empty' | {msg_id(), binary(), non_neg_integer(),
                          bool(), {msg_id(), seq_id()}, non_neg_integer()})).
@@ -248,8 +246,6 @@
 -spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok').
 -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) ->
              'ok').
--spec(tx_commit_with_seqs/3 :: (queue_name(), [{msg_id(), seq_id_or_next()}],
-                                [{msg_id(), seq_id()}]) -> 'ok').
 -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok').
 -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok').
 -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()},
                                               seq_id_or_next()}]) -> 'ok').
@@ -278,12 +274,6 @@ publish(Q, MsgId, Msg, false) when is_binary(Msg) ->
     gen_server2:cast(?SERVER, {publish, Q, MsgId, Msg});
 publish(Q, MsgId, Msg, true) when is_binary(Msg) ->
     gen_server2:call(?SERVER, {publish, Q, MsgId, Msg}, infinity).
 
-publish_with_seq(Q, MsgId, SeqId, Msg, false) when is_binary(Msg) ->
-    gen_server2:cast(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg});
-publish_with_seq(Q, MsgId, SeqId, Msg, true) when is_binary(Msg) ->
-    gen_server2:call(?SERVER, {publish_with_seq, Q, MsgId, SeqId, Msg},
-                     infinity).
-
 deliver(Q) ->
     gen_server2:call(?SERVER, {deliver, Q}, infinity).
 
@@ -303,11 +293,6 @@ tx_commit(Q, PubMsgIds, AckSeqIds)
   when is_list(PubMsgIds) andalso is_list(AckSeqIds) ->
     gen_server2:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity).
 
-tx_commit_with_seqs(Q, PubMsgSeqIds, AckSeqIds)
-  when is_list(PubMsgSeqIds) andalso is_list(AckSeqIds) ->
-    gen_server2:call(?SERVER, {tx_commit_with_seqs, Q, PubMsgSeqIds, AckSeqIds},
-                     infinity).
-
 tx_cancel(MsgIds) when is_list(MsgIds) ->
     gen_server2:cast(?SERVER, {tx_cancel, MsgIds}).
@@ -422,10 +407,6 @@ handle_call({publish, Q, MsgId, MsgBody}, _From, State) -> {ok, MsgSeqId, State1} = internal_publish(Q, MsgId, next, MsgBody, true, State), {reply, MsgSeqId, State1}; -handle_call({publish_with_seq, Q, MsgId, SeqId, MsgBody}, _From, State) -> - {ok, MsgSeqId, State1} = - internal_publish(Q, MsgId, SeqId, MsgBody, true, State), - {reply, MsgSeqId, State1}; handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), {reply, Result, State1}; @@ -436,9 +417,6 @@ handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> PubMsgSeqIds = zip_with_tail(PubMsgIds, {duplicate, next}), {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State), {reply, ok, State1}; -handle_call({tx_commit_with_seqs, Q, PubSeqMsgIds, AckSeqIds}, _From, State) -> - {ok, State1} = internal_tx_commit(Q, PubSeqMsgIds, AckSeqIds, State), - {reply, ok, State1}; handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), {reply, Count, State1}; @@ -496,10 +474,6 @@ handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, _MsgSeqId, State1} = internal_publish(Q, MsgId, next, MsgBody, false, State), {noreply, State1}; -handle_cast({publish_with_seq, Q, MsgId, SeqId, MsgBody}, State) -> - {ok, _MsgSeqId, State1} = - internal_publish(Q, MsgId, SeqId, MsgBody, false, State), - {noreply, State1}; handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), {noreply, State1}; -- cgit v1.2.1 From 61405ee6e1a2a02189f77ceddebfc471d917956c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 18 Jun 2009 13:27:42 +0100 Subject: well, I think it works, but it's now much much slower. --- src/rabbit_disk_queue.erl | 171 +++++++++++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 71 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3370ef84..1e2226bb 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -44,7 +44,7 @@ dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). --export([length/1]). +-export([length/1, filesync/0]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -68,6 +68,8 @@ -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(SYNC_INTERVAL, 5). %% milliseconds + -record(dqstate, {msg_location_dets, %% where are messages? msg_location_ets, %% as above, but for ets version @@ -82,7 +84,9 @@ %% since the last fsync? file_size_limit, %% how big can our files get? read_file_handles, %% file handles for reading (LRU) - read_file_handles_limit %% how many file handles can we open? + read_file_handles_limit, %% how many file handles can we open? + on_sync_functions, %% list of functions to run on sync (reversed) + timer_ref %% TRef for our interval timer }). %% The components: @@ -260,6 +264,7 @@ -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). -spec(length/1 :: (queue_name()) -> non_neg_integer()). +-spec(filesync/0 :: () -> 'ok'). -endif. @@ -322,14 +327,17 @@ stop_and_obliterate() -> gen_server2:call(?SERVER, stop_vaporise, infinity). to_disk_only_mode() -> - gen_server2:pcall(?SERVER, 10, to_disk_only_mode, infinity). + gen_server2:pcall(?SERVER, 9, to_disk_only_mode, infinity). to_ram_disk_mode() -> - gen_server2:pcall(?SERVER, 10, to_ram_disk_mode, infinity). + gen_server2:pcall(?SERVER, 9, to_ram_disk_mode, infinity). length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). 
+filesync() -> + gen_server2:pcast(?SERVER, 10, filesync). + %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -368,6 +376,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), + {ok, TRef} = timer:apply_interval(?SYNC_INTERVAL, ?MODULE, filesync, []), + InitName = "0" ++ ?FILE_EXTENSION, State = #dqstate { msg_location_dets = MsgLocationDets, @@ -384,7 +394,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_dirty = false, file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, - read_file_handles_limit = ReadFileHandlesLimit + read_file_handles_limit = ReadFileHandlesLimit, + on_sync_functions = [], + timer_ref = TRef }, {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = @@ -406,20 +418,20 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> handle_call({publish, Q, MsgId, MsgBody}, _From, State) -> {ok, MsgSeqId, State1} = internal_publish(Q, MsgId, next, MsgBody, true, State), - {reply, MsgSeqId, State1}; + reply(MsgSeqId, State1); handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), - {reply, Result, State1}; + reply(Result, State1); handle_call({phantom_deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, false, false, State), - {reply, Result, State1}; -handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, _From, State) -> + reply(Result, State1); +handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> PubMsgSeqIds = zip_with_tail(PubMsgIds, {duplicate, next}), - {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, State), - {reply, ok, State1}; + {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, State), + noreply(State1); handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), - {reply, Count, State1}; + reply(Count, State1); handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State) -> @@ -436,7 +448,7 @@ handle_call(stop_vaporise, _From, State) -> %% gen_server now calls terminate, which then calls shutdown handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = disk_only }) -> - {reply, ok, State}; + reply(ok, State); handle_call(to_disk_only_mode, _From, State = #dqstate { operation_mode = ram_disk, msg_location_dets = MsgLocationDets, @@ -446,10 +458,10 @@ handle_call(to_disk_only_mode, _From, disc_only_copies), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), - {reply, ok, State #dqstate { operation_mode = disk_only }}; + reply(ok, State #dqstate { operation_mode = disk_only }); handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = ram_disk }) -> - {reply, ok, State}; + reply(ok, State); handle_call(to_ram_disk_mode, _From, State = #dqstate { operation_mode = disk_only, msg_location_dets = MsgLocationDets, @@ -459,46 +471,50 @@ handle_call(to_ram_disk_mode, _From, disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), - {reply, ok, State #dqstate { operation_mode = ram_disk }}; + reply(ok, State #dqstate { operation_mode = ram_disk }); handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), - {reply, Length, State}; + 
reply(Length, State); handle_call({dump_queue, Q}, _From, State) -> {Result, State1} = internal_dump_queue(Q, State), - {reply, Result, State1}; + reply(Result, State1); handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), - {reply, ok, State1}. + reply(ok, State1). handle_cast({publish, Q, MsgId, MsgBody}, State) -> {ok, _MsgSeqId, State1} = internal_publish(Q, MsgId, next, MsgBody, false, State), - {noreply, State1}; + noreply(State1); handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), - {noreply, State1}; + noreply(State1); handle_cast({auto_ack_next_message, Q}, State) -> {ok, State1} = internal_auto_ack(Q, State), - {noreply, State1}; + noreply(State1); handle_cast({tx_publish, MsgId, MsgBody}, State) -> {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), - {noreply, State1}; + noreply(State1); handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), - {noreply, State1}; + noreply(State1); handle_cast({requeue, Q, MsgSeqIds}, State) -> MsgSeqSeqIds = zip_with_tail(MsgSeqIds, {duplicate, {next, true}}), {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), - {noreply, State1}; + noreply(State1); handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), - {noreply, State1}; + noreply(State1); handle_cast({delete_queue, Q}, State) -> {ok, State1} = internal_delete_queue(Q, State), - {noreply, State1}. + noreply(State1); +handle_cast(filesync, State) -> + noreply(sync_current_file_handle(State)). handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; +handle_info(timeout, State = #dqstate { current_dirty = true }) -> + noreply(sync_current_file_handle(State)); handle_info(_Info, State) -> {noreply, State}. @@ -508,16 +524,18 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge} + read_file_handles = {ReadHdls, _ReadHdlsAge}, + timer_ref = TRef }) -> %% deliberately ignoring return codes here + timer:cancel(TRef), dets:close(MsgLocationDets), file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)), true = ets:delete_all_objects(MsgLocationEts), case FileHdl of undefined -> ok; - _ -> file:sync(FileHdl), + _ -> sync_current_file_handle(State), file:close(FileHdl) end, dict:fold(fun (_File, Hdl, _Acc) -> @@ -525,13 +543,25 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, end, ok, ReadHdls), State #dqstate { current_file_handle = undefined, current_dirty = false, - read_file_handles = {dict:new(), gb_trees:empty()}}. + read_file_handles = {dict:new(), gb_trees:empty()}, + timer_ref = undefined + }. code_change(_OldVsn, State, _Extra) -> {ok, State}. %% ---- UTILITY FUNCTIONS ---- +noreply(NewState = #dqstate { current_dirty = true }) -> + {noreply, NewState, 0}; +noreply(NewState) -> + {noreply, NewState, infinity}. + +reply(Reply, NewState = #dqstate { current_dirty = true }) -> + {reply, Reply, NewState, 0}; +reply(Reply, NewState) -> + {reply, Reply, NewState, infinity}. + form_filename(Name) -> filename:join(base_directory(), Name). 
@@ -613,14 +643,12 @@ get_read_handle(File, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, read_file_handles_limit = ReadFileHandlesLimit, current_file_name = CurName, - current_file_handle = CurHdl, current_dirty = IsDirty }) -> - IsDirty1 = if CurName =:= File andalso IsDirty -> - file:sync(CurHdl), - false; - true -> IsDirty - end, + State1 = if CurName =:= File andalso IsDirty -> + sync_current_file_handle(State); + true -> State + end, Now = now(), {FileHdl, ReadHdls1, ReadHdlsAge1} = case dict:find(File, ReadHdls) of @@ -644,9 +672,8 @@ get_read_handle(File, State = end, ReadHdls2 = dict:store(File, {FileHdl, Now}, ReadHdls1), ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), - {FileHdl, State #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge3}, - current_dirty = IsDirty1 - }}. + {FileHdl, + State1 #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge3} }}. adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> ExpectedSeqId; @@ -676,6 +703,17 @@ sequence_lookup(Sequences, Q) -> {ReadSeqId, WriteSeqId, Length} end. +sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl, + current_dirty = IsDirty, + on_sync_functions = Funcs + }) -> + ok = case IsDirty of + true -> file:sync(CurHdl); + false -> ok + end, + lists:map(fun (Fun) -> Fun() end, lists:reverse(Funcs)), + State #dqstate { current_dirty = false, on_sync_functions = [] }. + %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, ReadMsg, FakeDeliver, @@ -818,12 +856,10 @@ internal_tx_publish(MsgId, MsgBody, end. %% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next)) -internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, - State = #dqstate { current_file_handle = CurHdl, - current_file_name = CurName, - current_dirty = IsDirty, - sequences = Sequences - }) -> +internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, + State = #dqstate { sequences = Sequences, + on_sync_functions = SyncFuncs + }) -> {PubList, PubAcc, ReadSeqId, Length} = case PubMsgSeqIds of [] -> {[], undefined, undefined, undefined}; @@ -835,7 +871,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, { zip_with_tail(PubMsgSeqIds, {last, {next, next}}), InitWriteSeqId, InitReadSeqId1, InitLength} end, - {atomic, {Sync, WriteSeqId, State1}} = + {atomic, {WriteSeqId, State1}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), @@ -844,11 +880,11 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, %% it's been published, which is clearly %% nonsense. I.e. in commit, do not do things in an %% order which _could_not_ have happened. - {Sync1, WriteSeqId1} = + WriteSeqId1 = lists:foldl( fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, - {Acc, ExpectedSeqId}) -> - [{MsgId, _RefCount, File, _Offset, + ExpectedSeqId) -> + [{MsgId, _RefCount, _File, _Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), SeqId1 = adjust_last_msg_seq_id( Q, ExpectedSeqId, SeqId, write), @@ -863,23 +899,21 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, next_seq_id = NextSeqId1 }, write), - {Acc orelse (CurName =:= File), NextSeqId1} - end, {false, PubAcc}, PubList), - + NextSeqId1 + end, PubAcc, PubList), {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), - {Sync1, WriteSeqId1, State2} + {WriteSeqId1, State2} end), true = case PubList of [] -> true; _ -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, Length + erlang:length(PubList)}) end, - IsDirty1 = if IsDirty andalso Sync -> - ok = file:sync(CurHdl), - false; - true -> IsDirty - end, - {ok, State1 #dqstate { current_dirty = IsDirty1 }}. 
+    {ok,
+     State1 #dqstate { on_sync_functions = [fun() ->
+                                                    gen_server2:reply(From, ok)
+                                            end | SyncFuncs]}
+    }.
 
 %% SeqId can be 'next'
 internal_publish(Q, MsgId, SeqId, MsgBody, IsDelivered, State) ->
@@ -1051,14 +1085,10 @@ maybe_roll_to_new_file(Offset,
                        current_file_name = CurName,
                        current_file_handle = CurHdl,
                        current_file_num = CurNum,
-                       current_dirty = IsDirty,
                        file_summary = FileSummary
                      }
       ) when Offset >= FileSizeLimit ->
-    ok = case IsDirty of
-             true -> file:sync(CurHdl);
-             false -> ok
-         end,
+    State1 = sync_current_file_handle(State),
     ok = file:close(CurHdl),
     NextNum = CurNum + 1,
     NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION,
@@ -1067,13 +1097,12 @@
     ok = preallocate(NextHdl, FileSizeLimit, 0),
     true = ets:update_element(FileSummary, CurName, {5, NextName}),%% 5 is Right
     true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}),
-    State1 = State #dqstate { current_file_name = NextName,
-                              current_file_handle = NextHdl,
-                              current_file_num = NextNum,
-                              current_offset = 0,
-                              current_dirty = false
-                            },
-    {ok, compact(sets:from_list([CurName]), State1)};
+    State2 = State1 #dqstate { current_file_name = NextName,
+                               current_file_handle = NextHdl,
+                               current_file_num = NextNum,
+                               current_offset = 0
+                             },
+    {ok, compact(sets:from_list([CurName]), State2)};
 maybe_roll_to_new_file(_, State) ->
     {ok, State}.
-- cgit v1.2.1


From 132d3f05fd04ce55564b8b30855e109210760c75 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 18 Jun 2009 14:03:48 +0100
Subject: Done. Problem was needing to reply instantly if we're not dirty.

---
 src/rabbit_disk_queue.erl | 51 ++++++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 1e2226bb..c6076635 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -85,7 +85,7 @@
          file_size_limit,         %% how big can our files get?
          read_file_handles,       %% file handles for reading (LRU)
          read_file_handles_limit, %% how many file handles can we open?
-         on_sync_functions,       %% list of functions to run on sync (reversed)
+         on_sync_froms,           %% list of committers to run on sync (reversed)
          timer_ref                %% TRef for our interval timer
         }).
 
 %% The components:
@@ -395,7 +395,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
                       file_size_limit = FileSizeLimit,
                       read_file_handles = {dict:new(), gb_trees:empty()},
                       read_file_handles_limit = ReadFileHandlesLimit,
-                      on_sync_functions = [],
+                      on_sync_froms = [],
                       timer_ref = TRef
                      },
     {ok, State1 = #dqstate { current_file_name = CurrentName,
                              current_offset = Offset } } =
@@ -427,8 +427,12 @@ handle_call({phantom_deliver, Q}, _From, State) ->
     reply(Result, State1);
 handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) ->
     PubMsgSeqIds = zip_with_tail(PubMsgIds, {duplicate, next}),
-    {ok, State1} = internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, State),
-    noreply(State1);
+    {Reply, State1} =
+        internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, State),
+    case Reply of
+        true -> reply(ok, State1);
+        false -> noreply(State1)
+    end;
 handle_call({purge, Q}, _From, State) ->
     {ok, Count, State1} = internal_purge(Q, State),
     reply(Count, State1);
@@ -703,16 +707,20 @@ sequence_lookup(Sequences, Q) ->
             {ReadSeqId, WriteSeqId, Length}
     end.
 
+sync_current_file_handle(State = #dqstate { current_dirty = false,
+                                            on_sync_froms = [] }) ->
+    State;
 sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl,
                                             current_dirty = IsDirty,
-                                            on_sync_functions = Funcs
+                                            on_sync_froms = Froms
                                           }) ->
     ok = case IsDirty of
              true -> file:sync(CurHdl);
              false -> ok
          end,
-    lists:map(fun (Fun) -> Fun() end, lists:reverse(Funcs)),
-    State #dqstate { current_dirty = false, on_sync_functions = [] }.
+    lists:map(fun (From) -> gen_server2:reply(From, ok) end,
+              lists:reverse(Froms)),
+    State #dqstate { current_dirty = false, on_sync_froms = [] }.
 
 %% ---- INTERNAL RAW FUNCTIONS ----
 
@@ -858,7 +866,9 @@ internal_tx_publish(MsgId, MsgBody,
 %% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next))
 internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From,
                    State = #dqstate { sequences = Sequences,
-                                      on_sync_functions = SyncFuncs
+                                      current_file_name = CurFile,
+                                      current_dirty = IsDirty,
+                                      on_sync_froms = SyncFroms
                                     }) ->
     {PubList, PubAcc, ReadSeqId, Length} =
         case PubMsgSeqIds of
@@ -871,7 +881,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From,
                 { zip_with_tail(PubMsgSeqIds, {last, {next, next}}),
                   InitWriteSeqId, InitReadSeqId1, InitLength}
         end,
-    {atomic, {WriteSeqId, State1}} =
+    {atomic, {InCurFile, WriteSeqId, State1}} =
         mnesia:transaction(
           fun() ->
                   ok = mnesia:write_lock_table(rabbit_disk_queue),
@@ -880,11 +890,11 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From,
                   %% it's been published, which is clearly
                   %% nonsense. I.e. in commit, do not do things in an
                   %% order which _could_not_ have happened.
-                  WriteSeqId1 =
+                  {InCurFile1, WriteSeqId1} =
                       lists:foldl(
                         fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}},
-                             ExpectedSeqId) ->
-                                [{MsgId, _RefCount, _File, _Offset,
+                             {InCurFileAcc, ExpectedSeqId}) ->
+                                [{MsgId, _RefCount, File, _Offset,
                                   _TotalSize}] = dets_ets_lookup(State, MsgId),
                                 SeqId1 = adjust_last_msg_seq_id(
                                            Q, ExpectedSeqId, SeqId, write),
@@ -899,21 +909,22 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From,
                                             next_seq_id = NextSeqId1
                                           },
                                   write),
-                                NextSeqId1
-                        end, PubAcc, PubList),
+                                {InCurFileAcc orelse File =:= CurFile,
+                                 NextSeqId1}
+                        end, {false, PubAcc}, PubList),
                   {ok, State2} = remove_messages(Q, AckSeqIds, txn, State),
-                  {WriteSeqId1, State2}
+                  {InCurFile1, WriteSeqId1, State2}
          end),
     true = case PubList of
               [] -> true;
               _ -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId,
                                           Length + erlang:length(PubList)})
           end,
-    {ok,
-     State1 #dqstate { on_sync_froms = [fun() ->
-                                                gen_server2:reply(From, ok)
-                                        end | SyncFroms]}
-    }.
+    if IsDirty andalso InCurFile ->
+            {false, State1 #dqstate { on_sync_froms = [From | SyncFroms] }};
+       true ->
+            {true, State1}
+    end.
-- cgit v1.2.1


From ac39c8b78683bf04529f60daec89fb0d8281a15e Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 18 Jun 2009 14:19:16 +0100
Subject: stop the commit timer if we're no longer dirty. This means it should
 no longer be a repeat timer: once it's set we're either going to receive the
 explicit sync call or we're going to time out on the message queue, at which
 point we're no longer dirty and so we'll then cancel the timer....
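
In other words, the intended lifecycle is this (sketch only, not the code in
this patch; ?SYNC_INTERVAL, filesync/0 and the timer functions are as in the
diff below):

    %% After any state change: if we're dirty, make sure exactly one
    %% one-shot timer is pending, and ask gen_server2 to time out (and
    %% hence sync) as soon as the message queue goes quiet; if we're
    %% clean, no sync can be needed, so any pending timer is cancelled.
    noreply(State = #dqstate { current_dirty = true }) ->
        {noreply, start_commit_timer(State), 0};
    noreply(State) ->
        {noreply, stop_commit_timer(State), infinity}.

    start_commit_timer(State = #dqstate { timer_ref = undefined }) ->
        %% one-shot: timer:apply_after/4 fires at most once
        {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []),
        State #dqstate { timer_ref = TRef };
    start_commit_timer(State) ->
        State. %% a timer is already pending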
--- src/rabbit_disk_queue.erl | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c6076635..192995b2 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -376,8 +376,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), - {ok, TRef} = timer:apply_interval(?SYNC_INTERVAL, ?MODULE, filesync, []), - InitName = "0" ++ ?FILE_EXTENSION, State = #dqstate { msg_location_dets = MsgLocationDets, @@ -395,8 +393,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, read_file_handles_limit = ReadFileHandlesLimit, - on_sync_froms = [], - timer_ref = TRef + on_sync_froms = [], + timer_ref = undefined }, {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = @@ -528,11 +526,10 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge}, - timer_ref = TRef + read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> + State1 = stop_commit_timer(State), %% deliberately ignoring return codes here - timer:cancel(TRef), dets:close(MsgLocationDets), file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)), @@ -545,11 +542,10 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, dict:fold(fun (_File, Hdl, _Acc) -> file:close(Hdl) end, ok, ReadHdls), - State #dqstate { current_file_handle = undefined, - current_dirty = false, - read_file_handles = {dict:new(), gb_trees:empty()}, - timer_ref = undefined - }. + State1 #dqstate { current_file_handle = undefined, + current_dirty = false, + read_file_handles = {dict:new(), gb_trees:empty()} + }. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -557,14 +553,14 @@ code_change(_OldVsn, State, _Extra) -> %% ---- UTILITY FUNCTIONS ---- noreply(NewState = #dqstate { current_dirty = true }) -> - {noreply, NewState, 0}; + {noreply, start_commit_timer(NewState), 0}; noreply(NewState) -> - {noreply, NewState, infinity}. + {noreply, stop_commit_timer(NewState), infinity}. reply(Reply, NewState = #dqstate { current_dirty = true }) -> - {reply, Reply, NewState, 0}; + {reply, Reply, start_commit_timer(NewState), 0}; reply(Reply, NewState) -> - {reply, Reply, NewState, infinity}. + {reply, Reply, stop_commit_timer(NewState), infinity}. form_filename(Name) -> filename:join(base_directory(), Name). @@ -707,6 +703,18 @@ sequence_lookup(Sequences, Q) -> {ReadSeqId, WriteSeqId, Length} end. +start_commit_timer(State = #dqstate { timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), + State #dqstate { timer_ref = TRef }; +start_commit_timer(State) -> + State. + +stop_commit_timer(State = #dqstate { timer_ref = undefined }) -> + State; +stop_commit_timer(State = #dqstate { timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #dqstate { timer_ref = undefined }. 
+
 
 sync_current_file_handle(State = #dqstate { current_dirty = false,
                                             on_sync_froms = [] }) ->
     State;
-- cgit v1.2.1


From 5e726f8233ec449fe8319724053f21e218111b5a Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 18 Jun 2009 16:52:03 +0100
Subject: cosmetic

---
 src/rabbit_mixed_queue.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index e7ac171c..1793b635 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -239,7 +239,7 @@ deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable,
 deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable,
                            msg_buf = MsgBuf, length = Length }) ->
     {{value, {Msg = #basic_message { guid = MsgId,
-                                      is_persistent = IsPersistent },
+                                     is_persistent = IsPersistent },
               IsDelivered, OnDisk}}, MsgBuf1} = queue:out(MsgBuf),
     AckTag =
-- cgit v1.2.1


From fee75199308a236a973848d05428b3e6d05b429c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 18 Jun 2009 17:27:27 +0100
Subject: initial work making the mixed_queue keep track of some number
 vaguely related to memory use when it's in disk only mode, which could be
 used to estimate how much more memory is needed to switch to mixed mode

---
 src/rabbit_mixed_queue.erl | 133 ++++++++++++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 51 deletions(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 1793b635..edbc51a6 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -39,13 +39,14 @@
          tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1,
          length/1, is_empty/1, delete_queue/1]).
 
--export([to_disk_only_mode/1, to_mixed_mode/1]).
+-export([to_disk_only_mode/1, to_mixed_mode/1, estimate_extra_memory/1]).
 
 -record(mqstate, { mode,
                    msg_buf,
                    queue,
                    is_durable,
-                   length
+                   length,
+                   memory_size
                  }
        ).
 
@@ -56,7 +57,8 @@
               msg_buf :: queue(),
               queue :: queue_name(),
               is_durable :: bool(),
-              length :: non_neg_integer()
+              length :: non_neg_integer(),
+              memory_size :: non_neg_integer()
             }).
 -type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })).
 -type(okmqs() :: {'ok', mqstate()}).
@@ -79,13 +81,14 @@
 
 -spec(length/1 :: (mqstate()) -> non_neg_integer()).
 -spec(is_empty/1 :: (mqstate()) -> bool()).
+-spec(estimate_extra_memory/1 :: (mqstate()) -> non_neg_integer()).
 
 -endif.
 
 init(Queue, IsDurable, disk) ->
     purge_non_persistent_messages(
       #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue,
-                 is_durable = IsDurable, length = 0 });
+                 is_durable = IsDurable, length = 0, memory_size = 0 });
 init(Queue, IsDurable, mixed) ->
     {ok, State} = init(Queue, IsDurable, disk),
     to_mixed_mode(State).
@@ -102,30 +105,35 @@ to_disk_only_mode(State =
     %% Note we also batch together messages on disk so that we minimise
     %% the calls to requeue.
Msgs = queue:to_list(MsgBuf), - Requeue = + {Requeue, Size} = lists:foldl( fun ({Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}, - RQueueAcc) -> - if OnDisk -> - {MsgId, IsDelivered, AckTag, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), - [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; - true -> - ok = if [] == RQueueAcc -> ok; - true -> - rabbit_disk_queue:requeue_with_seqs( - Q, lists:reverse(RQueueAcc)) - end, - ok = rabbit_disk_queue:publish( - Q, MsgId, msg_to_bin(Msg), false), - [] - end - end, [], Msgs), + {RQueueAcc, SizeAcc}) -> + {MsgBin, MsgSize} = msg_to_bin(Msg), + SizeAcc1 = SizeAcc + MsgSize, + RQueueAcc1 = + if OnDisk -> + {MsgId, IsDelivered, AckTag, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; + true -> + ok = if [] == RQueueAcc -> ok; + true -> + rabbit_disk_queue:requeue_with_seqs( + Q, lists:reverse(RQueueAcc)) + end, + ok = rabbit_disk_queue:publish( + Q, MsgId, MsgBin, false), + [] + end, + {RQueueAcc1, SizeAcc1} + end, {[], 0}, Msgs), ok = if [] == Requeue -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, - {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. + {ok, + State #mqstate { mode = disk, msg_buf = queue:new(), memory_size = Size }}. to_mixed_mode(State = #mqstate { mode = mixed }) -> {ok, State}; @@ -141,7 +149,7 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) -> Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), {queue:in({Msg, IsDelivered, true}, Buf), L+1} end, {queue:new(), 0}, QList), - {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. + {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1, memory_size = 0 }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> @@ -178,21 +186,25 @@ deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), - term_to_binary(Msg #basic_message { content = ClearedContent }). + Bin = term_to_binary(Msg #basic_message { content = ClearedContent }), + {Bin, size(Bin)}. bin_to_msg(MsgBin) -> binary_to_term(MsgBin). 
publish(Msg = #basic_message { guid = MsgId }, - State = #mqstate { mode = disk, queue = Q, length = Length }) -> - ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), - {ok, State #mqstate { length = Length + 1 }}; + State = #mqstate { mode = disk, queue = Q, length = Length, + memory_size = Size}) -> + {MsgBin, MsgSize} = msg_to_bin(Msg), + ok = rabbit_disk_queue:publish(Q, MsgId, MsgBin, false), + {ok, State #mqstate { length = Length + 1, memory_size = Size + MsgSize }}; publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> OnDisk = IsDurable andalso IsPersistent, + {MsgBin, _MsgSize} = msg_to_bin(Msg), ok = if OnDisk -> - rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false); + rabbit_disk_queue:publish(Q, MsgId, MsgBin, false); true -> ok end, {ok, State #mqstate { msg_buf = queue:in({Msg, false, OnDisk}, MsgBuf), @@ -205,7 +217,8 @@ publish_delivered(Msg = State = #mqstate { mode = Mode, is_durable = IsDurable, queue = Q, length = 0 }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> - rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), + {MsgBin, _MsgSize} = msg_to_bin(Msg), + rabbit_disk_queue:publish(Q, MsgId, MsgBin, false), if IsDurable andalso IsPersistent -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but @@ -224,8 +237,8 @@ publish_delivered(_Msg, State = #mqstate { mode = mixed, length = 0 }) -> deliver(State = #mqstate { length = 0 }) -> {empty, State}; deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - length = Length }) -> - {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} + length = Length, memory_size = QSize }) -> + {MsgId, MsgBin, Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), #basic_message { guid = MsgId, is_persistent = IsPersistent } = Msg = bin_to_msg(MsgBin), @@ -234,8 +247,7 @@ deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, noack end, {{Msg, IsDelivered, AckTag1, Remaining}, - State #mqstate { length = Length - 1}}; - + State #mqstate { length = Length - 1, memory_size = QSize - Size }}; deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> {{value, {Msg = #basic_message { guid = MsgId, @@ -269,13 +281,15 @@ ack(Acks, State = #mqstate { queue = Q }) -> end. tx_publish(Msg = #basic_message { guid = MsgId }, - State = #mqstate { mode = disk }) -> - ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), - {ok, State}; + State = #mqstate { mode = disk, memory_size = Size }) -> + {MsgBin, MsgSize} = msg_to_bin(Msg), + ok = rabbit_disk_queue:tx_publish(MsgId, MsgBin), + {ok, State #mqstate { memory_size = Size + MsgSize }}; tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #mqstate { mode = mixed, is_durable = IsDurable }) when IsDurable andalso IsPersistent -> - ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), + {MsgBin, _MsgSize} = msg_to_bin(Msg), + ok = rabbit_disk_queue:tx_publish(MsgId, MsgBin), {ok, State}; tx_publish(_Msg, State = #mqstate { mode = mixed }) -> %% this message will reappear in the tx_commit, so ignore for now @@ -328,9 +342,15 @@ only_persistent_msg_ids(Pubs) -> end end, [], Pubs)). 
-tx_cancel(Publishes, State = #mqstate { mode = disk }) -> - ok = rabbit_disk_queue:tx_cancel(only_msg_ids(Publishes)), - {ok, State}; +tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = TSize }) -> + {MsgIds, CSize} = + lists:foldl( + fun (Msg = #basic_message { guid = MsgId }, {MsgIdsAcc, CSizeAcc}) -> + {_MsgBin, MsgSize} = msg_to_bin(Msg), + {[MsgId | MsgIdsAcc], CSizeAcc + MsgSize} + end, {[], 0}, Publishes), + ok = rabbit_disk_queue:tx_cancel(lists:reverse(MsgIds)), + {ok, State #mqstate { memory_size = TSize - CSize }}; tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable }) -> ok = @@ -343,26 +363,34 @@ tx_cancel(Publishes, %% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - length = Length }) -> + length = Length, + memory_size = TSize + }) -> %% here, we may have messages with no ack tags, because of the %% fact they are not persistent, but nevertheless we want to %% requeue them. This means publishing them delivered. - Requeue + {Requeue, CSize} = lists:foldl( - fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) + fun ({Msg = #basic_message { is_persistent = IsPersistent }, + AckTag}, {RQ, SizeAcc}) when IsPersistent andalso IsDurable -> - [AckTag | RQ]; - ({Msg = #basic_message { guid = MsgId }, _AckTag}, RQ) -> + {_MsgBin, MsgSize} = msg_to_bin(Msg), + {[AckTag | RQ], SizeAcc + MsgSize}; + ({Msg = #basic_message { guid = MsgId }, _AckTag}, + {RQ, SizeAcc}) -> ok = if RQ == [] -> ok; true -> rabbit_disk_queue:requeue( Q, lists:reverse(RQ)) end, + {MsgBin, MsgSize} = msg_to_bin(Msg), _AckTag1 = rabbit_disk_queue:publish( - Q, MsgId, msg_to_bin(Msg), true), - [] - end, [], MessagesWithAckTags), + Q, MsgId, MsgBin, true), + {[], SizeAcc + MsgSize} + end, {[], 0}, MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), - {ok, State #mqstate {length = Length + erlang:length(MessagesWithAckTags)}}; + {ok, State #mqstate { length = Length + erlang:length(MessagesWithAckTags), + memory_size = TSize + CSize + }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, @@ -387,14 +415,14 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, purge(State = #mqstate { queue = Q, mode = disk, length = Count }) -> Count = rabbit_disk_queue:purge(Q), - {Count, State #mqstate { length = 0 }}; + {Count, State #mqstate { length = 0, memory_size = 0 }}; purge(State = #mqstate { queue = Q, mode = mixed, length = Length }) -> rabbit_disk_queue:purge(Q), {Length, State #mqstate { msg_buf = queue:new(), length = 0 }}. delete_queue(State = #mqstate { queue = Q, mode = disk }) -> rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { length = 0 }}; + {ok, State #mqstate { length = 0, memory_size = 0 }}; delete_queue(State = #mqstate { queue = Q, mode = mixed }) -> rabbit_disk_queue:delete_queue(Q), {ok, State #mqstate { msg_buf = queue:new(), length = 0 }}. @@ -404,3 +432,6 @@ length(#mqstate { length = Length }) -> is_empty(#mqstate { length = Length }) -> 0 == Length. + +estimate_extra_memory(#mqstate { memory_size = Size }) -> + 2 * Size. %% Magic number. Will probably need playing with. -- cgit v1.2.1 From d6d781c13aa76c4ff9612800fef6e1f3b2242384 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 18 Jun 2009 17:30:51 +0100 Subject: exercise tiny area of the code to try and suggest it might be a zero-sum game. 
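
Roughly what the printed numbers should show (sketch only; the variable
names are illustrative, the API is as in the previous patch):

    %% estimate_extra_memory/1 approximates how much more memory would
    %% be needed to switch to mixed mode: 0 when already mixed, roughly
    %% 2x the message payload size when in disk only mode, and back to
    %% 0 after converting again, hence the "zero-sum game".
    {ok, MQ0} = rabbit_mixed_queue:init(q, true, mixed),
    0 = rabbit_mixed_queue:estimate_extra_memory(MQ0),
    {ok, MQ1} = rabbit_mixed_queue:to_disk_only_mode(MQ0),
    true = (0 =< rabbit_mixed_queue:estimate_extra_memory(MQ1)),
    {ok, MQ2} = rabbit_mixed_queue:to_mixed_mode(MQ1),
    0 = rabbit_mixed_queue:estimate_extra_memory(MQ2).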
--- src/rabbit_tests.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f45a36bb..3d25399d 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -975,13 +975,16 @@ rdq_test_mixed_queue_modes() -> MS5a end, MS4, lists:seq(1,10)), 30 = rabbit_mixed_queue:length(MS6), - io:format("Published a mixture of messages~n"), + io:format("Published a mixture of messages; ~w~n", + [rabbit_mixed_queue:estimate_extra_memory(MS6)]), {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode(MS6), 30 = rabbit_mixed_queue:length(MS7), - io:format("Converted to disk only mode~n"), + io:format("Converted to disk only mode; ~w~n", + [rabbit_mixed_queue:estimate_extra_memory(MS7)]), {ok, MS8} = rabbit_mixed_queue:to_mixed_mode(MS7), 30 = rabbit_mixed_queue:length(MS8), - io:format("Converted to mixed mode~n"), + io:format("Converted to mixed mode; ~w~n", + [rabbit_mixed_queue:estimate_extra_memory(MS8)]), MS10 = lists:foldl( fun (N, MS9) -> @@ -1020,6 +1023,7 @@ rdq_test_mixed_queue_modes() -> rdq_start(), {ok, MS17} = rabbit_mixed_queue:init(q, true, mixed), 0 = rabbit_mixed_queue:length(MS17), + 0 = rabbit_mixed_queue:estimate_extra_memory(MS17), io:format("Recovered queue~n"), rdq_stop(), passed. -- cgit v1.2.1 From 9866d0b2daaf9d03902092626ee2a081b41a3904 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 18 Jun 2009 18:16:26 +0100 Subject: wiring things up --- src/rabbit_amqqueue.erl | 6 +++++- src/rabbit_amqqueue_process.erl | 7 ++++++- src/rabbit_control.erl | 2 +- src/rabbit_queue_mode_manager.erl | 25 +++++++++++++++++++++---- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 9d3cead6..57269c53 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([constrain_memory/2]). +-export([constrain_memory/2, report_desired_memory/1]). -import(mnesia). -import(gen_server2). @@ -109,6 +109,7 @@ -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). +-spec(report_desired_memory/1 :: (pid()) -> non_neg_integer()). -endif. @@ -356,6 +357,9 @@ pseudo_queue(QueueName, Pid) -> arguments = [], pid = Pid}. +report_desired_memory(QPid) -> + gen_server2:pcall(QPid, 9, report_desired_memory, infinity). + safe_pmap_ok(H, F, L) -> case [R || R <- rabbit_misc:upmap( fun (V) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6869846d..084529a4 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -720,7 +720,12 @@ handle_call({claim_queue, ReaderPid}, _From, reply(ok, State); _ -> reply(locked, State) - end. + end; + +handle_call(report_desired_memory, _From, State = #q { mixed_state = MS }) -> + MSize = rabbit_mixed_queue:estimate_extra_memory(MS), + {memory, PSize} = process_info(self(), memory), + reply(PSize + MSize, State). handle_cast({deliver, Txn, Message, ChPid}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. 
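
%% (Sketch, not part of the patch: the figure a queue reports above is its
%%  process memory plus the mixed queue's own estimate of message memory,
%%  i.e. roughly
%%      MSize = rabbit_mixed_queue:estimate_extra_memory(MS),
%%      {memory, PSize} = process_info(self(), memory),
%%      PSize + MSize
%%  which is what report_desired_memory hands back to the mode manager.)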
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 586c06c0..9c1553b8 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -284,7 +284,7 @@ action(reduce_memory_footprint, Node, _Args, Inform) -> call(Node, {rabbit_queue_mode_manager, reduce_memory_footprint, []}); action(increase_memory_footprint, Node, _Args, Inform) -> - Inform("Reducing memory footprint", []), + Inform("Increasing memory footprint", []), call(Node, {rabbit_queue_mode_manager, increase_memory_footprint, []}); action(Command, Node, Args, Inform) -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index b36bb8be..e317feda 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -39,13 +39,14 @@ terminate/2, code_change/3]). -export([register/1, change_memory_footprint/2, - reduce_memory_footprint/0, increase_memory_footprint/0]). + reduce_memory_footprint/0, increase_memory_footprint/0, + gather_memory_estimates/0 + ]). -define(SERVER, ?MODULE). -ifdef(use_specs). --type(mode() :: ( 'unlimited' | 'ram_disk' | 'disk_only' )). -type(queue_mode() :: ( 'mixed' | 'disk' )). -spec(start_link/0 :: () -> @@ -75,10 +76,14 @@ reduce_memory_footprint() -> increase_memory_footprint() -> gen_server2:cast(?SERVER, {change_memory_footprint, false}). - + +gather_memory_estimates() -> + gen_server2:cast(?SERVER, gather_memory_estimates). + init([]) -> process_flag(trap_exit, true), ok = rabbit_alarm:register(self(), {?MODULE, change_memory_footprint, []}), + {ok, _TRef} = timer:apply_interval(5000, ?MODULE, gather_memory_estimates, []), {ok, #state { mode = unlimited, queues = [] }}. @@ -113,7 +118,11 @@ handle_cast({change_memory_footprint, false}, handle_cast({change_memory_footprint, false}, State = #state { mode = disk_only }) -> constrain_queues(false, State #state.queues), - {noreply, State #state { mode = ram_disk }}. + {noreply, State #state { mode = ram_disk }}; + +handle_cast(gather_memory_estimates, State) -> + State1 = internal_gather(State), + {noreply, State1}. handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -131,3 +140,11 @@ constrain_queues(Constrain, Qs) -> fun (QPid) -> ok = rabbit_amqqueue:constrain_memory(QPid, Constrain) end, Qs). + +internal_gather(State = #state { queues = Qs }) -> + lists:foreach(fun(Q) -> + io:format("Queue memory request: ~w is ~w bytes~n", + [Q, rabbit_amqqueue:report_desired_memory(Q) + ]) + end, Qs), + State. -- cgit v1.2.1 From 28e78457c0a1fd762ffd963ebf900fc46b4d0b2e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Jun 2009 11:28:11 +0100 Subject: Reworked reporting of memory requests so that the queues are proactive about deciding when to report. This isn't quite good enough though because GC means the memory size fluctuates too much. Need to switch to just grabbing the size of the messages in the queue. --- src/rabbit_amqqueue.erl | 6 +---- src/rabbit_amqqueue_process.erl | 53 +++++++++++++++++++++++++++++++-------- src/rabbit_queue_mode_manager.erl | 38 +++++++++++++--------------- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 57269c53..9d3cead6 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([constrain_memory/2, report_desired_memory/1]). +-export([constrain_memory/2]). -import(mnesia). 
-import(gen_server2). @@ -109,7 +109,6 @@ -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). --spec(report_desired_memory/1 :: (pid()) -> non_neg_integer()). -endif. @@ -357,9 +356,6 @@ pseudo_queue(QueueName, Pid) -> arguments = [], pid = Pid}. -report_desired_memory(QPid) -> - gen_server2:pcall(QPid, 9, report_desired_memory, infinity). - safe_pmap_ok(H, F, L) -> case [R || R <- rabbit_misc:upmap( fun (V) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 084529a4..19f6f308 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -37,6 +37,7 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(HIBERNATE_AFTER, 1000). +-define(MEMORY_REPORT_INTERVAL, 500). -export([start_link/1]). @@ -55,7 +56,10 @@ mixed_state, next_msg_id, active_consumers, - blocked_consumers}). + blocked_consumers, + memory_report_counter, + old_memory_report + }). -record(consumer, {tag, ack_required}). @@ -104,7 +108,10 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> mixed_state = MS, next_msg_id = 1, active_consumers = queue:new(), - blocked_consumers = queue:new()}, ?HIBERNATE_AFTER}. + blocked_consumers = queue:new(), + memory_report_counter = ?MEMORY_REPORT_INTERVAL, + old_memory_report = 1 + }, ?HIBERNATE_AFTER}. terminate(_Reason, State) -> %% FIXME: How do we cancel active subscriptions? @@ -121,9 +128,16 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -reply(Reply, NewState) -> {reply, Reply, NewState, ?HIBERNATE_AFTER}. +reply(Reply, NewState = #q { memory_report_counter = 0 }) -> + {reply, Reply, report_memory(NewState), ?HIBERNATE_AFTER}; +reply(Reply, NewState = #q { memory_report_counter = C }) -> + {reply, Reply, NewState #q { memory_report_counter = C - 1 }, + ?HIBERNATE_AFTER}. -noreply(NewState) -> {noreply, NewState, ?HIBERNATE_AFTER}. +noreply(NewState = #q { memory_report_counter = 0}) -> + {noreply, report_memory(NewState), ?HIBERNATE_AFTER}; +noreply(NewState = #q { memory_report_counter = C}) -> + {noreply, NewState #q { memory_report_counter = C - 1 }, ?HIBERNATE_AFTER}. lookup_ch(ChPid) -> case get({ch, ChPid}) of @@ -524,6 +538,22 @@ i(memory, _) -> i(Item, _) -> throw({bad_argument, Item}). +report_memory(State = #q { old_memory_report = OldMem, + mixed_state = MS }) -> + MSize = rabbit_mixed_queue:estimate_extra_memory(MS), + {memory, PSize} = process_info(self(), memory), + NewMem = case MSize + PSize of + 0 -> 1; %% avoid / 0 + N -> N + end, + State1 = State #q { memory_report_counter = ?MEMORY_REPORT_INTERVAL }, + case (NewMem / OldMem) > 1.1 orelse (OldMem / NewMem) > 1.1 of + true -> + rabbit_queue_mode_manager:report_memory(self(), NewMem), + State1 #q { old_memory_report = NewMem }; + false -> State1 + end. + %--------------------------------------------------------------------------- handle_call(info, _From, State) -> @@ -720,12 +750,7 @@ handle_call({claim_queue, ReaderPid}, _From, reply(ok, State); _ -> reply(locked, State) - end; - -handle_call(report_desired_memory, _From, State = #q { mixed_state = MS }) -> - MSize = rabbit_mixed_queue:estimate_extra_memory(MS), - {memory, PSize} = process_info(self(), memory), - reply(PSize + MSize, State). + end. handle_cast({deliver, Txn, Message, ChPid}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. 
@@ -817,11 +842,17 @@ handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_ch_down(DownPid, State); -handle_info(timeout, State) -> +handle_info(timeout, State = #q { memory_report_counter = Count }) + when Count == ?MEMORY_REPORT_INTERVAL -> + %% Have to do the +1 because the timeout below, with noreply, will -1 %% TODO: Once we drop support for R11B-5, we can change this to %% {noreply, State, hibernate}; proc_lib:hibernate(gen_server2, enter_loop, [?MODULE, [], State]); +handle_info(timeout, State) -> + State1 = report_memory(State), + noreply(State1 #q { memory_report_counter = 1 + ?MEMORY_REPORT_INTERVAL }); + handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index e317feda..0e59f7d2 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -40,7 +40,7 @@ -export([register/1, change_memory_footprint/2, reduce_memory_footprint/0, increase_memory_footprint/0, - gather_memory_estimates/0 + report_memory/2 ]). -define(SERVER, ?MODULE). @@ -55,6 +55,7 @@ -spec(change_memory_footprint/2 :: (pid(), bool()) -> 'ok'). -spec(reduce_memory_footprint/0 :: () -> 'ok'). -spec(increase_memory_footprint/0 :: () -> 'ok'). +-spec(report_memory/2 :: (pid(), non_neg_integer()) -> 'ok'). -endif. @@ -77,24 +78,24 @@ reduce_memory_footprint() -> increase_memory_footprint() -> gen_server2:cast(?SERVER, {change_memory_footprint, false}). -gather_memory_estimates() -> - gen_server2:cast(?SERVER, gather_memory_estimates). +report_memory(Pid, Memory) -> + gen_server2:cast(?SERVER, {report_memory, Pid, Memory}). init([]) -> process_flag(trap_exit, true), ok = rabbit_alarm:register(self(), {?MODULE, change_memory_footprint, []}), - {ok, _TRef} = timer:apply_interval(5000, ?MODULE, gather_memory_estimates, []), {ok, #state { mode = unlimited, - queues = [] + queues = dict:new() }}. handle_call({register, Pid}, _From, State = #state { queues = Qs, mode = Mode }) -> + _MRef = erlang:monitor(process, Pid), Result = case Mode of unlimited -> mixed; _ -> disk end, - {reply, {ok, Result}, State #state { queues = [Pid | Qs] }}. + {reply, {ok, Result}, State #state { queues = dict:store(Pid, 0, Qs) }}. handle_cast({change_memory_footprint, true}, State = #state { mode = disk_only }) -> @@ -120,10 +121,13 @@ handle_cast({change_memory_footprint, false}, constrain_queues(false, State #state.queues), {noreply, State #state { mode = ram_disk }}; -handle_cast(gather_memory_estimates, State) -> - State1 = internal_gather(State), - {noreply, State1}. +handle_cast({report_memory, Pid, Memory}, State = #state { queues = Qs }) -> + io:format("Queue ~w requested ~w bytes~n", [Pid, Memory]), + {noreply, State #state { queues = dict:store(Pid, Memory, Qs) }}. +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #state { queues = Qs }) -> + {noreply, State #state { queues = dict:erase(Pid, Qs) }}; handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; handle_info(_Info, State) -> @@ -136,15 +140,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. constrain_queues(Constrain, Qs) -> - lists:foreach( - fun (QPid) -> - ok = rabbit_amqqueue:constrain_memory(QPid, Constrain) - end, Qs). 
- -internal_gather(State = #state { queues = Qs }) -> - lists:foreach(fun(Q) -> - io:format("Queue memory request: ~w is ~w bytes~n", - [Q, rabbit_amqqueue:report_desired_memory(Q) - ]) - end, Qs), - State. + dict:fold( + fun (QPid, _Mem, ok) -> + rabbit_amqqueue:constrain_memory(QPid, Constrain) + end, ok, Qs). -- cgit v1.2.1 From ead53c1c88bf2b70d676c44d593fa93dde13c722 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Jun 2009 15:35:44 +0100 Subject: Altered API so that the disk_queue understands about #basic_message. This means that the mixed_queue avoids unnecessary term_to_binary calls. Tests adjusted and whole test suite still passes --- src/rabbit_basic.erl | 9 +++++-- src/rabbit_disk_queue.erl | 67 +++++++++++++++++++++++++++------------------- src/rabbit_mixed_queue.erl | 46 ++++++++++++------------------- src/rabbit_tests.erl | 66 +++++++++++++++++++++++++++++++-------------- 4 files changed, 109 insertions(+), 79 deletions(-) diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 0673bdd8..f9a8f488 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -33,7 +33,7 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/1, message/4, delivery/4]). +-export([publish/1, message/4, message/5, delivery/4]). %%---------------------------------------------------------------------------- @@ -44,6 +44,8 @@ -spec(delivery/4 :: (bool(), bool(), maybe(txn()), message()) -> delivery()). -spec(message/4 :: (exchange_name(), routing_key(), binary(), binary()) -> message()). +-spec(message/5 :: (exchange_name(), routing_key(), binary(), binary(), guid()) -> + message()). -endif. @@ -64,6 +66,9 @@ delivery(Mandatory, Immediate, Txn, Message) -> sender = self(), message = Message}. message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin) -> + message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, rabbit_guid:guid()). + +message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, MsgId) -> {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), Content = #content{class_id = ClassId, properties = #'P_basic'{content_type = ContentTypeBin}, @@ -72,5 +77,5 @@ message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin) -> #basic_message{exchange_name = ExchangeName, routing_key = RoutingKeyBin, content = Content, - guid = rabbit_guid:guid(), + guid = MsgId, is_persistent = false}. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3370ef84..b133f538 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -38,8 +38,8 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/4, deliver/1, phantom_deliver/1, ack/2, - tx_publish/2, tx_commit/3, tx_cancel/1, +-export([publish/3, deliver/1, phantom_deliver/1, ack/2, + tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). @@ -235,21 +235,22 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(publish/4 :: (queue_name(), msg_id(), binary(), bool()) -> 'ok'). +-spec(publish/3 :: (queue_name(), message(), bool()) -> 'ok'). -spec(deliver/1 :: (queue_name()) -> - ('empty' | {msg_id(), binary(), non_neg_integer(), + ('empty' | {message(), non_neg_integer(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). -spec(phantom_deliver/1 :: (queue_name()) -> ( 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). 
-spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). --spec(tx_publish/2 :: (msg_id(), binary()) -> 'ok'). +-spec(tx_publish/1 :: (message()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). --spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, - seq_id_or_next()}]) -> 'ok'). +-spec(requeue_with_seqs/2 :: + (queue_name(), + [{{msg_id(), seq_id()}, {seq_id_or_next(), bool()}}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), binary(), non_neg_integer(), bool(), @@ -269,10 +270,10 @@ start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []). -publish(Q, MsgId, Msg, false) when is_binary(Msg) -> - gen_server2:cast(?SERVER, {publish, Q, MsgId, Msg}); -publish(Q, MsgId, Msg, true) when is_binary(Msg) -> - gen_server2:call(?SERVER, {publish, Q, MsgId, Msg}, infinity). +publish(Q, Message = #basic_message {}, false) -> + gen_server2:cast(?SERVER, {publish, Q, Message}); +publish(Q, Message = #basic_message {}, true) -> + gen_server2:call(?SERVER, {publish, Q, Message}, infinity). deliver(Q) -> gen_server2:call(?SERVER, {deliver, Q}, infinity). @@ -286,8 +287,8 @@ ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> auto_ack_next_message(Q) -> gen_server2:cast(?SERVER, {auto_ack_next_message, Q}). -tx_publish(MsgId, Msg) when is_binary(Msg) -> - gen_server2:cast(?SERVER, {tx_publish, MsgId, Msg}). +tx_publish(Message = #basic_message {}) -> + gen_server2:cast(?SERVER, {tx_publish, Message}). tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> @@ -403,9 +404,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> end, {ok, State1 #dqstate { current_file_handle = FileHdl }}. -handle_call({publish, Q, MsgId, MsgBody}, _From, State) -> +handle_call({publish, Q, Message}, _From, State) -> {ok, MsgSeqId, State1} = - internal_publish(Q, MsgId, next, MsgBody, true, State), + internal_publish(Q, Message, next, true, State), {reply, MsgSeqId, State1}; handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), @@ -470,9 +471,9 @@ handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), {reply, ok, State1}. -handle_cast({publish, Q, MsgId, MsgBody}, State) -> +handle_cast({publish, Q, Message}, State) -> {ok, _MsgSeqId, State1} = - internal_publish(Q, MsgId, next, MsgBody, false, State), + internal_publish(Q, Message, next, false, State), {noreply, State1}; handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), @@ -480,8 +481,8 @@ handle_cast({ack, Q, MsgSeqIds}, State) -> handle_cast({auto_ack_next_message, Q}, State) -> {ok, State1} = internal_auto_ack(Q, State), {noreply, State1}; -handle_cast({tx_publish, MsgId, MsgBody}, State) -> - {ok, State1} = internal_tx_publish(MsgId, MsgBody, State), +handle_cast({tx_publish, Message = #basic_message { guid = MsgId }}, State) -> + {ok, State1} = internal_tx_publish(MsgId, Message, State), {noreply, State1}; handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), @@ -676,6 +677,13 @@ sequence_lookup(Sequences, Q) -> {ReadSeqId, WriteSeqId, Length} end. 
+msg_to_bin(Msg = #basic_message { content = Content }) -> + ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), + term_to_binary(Msg #basic_message { content = ClearedContent }). + +bin_to_msg(MsgBin) -> + binary_to_term(MsgBin). + %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, ReadMsg, FakeDeliver, @@ -694,8 +702,8 @@ internal_deliver(Q, ReadMsg, FakeDeliver, case Result of {MsgId, Delivered, {MsgId, ReadSeqId}} -> {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}; - {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}} -> - {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}, + {Message, BodySize, Delivered, {MsgId, ReadSeqId}} -> + {Message, BodySize, Delivered, {MsgId, ReadSeqId}, Remaining} end, State1} end. @@ -718,7 +726,8 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> {FileHdl, State1} = get_read_handle(File, State), {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), - {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, + Message = bin_to_msg(MsgBody), + {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State1}; false -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} @@ -783,7 +792,7 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, State1 = compact(Files, State), {ok, State1}. -internal_tx_publish(MsgId, MsgBody, +internal_tx_publish(MsgId, Message, State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, current_offset = CurOffset, @@ -792,7 +801,8 @@ internal_tx_publish(MsgId, MsgBody, case dets_ets_lookup(State, MsgId) of [] -> %% New message, lots to do - {ok, TotalSize} = append_message(CurHdl, MsgId, MsgBody), + {ok, TotalSize} = + append_message(CurHdl, MsgId, msg_to_bin(Message)), true = dets_ets_insert_new(State, {MsgId, 1, CurName, CurOffset, TotalSize}), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = @@ -882,9 +892,10 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, {ok, State1 #dqstate { current_dirty = IsDirty1 }}. 
%% SeqId can be 'next' -internal_publish(Q, MsgId, SeqId, MsgBody, IsDelivered, State) -> +internal_publish(Q, Message = #basic_message { guid = MsgId }, SeqId, + IsDelivered, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = - internal_tx_publish(MsgId, MsgBody, State), + internal_tx_publish(MsgId, Message, State), {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), ReadSeqId3 = determine_next_read_id(ReadSeqId, WriteSeqId, SeqId), @@ -1023,12 +1034,12 @@ internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> fun ({SeqId, _State1}) when SeqId == WriteSeq -> false; ({SeqId, State1}) -> - {ok, {MsgId, Msg, Size, Delivered, {MsgId, SeqId}}, + {ok, {Message, Size, Delivered, {MsgId, SeqId}}, NextReadSeqId, State2} = internal_read_message(Q, SeqId, true, true, State1), {true, - {MsgId, Msg, Size, Delivered, {MsgId, SeqId}, SeqId}, + {Message, Size, Delivered, {MsgId, SeqId}, SeqId}, {NextReadSeqId, State2}} end, {ReadSeq, State}), {lists:reverse(QList), State3} diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index e7ac171c..9b99ab7f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -117,7 +117,7 @@ to_disk_only_mode(State = Q, lists:reverse(RQueueAcc)) end, ok = rabbit_disk_queue:publish( - Q, MsgId, msg_to_bin(Msg), false), + Q, Msg, false), [] end end, [], Msgs), @@ -136,9 +136,8 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) -> QList = rabbit_disk_queue:dump_queue(Q), {MsgBuf1, Length} = lists:foldl( - fun ({MsgId, MsgBin, _Size, IsDelivered, _AckTag, _SeqId}, + fun ({Msg, _Size, IsDelivered, _AckTag, _SeqId}, {Buf, L}) -> - Msg = #basic_message { guid = MsgId } = bin_to_msg(MsgBin), {queue:in({Msg, IsDelivered, true}, Buf), L+1} end, {queue:new(), 0}, QList), {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. @@ -162,9 +161,8 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> case rabbit_disk_queue:deliver(Q) of empty -> {Acks, Requeue, Length}; - {MsgId, MsgBin, _Size, IsDelivered, AckTag, _Remaining} -> - #basic_message { guid = MsgId, is_persistent = IsPersistent } = - bin_to_msg(MsgBin), + {#basic_message { is_persistent = IsPersistent }, + _Size, IsDelivered, AckTag, _Remaining} -> OnDisk = IsPersistent andalso IsDurable, {Acks1, Requeue1, Length1} = if OnDisk -> {Acks, @@ -176,23 +174,15 @@ deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1) end. -msg_to_bin(Msg = #basic_message { content = Content }) -> - ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), - term_to_binary(Msg #basic_message { content = ClearedContent }). - -bin_to_msg(MsgBin) -> - binary_to_term(MsgBin). 
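With the pair removed here, serialisation now lives in the disk queue alone. A short sketch, assuming the record definitions from rabbit.hrl, of the round-trip property that msg_to_bin/1 and bin_to_msg/1 rely on: the decoded content view is cleared before term_to_binary/1 so only the wire-format payload is written, and an equivalent record comes back on read.

    %% Serialise with the decoded content cleared, deserialise, and
    %% check the guid survives (the second MsgId match asserts equality).
    roundtrip(Msg = #basic_message { guid = MsgId, content = Content }) ->
        Cleared = rabbit_binary_parser:clear_decoded_content(Content),
        Bin = term_to_binary(Msg #basic_message { content = Cleared }),
        #basic_message { guid = MsgId } = binary_to_term(Bin),
        ok.
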
- -publish(Msg = #basic_message { guid = MsgId }, - State = #mqstate { mode = disk, queue = Q, length = Length }) -> - ok = rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), +publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length }) -> + ok = rabbit_disk_queue:publish(Q, Msg, false), {ok, State #mqstate { length = Length + 1 }}; -publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, +publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> OnDisk = IsDurable andalso IsPersistent, ok = if OnDisk -> - rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false); + rabbit_disk_queue:publish(Q, Msg, false); true -> ok end, {ok, State #mqstate { msg_buf = queue:in({Msg, false, OnDisk}, MsgBuf), @@ -205,7 +195,7 @@ publish_delivered(Msg = State = #mqstate { mode = Mode, is_durable = IsDurable, queue = Q, length = 0 }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> - rabbit_disk_queue:publish(Q, MsgId, msg_to_bin(Msg), false), + rabbit_disk_queue:publish(Q, Msg, false), if IsDurable andalso IsPersistent -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but @@ -225,10 +215,9 @@ deliver(State = #mqstate { length = 0 }) -> {empty, State}; deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, length = Length }) -> - {MsgId, MsgBin, _Size, IsDelivered, AckTag, Remaining} + {Msg = #basic_message { is_persistent = IsPersistent }, + _Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), - #basic_message { guid = MsgId, is_persistent = IsPersistent } = - Msg = bin_to_msg(MsgBin), AckTag1 = if IsPersistent andalso IsDurable -> AckTag; true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), noack @@ -268,14 +257,13 @@ ack(Acks, State = #mqstate { queue = Q }) -> {ok, State} end. -tx_publish(Msg = #basic_message { guid = MsgId }, - State = #mqstate { mode = disk }) -> - ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), +tx_publish(Msg, State = #mqstate { mode = disk }) -> + ok = rabbit_disk_queue:tx_publish(Msg), {ok, State}; -tx_publish(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, +tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { mode = mixed, is_durable = IsDurable }) when IsDurable andalso IsPersistent -> - ok = rabbit_disk_queue:tx_publish(MsgId, msg_to_bin(Msg)), + ok = rabbit_disk_queue:tx_publish(Msg), {ok, State}; tx_publish(_Msg, State = #mqstate { mode = mixed }) -> %% this message will reappear in the tx_commit, so ignore for now @@ -352,13 +340,13 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) when IsPersistent andalso IsDurable -> [AckTag | RQ]; - ({Msg = #basic_message { guid = MsgId }, _AckTag}, RQ) -> + ({Msg, _AckTag}, RQ) -> ok = if RQ == [] -> ok; true -> rabbit_disk_queue:requeue( Q, lists:reverse(RQ)) end, _AckTag1 = rabbit_disk_queue:publish( - Q, MsgId, msg_to_bin(Msg), true), + Q, Msg, true), [] end, [], MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f45a36bb..bddb451a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -716,6 +716,15 @@ benchmark_disk_queue() -> ok = control_action(start_app, []), passed. 
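The test changes that follow lean on two helpers added in the next hunk: rdq_message/2 builds a message with a caller-chosen guid, and rdq_match_message/4 asserts that a delivered message carries the expected id, payload and size. A sketch of how they combine, assuming a running disk queue and the queue name q the tests use throughout:

    %% Publish one message, then assert the delivered copy matches.
    %% deliver/1 now returns {Message, BodySize, IsDelivered, AckTag,
    %% Remaining}; the expected payload <<"abc">> has size 3.
    publish_and_check() ->
        ok = rabbit_disk_queue:publish(q, rdq_message(1, <<"abc">>), false),
        {Message, _Size, false, _AckTag, _Remaining} =
            rabbit_disk_queue:deliver(q),
        ok = rdq_match_message(Message, 1, <<"abc">>, 3).
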
+rdq_message(MsgId, MsgBody) -> + rabbit_basic:message(x, <<>>, <<>>, MsgBody, MsgId). + +rdq_match_message( + #basic_message { guid = MsgId, content = + #content { payload_fragments_rev = [MsgBody] }}, + MsgId, MsgBody, Size) when size(MsgBody) =:= Size -> + ok. + rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> Startup = rdq_virgin(), rdq_start(), @@ -724,7 +733,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> List = lists:seq(1, MsgCount), {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(N, Msg) + [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- List, _ <- Qs] end, fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List, []) || Q <- Qs] end @@ -735,8 +744,9 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> [[fun() -> [begin SeqIds = [begin Remaining = MsgCount - N, - {N, Msg, MsgSizeBytes, false, SeqId, + {Message, _TSize, false, SeqId, Remaining} = rabbit_disk_queue:deliver(Q), + ok = rdq_match_message(Message, N, Msg, MsgSizeBytes), SeqId end || N <- List], ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) @@ -759,7 +769,7 @@ rdq_stress_gc(MsgCount) -> MsgSizeBytes = 256*1024, Msg = <<0:(8*MsgSizeBytes)>>, % 256KB List = lists:seq(1, MsgCount), - [rabbit_disk_queue:tx_publish(N, Msg) || N <- List], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- List], rabbit_disk_queue:tx_commit(q, List, []), StartChunk = round(MsgCount / 20), % 5% AckList = @@ -780,8 +790,9 @@ rdq_stress_gc(MsgCount) -> lists:foldl( fun (MsgId, Acc) -> Remaining = MsgCount - MsgId, - {MsgId, Msg, MsgSizeBytes, false, SeqId, Remaining} = + {Message, _TSize, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, MsgId, Msg, MsgSizeBytes), dict:store(MsgId, SeqId, Acc) end, dict:new(), List), %% we really do want to ack each of this individually @@ -800,14 +811,16 @@ rdq_test_startup_with_queue_gaps() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half Seqs = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), SeqId + {Message, _TSize, false, SeqId, Remaining} = + rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), + SeqId end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), %% ack every other message we have delivered (starting at the _first_) @@ -826,8 +839,9 @@ rdq_test_startup_with_queue_gaps() -> %% lists:seq(2,500,2) already delivered Seqs2 = [begin Remaining = round(Total - ((Half + N)/2)), - {N, Msg, 256, true, SeqId, Remaining} = + {Message, _TSize, true, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(2,Half,2)], rabbit_disk_queue:tx_commit(q, [], Seqs2), @@ -835,8 +849,9 @@ rdq_test_startup_with_queue_gaps() -> %% and now fetch the rest Seqs3 = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = + {Message, _TSize, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1 + Half,Total)], rabbit_disk_queue:tx_commit(q, [], Seqs3), @@ -852,14 +867,15 @@ rdq_test_redeliver() -> Total = 1000, Half = round(Total/2), All = 
lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half Seqs = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = + {Message, _TSize, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), @@ -878,15 +894,17 @@ rdq_test_redeliver() -> %% every-other-from-the-first-half Seqs2 = [begin Remaining = round(Total - N + (Half/2)), - {N, Msg, 256, false, SeqId, Remaining} = + {Message, _TSize, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1+Half, Total)], rabbit_disk_queue:tx_commit(q, [], Seqs2), Seqs3 = [begin Remaining = round((Half - N) / 2) - 1, - {N, Msg, 256, true, SeqId, Remaining} = + {Message, _TSize, true, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1, Half, 2)], rabbit_disk_queue:tx_commit(q, [], Seqs3), @@ -901,14 +919,15 @@ rdq_test_purge() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half Seqs = [begin Remaining = Total - N, - {N, Msg, 256, false, SeqId, Remaining} = + {Message, _TSize, false, SeqId, Remaining} = rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1,Half)], io:format("Deliver first half done~n", []), @@ -926,10 +945,13 @@ rdq_test_dump_queue() -> Msg = <<0:(8*256)>>, Total = 1000, All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(N, Msg) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), - QList = [{N, Msg, 256, false, {N, (N-1)}, (N-1)} || N <- All], + QList = [begin Message = rdq_message(N, Msg), + Size = size(term_to_binary(Message)), + {Message, Size, false, {N, (N-1)}, (N-1)} + end || N <- All], QList = rabbit_disk_queue:dump_queue(q), rdq_stop(), io:format("dump ok undelivered~n", []), @@ -937,14 +959,18 @@ rdq_test_dump_queue() -> lists:foreach( fun (N) -> Remaining = Total - N, - {N, Msg, 256, false, _SeqId, Remaining} = - rabbit_disk_queue:deliver(q) + {Message, _TSize, false, _SeqId, Remaining} = + rabbit_disk_queue:deliver(q), + ok = rdq_match_message(Message, N, Msg, 256) end, All), [] = rabbit_disk_queue:dump_queue(q), rdq_stop(), io:format("dump ok post delivery~n", []), rdq_start(), - QList2 = [{N, Msg, 256, true, {N, (N-1)}, (N-1)} || N <- All], + QList2 = [begin Message = rdq_message(N, Msg), + Size = size(term_to_binary(Message)), + {Message, Size, true, {N, (N-1)}, (N-1)} + end || N <- All], QList2 = rabbit_disk_queue:dump_queue(q), io:format("dump ok post delivery + restart~n", []), rdq_stop(), -- cgit v1.2.1 From 01394c9a224e536c3e7e44723dad32ca269f9ca4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Jun 2009 16:49:29 +0100 Subject: Added caching layer using ets which, when a message is shared between multiple queues, eliminates the need for multiple reads, provided the /next/ copy of the message is requested before the previous copy of the 
message has been acked. Should reduce memory pressure. --- src/rabbit_disk_queue.erl | 71 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b133f538..a7d4e6e3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -58,6 +58,7 @@ -define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). +-define(CACHE_ETS_NAME, rabbit_disk_queue_cache). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(FILE_EXTENSION_DETS, ".dets"). @@ -82,7 +83,8 @@ %% since the last fsync? file_size_limit, %% how big can our files get? read_file_handles, %% file handles for reading (LRU) - read_file_handles_limit %% how many file handles can we open? + read_file_handles_limit, %% how many file handles can we open? + message_cache %% ets message cache }). %% The components: @@ -385,8 +387,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_dirty = false, file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, - read_file_handles_limit = ReadFileHandlesLimit - }, + read_file_handles_limit = ReadFileHandlesLimit, + message_cache = ets:new(?CACHE_ETS_NAME, + [set, private]) + }, {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = load_from_disk(State), @@ -684,6 +688,37 @@ msg_to_bin(Msg = #basic_message { content = Content }) -> bin_to_msg(MsgBin) -> binary_to_term(MsgBin). +remove_cache_entry(MsgId, #dqstate { message_cache = Cache }) -> + true = ets:delete(Cache, MsgId), + ok. + +fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> + case ets:lookup(Cache, MsgId) of + [] -> + not_found; + [{MsgId, Message, MsgSize, _RefCount}] -> + NewRefCount = ets:update_counter(Cache, MsgId, {4, 1}), + {Message, MsgSize, NewRefCount} + end. + +decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> + true = try case ets:update_counter(Cache, MsgId, {4, -1}) of + 0 -> ets:delete(Cache, MsgId); + _N -> true + end + catch error:badarg -> + %% MsgId is not in there because although it's been + %% delivered, it's never actually been read (think: + %% persistent message in mixed queue) + true + end, + ok. + +insert_into_cache(Message = #basic_message { guid = MsgId }, + MsgSize, #dqstate { message_cache = Cache }) -> + true = ets:insert_new(Cache, {MsgId, Message, MsgSize, 1}), + ok. 
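Taken together, the four helpers above implement a reference-counted cache. A standalone sketch of the lifecycle they encode, using a throwaway table and dummy values: a message enters with a count of 1 on its first read, each further read increments the count, each ack decrements it, and the entry is dropped at zero.

    cache_lifecycle() ->
        Cache = ets:new(example_cache, [set, private]),
        %% first read of a shared message: insert with refcount 1
        true = ets:insert_new(Cache, {msg_a, dummy_message, 256, 1}),
        %% a second queue reads the same message: refcount 2
        2 = ets:update_counter(Cache, msg_a, {4, 1}),
        %% two acks bring it back down; delete the entry at zero
        1 = ets:update_counter(Cache, msg_a, {4, -1}),
        0 = ets:update_counter(Cache, msg_a, {4, -1}),
        true = ets:delete(Cache, msg_a),
        ok.
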
+ %% ---- INTERNAL RAW FUNCTIONS ---- internal_deliver(Q, ReadMsg, FakeDeliver, @@ -713,7 +748,7 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, next_seq_id = NextReadSeqId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), - [{MsgId, _RefCount, File, Offset, TotalSize}] = + [{MsgId, RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), ok = if FakeDeliver orelse Delivered -> ok; @@ -723,12 +758,27 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> end, case ReadMsg of true -> - {FileHdl, State1} = get_read_handle(File, State), - {ok, {MsgBody, BodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize), - Message = bin_to_msg(MsgBody), - {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, - NextReadSeqId, State1}; + case fetch_and_increment_cache(MsgId, State) of + false -> + {FileHdl, State1} = get_read_handle(File, State), + {ok, {MsgBody, BodySize}} = + read_message_at_offset(FileHdl, Offset, TotalSize), + Message = bin_to_msg(MsgBody), + ok = case RefCount of + 1 -> + %% it's not in the cache and we only + %% have 1 queue with the message. So + %% don't bother putting it in the + %% queue. + ok; + _ -> insert_into_cache(Message, BodySize, State1) + end, + {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, + NextReadSeqId, State1}; + {Message, BodySize, _RefCount} -> + {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, + NextReadSeqId, State} + end; false -> {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} end. @@ -758,6 +808,7 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, fun ({MsgId, SeqId}, Files1) -> [{MsgId, RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), + ok = decrement_cache(MsgId, State), Files2 = case RefCount of 1 -> -- cgit v1.2.1 From 35833f1de8efa38aa3062052506e0f667f5f15a8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Jun 2009 17:17:56 +0100 Subject: fixes --- src/rabbit_disk_queue.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a7d4e6e3..329b453a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -759,7 +759,7 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> case ReadMsg of true -> case fetch_and_increment_cache(MsgId, State) of - false -> + not_found -> {FileHdl, State1} = get_read_handle(File, State), {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), @@ -808,11 +808,11 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, fun ({MsgId, SeqId}, Files1) -> [{MsgId, RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), - ok = decrement_cache(MsgId, State), Files2 = case RefCount of 1 -> ok = dets_ets_delete(State, MsgId), + ok = remove_cache_entry(MsgId, State), [{File, ValidTotalSize, ContiguousTop, Left, Right}] = ets:lookup(FileSummary, File), ContiguousTop1 = @@ -826,6 +826,7 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, true -> sets:add_element(File, Files1) end; _ when 1 < RefCount -> + ok = decrement_cache(MsgId, State), ok = dets_ets_insert( State, {MsgId, RefCount - 1, File, Offset, TotalSize}), -- cgit v1.2.1 From 503f152aee89c964c435970d4456edeb69ccf9cb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Jun 2009 17:52:00 +0100 Subject: Just added a means to get the ets:info out for the cache. Testing shows that it does seem to get emptied successfully. 
So, using this revision, if you run: rabbitmq-java-client/build/dist$ sh runjava.sh com/rabbitmq/examples/MulticastMain -y 10 -r 50 -s 1048576 -m 100 -z 120 then over the two mins, I see beam take between about 30% and 45% of my memory, once it's up and running. Using the revision right after the API change, i.e. 9f0ee0399838, the same test tries to take between about 45% and 60% of my memory. Don't forget to run: rabbitmq-server$ ./scripts/rabbitmqctl reduce_memory_footprint rabbitmq-server$ ./scripts/rabbitmqctl reduce_memory_footprint before running the above test. --- src/rabbit_disk_queue.erl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 329b453a..c8f72631 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -44,7 +44,7 @@ dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). --export([length/1]). +-export([length/1, get_cache_info/0]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -263,6 +263,7 @@ -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). -spec(length/1 :: (queue_name()) -> non_neg_integer()). +-spec(get_cache_info/0 :: () -> [{atom(), term()}]). -endif. @@ -333,6 +334,9 @@ to_ram_disk_mode() -> length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). +get_cache_info() -> + gen_server2:call(?SERVER, get_cache_info, infinity). + %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -473,7 +477,9 @@ handle_call({dump_queue, Q}, _From, State) -> {reply, Result, State1}; handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), - {reply, ok, State1}. + {reply, ok, State1}; +handle_call(get_cache_info, _From, State = #dqstate { message_cache = Cache }) -> + {reply, ets:info(Cache), State}. handle_cast({publish, Q, Message}, State) -> {ok, _MsgSeqId, State1} = -- cgit v1.2.1 From c8060dfc66eb29bf486733780da178cc4d4536fb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Jun 2009 18:17:16 +0100 Subject: get_cache_info ==> cache_info. An even better test (see parent commit message) is: rabbitmq-java-client/build/dist$ sh runjava.sh com/rabbitmq/examples/MulticastMain -y 50 -r 100 -s 1048576 -m 100 -z 120 Rabbit will now happily just sit there and work away (again, run reduce_memory_footprint twice first) even though it's seeing 100MB new a second which is going to 50 consumers, so 5GB a second. Needless to say, go back a few revisions, and it blows up within seconds. --- src/rabbit_disk_queue.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c8f72631..6374fc6f 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -44,7 +44,7 @@ dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 ]). --export([length/1, get_cache_info/0]). +-export([length/1, cache_info/0]). -export([stop/0, stop_and_obliterate/0, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -263,7 +263,7 @@ -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). -spec(length/1 :: (queue_name()) -> non_neg_integer()). --spec(get_cache_info/0 :: () -> [{atom(), term()}]). +-spec(cache_info/0 :: () -> [{atom(), term()}]). -endif. @@ -334,8 +334,8 @@ to_ram_disk_mode() -> length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). 
-get_cache_info() ->
-    gen_server2:call(?SERVER, get_cache_info, infinity).
+cache_info() ->
+    gen_server2:call(?SERVER, cache_info, infinity).
 
 %% ---- GEN-SERVER INTERNAL API ----
 
@@ -478,7 +478,7 @@ handle_call({dump_queue, Q}, _From, State) ->
 handle_call({delete_non_durable_queues, DurableQueues}, _From, State) ->
     {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State),
     {reply, ok, State1};
-handle_call(get_cache_info, _From, State = #dqstate { message_cache = Cache }) ->
+handle_call(cache_info, _From, State = #dqstate { message_cache = Cache }) ->
     {reply, ets:info(Cache), State}.
 
 handle_cast({publish, Q, Message}, State) ->
-- 
cgit v1.2.1


From 02f10f61c710d1244748fbc64469583193e20925 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 19 Jun 2009 18:26:39 +0100
Subject: comment typo

---
 src/rabbit_disk_queue.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 6374fc6f..0dcbb5ae 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -775,7 +775,7 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) ->
                              %% it's not in the cache and we only
                              %% have 1 queue with the message. So
                              %% don't bother putting it in the
-                             %% queue.
+                             %% cache.
                              ok;
                          _ -> insert_into_cache(Message, BodySize, State1)
                      end,
-- 
cgit v1.2.1


From 0ce01488ba4be1e305744ae831f13e5328ab56f5 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sat, 20 Jun 2009 23:20:43 +0100
Subject: fixed bug documented in preceding comment

---
 src/rabbit_disk_queue.erl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 0dcbb5ae..5829533e 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -1002,11 +1002,11 @@ internal_requeue(Q, MsgSeqIds = [{_, {FirstSeqIdTo, _}}|_],
     {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q),
     ReadSeqId1 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo),
     MsgSeqIdsZipped = zip_with_tail(MsgSeqIds, {last, {next, {next, true}}}),
-    {atomic, {WriteSeqId1, Q}} =
+    {atomic, {WriteSeqId1, Q, State}} =
         mnesia:transaction(
           fun() ->
                   ok = mnesia:write_lock_table(rabbit_disk_queue),
-                  lists:foldl(fun requeue_message/2, {WriteSeqId, Q},
+                  lists:foldl(fun requeue_message/2, {WriteSeqId, Q, State},
                               MsgSeqIdsZipped)
           end),
     true = ets:insert(Sequences, {Q, ReadSeqId1, WriteSeqId1,
@@ -1015,7 +1015,7 @@ internal_requeue(Q, MsgSeqIds = [{_, {FirstSeqIdTo, _}}|_],
 
 requeue_message({{{MsgId, SeqIdOrig}, {SeqIdTo, NewIsDelivered}},
                  {_NextMsgSeqId, {NextSeqIdTo, _NextNewIsDelivered}}},
-                {ExpectedSeqIdTo, Q}) ->
+                {ExpectedSeqIdTo, Q, State}) ->
     SeqIdTo1 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write),
     NextSeqIdTo1 = find_next_seq_id(SeqIdTo1, NextSeqIdTo),
     [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId,
@@ -1031,7 +1031,8 @@ requeue_message({{{MsgId, SeqIdOrig}, {SeqIdTo, NewIsDelivered}},
                            write),
           ok = mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write)
     end,
-    {NextSeqIdTo1, Q}.
+    decrement_cache(MsgId, State),
+    {NextSeqIdTo1, Q, State}.
 
 internal_purge(Q, State = #dqstate { sequences = Sequences }) ->
     case ets:lookup(Sequences, Q) of
-- 
cgit v1.2.1


From ec2b80fb4aaaff9f710b9a137b7bb8c196f8f5d2 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sat, 20 Jun 2009 23:43:23 +0100
Subject: A test. The problem really does exist. Not worked out how to fix it
 yet.
--- src/rabbit_tests.erl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bddb451a..f53ce6e6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -696,6 +696,7 @@ test_disk_queue() -> passed = rdq_test_purge(), passed = rdq_test_dump_queue(), passed = rdq_test_mixed_queue_modes(), + passed = rdq_test_mode_conversion_mid_txn(), rdq_virgin(), ok = control_action(stop_app, []), ok = control_action(start_app, []), @@ -1050,6 +1051,54 @@ rdq_test_mixed_queue_modes() -> rdq_stop(), passed. +rdq_test_mode_conversion_mid_txn() -> + rdq_virgin(), + rdq_start(), + Payload = <<0:(8*256)>>, + {ok, MS} = rabbit_mixed_queue:init(q, true, mixed), + MsgIds = lists:seq(1,10), + {MS2, Msgs} = + lists:foldl( + fun (N, {MS1, Acc}) -> + Msg = rabbit_basic:message(x, <<>>, <<>>, Payload, N), + {ok, MS1a} = rabbit_mixed_queue:tx_publish(Msg, MS1), + {MS1a, [Msg | Acc]} + end, {MS, []}, MsgIds), + {ok, MS3} = rabbit_mixed_queue:to_disk_only_mode(MS2), + {ok, MS4} = rabbit_mixed_queue:tx_commit(lists:reverse(Msgs), [], MS3), + MS6 = + lists:foldl( + fun (N, MS5) -> + Rem = 10 - N, + {{#basic_message { is_persistent = false }, + false, _AckTag, Rem}, + MS5a} = rabbit_mixed_queue:deliver(MS5), + MS5a + end, MS4, MsgIds), + 0 = rabbit_mixed_queue:length(MS6), + {ok, MS7} = rabbit_mixed_queue:init(q, true, disk), + {MS9, Msgs1} = + lists:foldl( + fun (N, {MS8, Acc}) -> + Msg = rabbit_basic:message(x, <<>>, <<>>, Payload, N), + {ok, MS8a} = rabbit_mixed_queue:tx_publish(Msg, MS8), + {MS8a, [Msg | Acc]} + end, {MS7, []}, MsgIds), + {ok, MS10} = rabbit_mixed_queue:to_mixed_mode(MS9), + {ok, MS11} = rabbit_mixed_queue:tx_commit(lists:reverse(Msgs1), [], MS10), + MS13 = + lists:foldl( + fun (N, MS12) -> + Rem = 10 - N, + {{#basic_message { is_persistent = false }, + false, _AckTag, Rem}, + MS12a} = rabbit_mixed_queue:deliver(MS12), + MS12a + end, MS11, MsgIds), + 0 = rabbit_mixed_queue:length(MS13), + rdq_stop(), + passed. + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). -- cgit v1.2.1 From fe5b6b0c437affc60acbaf22460932a15dcdec38 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 11:02:16 +0100 Subject: fixed. There was a choice here of either pushing all the txn accountancy into the mixed_queue and taking it out of queue_process or just passing in all the txn pending messages to the mode switch. I chose the latter because the queue_process is already the more readable of the two modules and I didn't want to further complicate the mixed_queue. Also, this way is a smaller API change and really not that much code. Tests pass but I'm about to rewrite the test and bulk it up a bit. Also, running the previous tests - rabbitmq-java-client/build/dist$ sh runjava.sh com/rabbitmq/examples/MulticastMain -y 50 -r 100 -s 104857 -m 100 -z 120 - whilst running (reduce|increase)_memory_footprint is a good thing to do. 
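A sketch of the resulting call pattern, assuming a durable queue q and the records from rabbit.hrl: the messages pending in an open transaction are handed to the mode switch so their disk refcounts stay correct, and the later tx_commit finds them already written.

    mid_txn_switch() ->
        {ok, MS0} = rabbit_mixed_queue:init(q, true, mixed),
        Msg = rabbit_basic:message(x, <<>>, <<>>, <<"body">>),
        {ok, MS1} = rabbit_mixed_queue:tx_publish(Msg, MS0),
        %% switch modes mid-transaction, passing the pending messages
        {ok, MS2} = rabbit_mixed_queue:to_disk_only_mode([Msg], MS1),
        {ok, MS3} = rabbit_mixed_queue:tx_commit([Msg], [], MS2),
        1 = rabbit_mixed_queue:length(MS3),
        ok.
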
--- src/rabbit_amqqueue_process.erl | 9 ++++++--- src/rabbit_mixed_queue.erl | 44 ++++++++++++++++++++++++++++++++++------- src/rabbit_tests.erl | 18 +++++++++-------- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6869846d..9fe6f50d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -790,10 +790,13 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end)); handle_cast({constrain, Constrain}, State = #q { mixed_state = MS }) -> + PendingMessages = + lists:flatten([Pending || #tx { pending_messages = Pending} + <- all_tx_record()]), {ok, MS1} = (case Constrain of - true -> fun rabbit_mixed_queue:to_disk_only_mode/1; - false -> fun rabbit_mixed_queue:to_mixed_mode/1 - end)(MS), + true -> fun rabbit_mixed_queue:to_disk_only_mode/2; + false -> fun rabbit_mixed_queue:to_mixed_mode/2 + end)(PendingMessages, MS), noreply(State #q { mixed_state = MS1 }). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9b99ab7f..bb9b90a3 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -39,7 +39,7 @@ tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1]). --export([to_disk_only_mode/1, to_mixed_mode/1]). +-export([to_disk_only_mode/2, to_mixed_mode/2]). -record(mqstate, { mode, msg_buf, @@ -80,6 +80,9 @@ -spec(length/1 :: (mqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (mqstate()) -> bool()). +-spec(to_disk_only_mode/2 :: ([message()], mqstate()) -> okmqs()). +-spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). + -endif. init(Queue, IsDurable, disk) -> @@ -88,12 +91,13 @@ init(Queue, IsDurable, disk) -> is_durable = IsDurable, length = 0 }); init(Queue, IsDurable, mixed) -> {ok, State} = init(Queue, IsDurable, disk), - to_mixed_mode(State). + to_mixed_mode([], State). -to_disk_only_mode(State = #mqstate { mode = disk }) -> +to_disk_only_mode(_TxnMessages, State = #mqstate { mode = disk }) -> {ok, State}; -to_disk_only_mode(State = - #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf }) -> +to_disk_only_mode(TxnMessages, State = + #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + is_durable = IsDurable }) -> rabbit_log:info("Converting queue to disk only mode: ~p~n", [Q]), %% We enqueue _everything_ here. This means that should a message %% already be in the disk queue we must remove it and add it back @@ -125,11 +129,24 @@ to_disk_only_mode(State = true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, + %% tx_publish txn messages. Some of these will have been already + %% published if they really are durable and persistent which is + %% why we can't just use our own tx_publish/2 function (would end + %% up publishing twice, so refcount would go wrong in disk_queue). + lists:foreach( + fun (Msg = #basic_message { is_persistent = IsPersistent }) -> + ok = case IsDurable andalso IsPersistent of + true -> ok; + _ -> rabbit_disk_queue:tx_publish(Msg) + end + end, TxnMessages), {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. 
-to_mixed_mode(State = #mqstate { mode = mixed }) ->
+to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) ->
     {ok, State};
-to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) ->
+to_mixed_mode(TxnMessages, State =
+              #mqstate { mode = disk, queue = Q, length = Length,
+                         is_durable = IsDurable }) ->
     rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]),
     %% load up a new queue with everything that's on disk.
     %% don't remove non-persistent messages that happen to be on disk
@@ -140,6 +157,19 @@ to_mixed_mode(State = #mqstate { mode = disk, queue = Q, length = Length }) ->
                   {Buf, L}) ->
                      {queue:in({Msg, IsDelivered, true}, Buf), L+1}
           end, {queue:new(), 0}, QList),
+    %% remove txn messages from disk which are not both persistent
+    %% and durable. This is necessary to avoid leaks. This is also
+    %% pretty much the inverse behaviour of our own tx_cancel/2 which
+    %% is why we're not using it.
+    Cancel =
+        lists:foldl(
+          fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) ->
+                  case IsDurable andalso IsPersistent of
+                      true -> Acc;
+                      _ -> [Msg #basic_message.guid | Acc]
+                  end
+          end, [], TxnMessages),
+    ok = rabbit_disk_queue:tx_cancel(lists:reverse(Cancel)),
     {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}.
 
 purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q,
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index f53ce6e6..37c0121c 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1003,10 +1003,10 @@ rdq_test_mixed_queue_modes() ->
           end, MS4, lists:seq(1,10)),
     30 = rabbit_mixed_queue:length(MS6),
     io:format("Published a mixture of messages~n"),
-    {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode(MS6),
+    {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode([], MS6),
     30 = rabbit_mixed_queue:length(MS7),
     io:format("Converted to disk only mode~n"),
-    {ok, MS8} = rabbit_mixed_queue:to_mixed_mode(MS7),
+    {ok, MS8} = rabbit_mixed_queue:to_mixed_mode([], MS7),
     30 = rabbit_mixed_queue:length(MS8),
     io:format("Converted to mixed mode~n"),
     MS10 =
@@ -1020,7 +1020,7 @@ rdq_test_mixed_queue_modes() ->
           end, MS8, lists:seq(1,10)),
     20 = rabbit_mixed_queue:length(MS10),
     io:format("Delivered initial non persistent messages~n"),
-    {ok, MS11} = rabbit_mixed_queue:to_disk_only_mode(MS10),
+    {ok, MS11} = rabbit_mixed_queue:to_disk_only_mode([], MS10),
     20 = rabbit_mixed_queue:length(MS11),
     io:format("Converted to disk only mode~n"),
     rdq_stop(),
@@ -1040,7 +1040,7 @@ rdq_test_mixed_queue_modes() ->
     0 = rabbit_mixed_queue:length(MS14),
     {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14),
     io:format("Delivered and acked all messages~n"),
-    {ok, MS16} = rabbit_mixed_queue:to_disk_only_mode(MS15),
+    {ok, MS16} = rabbit_mixed_queue:to_disk_only_mode([], MS15),
     0 = rabbit_mixed_queue:length(MS16),
     io:format("Converted to disk only mode~n"),
     rdq_stop(),
@@ -1064,8 +1064,9 @@ rdq_test_mode_conversion_mid_txn() ->
                 {ok, MS1a} = rabbit_mixed_queue:tx_publish(Msg, MS1),
                 {MS1a, [Msg | Acc]}
         end, {MS, []}, MsgIds),
-    {ok, MS3} = rabbit_mixed_queue:to_disk_only_mode(MS2),
-    {ok, MS4} = rabbit_mixed_queue:tx_commit(lists:reverse(Msgs), [], MS3),
+    MsgsOrdered = lists:reverse(Msgs),
+    {ok, MS3} = rabbit_mixed_queue:to_disk_only_mode(MsgsOrdered, MS2),
+    {ok, MS4} = rabbit_mixed_queue:tx_commit(MsgsOrdered, [], MS3),
     MS6 =
         lists:foldl(
           fun (N, MS5) ->
@@ -1084,8 +1085,9 @@ rdq_test_mode_conversion_mid_txn() ->
                 {ok, MS8a} = rabbit_mixed_queue:tx_publish(Msg, MS8),
                 {MS8a, [Msg | Acc]}
         end, {MS7, []}, MsgIds),
-    {ok, MS10} = 
rabbit_mixed_queue:to_mixed_mode(MS9), - {ok, MS11} = rabbit_mixed_queue:tx_commit(lists:reverse(Msgs1), [], MS10), + Msgs1Ordered = lists:reverse(Msgs1), + {ok, MS10} = rabbit_mixed_queue:to_mixed_mode(Msgs1Ordered, MS9), + {ok, MS11} = rabbit_mixed_queue:tx_commit(Msgs1Ordered, [], MS10), MS13 = lists:foldl( fun (N, MS12) -> -- cgit v1.2.1 From 5bf142c185374a42b7e06a733c98ce84bc4c05df Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 11:48:41 +0100 Subject: substantially bulked up the tests for this bug. All tests pass. --- src/rabbit_basic.erl | 13 ++++-- src/rabbit_tests.erl | 129 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 94 insertions(+), 48 deletions(-) diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index f9a8f488..63d6a481 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -33,7 +33,7 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/1, message/4, message/5, delivery/4]). +-export([publish/1, message/4, message/5, message/6, delivery/4]). %%---------------------------------------------------------------------------- @@ -44,8 +44,10 @@ -spec(delivery/4 :: (bool(), bool(), maybe(txn()), message()) -> delivery()). -spec(message/4 :: (exchange_name(), routing_key(), binary(), binary()) -> message()). --spec(message/5 :: (exchange_name(), routing_key(), binary(), binary(), guid()) -> - message()). +-spec(message/5 :: (exchange_name(), routing_key(), binary(), binary(), + guid()) -> message()). +-spec(message/6 :: (exchange_name(), routing_key(), binary(), binary(), + guid(), bool()) -> message()). -endif. @@ -69,6 +71,9 @@ message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin) -> message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, rabbit_guid:guid()). message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, MsgId) -> + message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, MsgId, false). + +message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, MsgId, IsPersistent) -> {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), Content = #content{class_id = ClassId, properties = #'P_basic'{content_type = ContentTypeBin}, @@ -78,4 +83,4 @@ message(ExchangeName, RoutingKeyBin, ContentTypeBin, BodyBin, MsgId) -> routing_key = RoutingKeyBin, content = Content, guid = MsgId, - is_persistent = false}. + is_persistent = IsPersistent}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 37c0121c..b00331c2 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1052,55 +1052,91 @@ rdq_test_mixed_queue_modes() -> passed. 
rdq_test_mode_conversion_mid_txn() -> + Payload = <<0:(8*256)>>, + MsgIdsA = lists:seq(0,9), + MsgsA = [ rabbit_basic:message(x, <<>>, <<>>, Payload, MsgId, + (0 == MsgId rem 2)) + || MsgId <- MsgIdsA ], + MsgIdsB = lists:seq(10,20), + MsgsB = [ rabbit_basic:message(x, <<>>, <<>>, Payload, MsgId, + (0 == MsgId rem 2)) + || MsgId <- MsgIdsB ], + rdq_virgin(), rdq_start(), - Payload = <<0:(8*256)>>, - {ok, MS} = rabbit_mixed_queue:init(q, true, mixed), - MsgIds = lists:seq(1,10), - {MS2, Msgs} = - lists:foldl( - fun (N, {MS1, Acc}) -> - Msg = rabbit_basic:message(x, <<>>, <<>>, Payload, N), - {ok, MS1a} = rabbit_mixed_queue:tx_publish(Msg, MS1), - {MS1a, [Msg | Acc]} - end, {MS, []}, MsgIds), - MsgsOrdered = lists:reverse(Msgs), - {ok, MS3} = rabbit_mixed_queue:to_disk_only_mode(MsgsOrdered, MS2), - {ok, MS4} = rabbit_mixed_queue:tx_commit(MsgsOrdered, [], MS3), - MS6 = - lists:foldl( - fun (N, MS5) -> - Rem = 10 - N, - {{#basic_message { is_persistent = false }, - false, _AckTag, Rem}, - MS5a} = rabbit_mixed_queue:deliver(MS5), - MS5a - end, MS4, MsgIds), - 0 = rabbit_mixed_queue:length(MS6), - {ok, MS7} = rabbit_mixed_queue:init(q, true, disk), - {MS9, Msgs1} = - lists:foldl( - fun (N, {MS8, Acc}) -> - Msg = rabbit_basic:message(x, <<>>, <<>>, Payload, N), - {ok, MS8a} = rabbit_mixed_queue:tx_publish(Msg, MS8), - {MS8a, [Msg | Acc]} - end, {MS7, []}, MsgIds), - Msgs1Ordered = lists:reverse(Msgs1), - {ok, MS10} = rabbit_mixed_queue:to_mixed_mode(Msgs1Ordered, MS9), - {ok, MS11} = rabbit_mixed_queue:tx_commit(Msgs1Ordered, [], MS10), - MS13 = - lists:foldl( - fun (N, MS12) -> - Rem = 10 - N, - {{#basic_message { is_persistent = false }, - false, _AckTag, Rem}, - MS12a} = rabbit_mixed_queue:deliver(MS12), - MS12a - end, MS11, MsgIds), - 0 = rabbit_mixed_queue:length(MS13), + {ok, MS0} = rabbit_mixed_queue:init(q, true, mixed), + passed = rdq_tx_publish_mixed_alter_commit_get( + MS0, MsgsA, MsgsB, fun rabbit_mixed_queue:to_disk_only_mode/2, commit), + + rdq_stop_virgin_start(), + {ok, MS1} = rabbit_mixed_queue:init(q, true, mixed), + passed = rdq_tx_publish_mixed_alter_commit_get( + MS1, MsgsA, MsgsB, fun rabbit_mixed_queue:to_disk_only_mode/2, cancel), + + + rdq_stop_virgin_start(), + {ok, MS2} = rabbit_mixed_queue:init(q, true, disk), + passed = rdq_tx_publish_mixed_alter_commit_get( + MS2, MsgsA, MsgsB, fun rabbit_mixed_queue:to_mixed_mode/2, commit), + + rdq_stop_virgin_start(), + {ok, MS3} = rabbit_mixed_queue:init(q, true, disk), + passed = rdq_tx_publish_mixed_alter_commit_get( + MS3, MsgsA, MsgsB, fun rabbit_mixed_queue:to_mixed_mode/2, cancel), + rdq_stop(), passed. 
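The helper below drives each of the four scenarios this test covers. Spelled out as data, the matrix is: start in one mode, apply the opposite switch in the middle of an open transaction, then either commit or cancel. A sketch:

    %% {initial mode, mode switch applied mid-transaction, outcome}
    scenarios() ->
        [{mixed, fun rabbit_mixed_queue:to_disk_only_mode/2, commit},
         {mixed, fun rabbit_mixed_queue:to_disk_only_mode/2, cancel},
         {disk,  fun rabbit_mixed_queue:to_mixed_mode/2,     commit},
         {disk,  fun rabbit_mixed_queue:to_mixed_mode/2,     cancel}].
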
+rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCancel) -> + 0 = rabbit_mixed_queue:length(MS0), + MS2 = lists:foldl( + fun (Msg, MS1) -> + {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1), + MS1a + end, MS0, MsgsA), + Len0 = length(MsgsA), + Len0 = rabbit_mixed_queue:length(MS2), + MS4 = lists:foldl( + fun (Msg, MS3) -> + {ok, MS3a} = rabbit_mixed_queue:tx_publish(Msg, MS3), + MS3a + end, MS2, MsgsB), + Len0 = rabbit_mixed_queue:length(MS4), + {ok, MS5} = ChangeFun(MsgsB, MS4), + Len0 = rabbit_mixed_queue:length(MS5), + {ok, MS9} = + case CommitOrCancel of + commit -> + {ok, MS6} = rabbit_mixed_queue:tx_commit(MsgsB, [], MS5), + Len1 = Len0 + length(MsgsB), + Len1 = rabbit_mixed_queue:length(MS6), + {AckTags, MS8} = + lists:foldl( + fun (Msg, {Acc, MS7}) -> + Rem = Len1 - (Msg #basic_message.guid) - 1, + {{Msg, false, AckTag, Rem}, MS7a} = + rabbit_mixed_queue:deliver(MS7), + {[AckTag | Acc], MS7a} + end, {[], MS6}, MsgsA ++ MsgsB), + 0 = rabbit_mixed_queue:length(MS8), + rabbit_mixed_queue:ack(lists:reverse(AckTags), MS8); + cancel -> + {ok, MS6} = rabbit_mixed_queue:tx_cancel(MsgsB, MS5), + Len0 = rabbit_mixed_queue:length(MS6), + {AckTags, MS8} = + lists:foldl( + fun (Msg, {Acc, MS7}) -> + Rem = Len0 - (Msg #basic_message.guid) - 1, + {{Msg, false, AckTag, Rem}, MS7a} = + rabbit_mixed_queue:deliver(MS7), + {[AckTag | Acc], MS7a} + end, {[], MS6}, MsgsA), + 0 = rabbit_mixed_queue:length(MS8), + rabbit_mixed_queue:ack(lists:reverse(AckTags), MS8) + end, + 0 = rabbit_mixed_queue:length(MS9), + passed. + rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). @@ -1119,3 +1155,8 @@ rdq_start() -> rdq_stop() -> rabbit_disk_queue:stop(), timer:sleep(1000). + +rdq_stop_virgin_start() -> + rdq_stop(), + rdq_virgin(), + rdq_start(). -- cgit v1.2.1 From a40abd337bcd854c65d1da05a3012d6ab0403eca Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 12:27:51 +0100 Subject: annoying bug which appeared ages ago and has only just been squashed. Meant queues were being started up in the wrong mode when in the "middle" mode... --- src/rabbit_queue_mode_manager.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 0e59f7d2..d3a753a6 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -92,8 +92,8 @@ handle_call({register, Pid}, _From, State = #state { queues = Qs, mode = Mode }) -> _MRef = erlang:monitor(process, Pid), Result = case Mode of - unlimited -> mixed; - _ -> disk + disk_only -> disk; + _ -> mixed end, {reply, {ok, Result}, State #state { queues = dict:store(Pid, 0, Qs) }}. -- cgit v1.2.1 From ff4d224a0da6152e4dc1f02e597de0fe4f012cbf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 12:51:02 +0100 Subject: These two fixes were done in 20980 but should really be in this branch. 
--- src/rabbit_control.erl | 2 +- src/rabbit_queue_mode_manager.erl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 586c06c0..9c1553b8 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -284,7 +284,7 @@ action(reduce_memory_footprint, Node, _Args, Inform) -> call(Node, {rabbit_queue_mode_manager, reduce_memory_footprint, []}); action(increase_memory_footprint, Node, _Args, Inform) -> - Inform("Reducing memory footprint", []), + Inform("Increasing memory footprint", []), call(Node, {rabbit_queue_mode_manager, increase_memory_footprint, []}); action(Command, Node, Args, Inform) -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index b36bb8be..040c9a18 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -86,8 +86,8 @@ init([]) -> handle_call({register, Pid}, _From, State = #state { queues = Qs, mode = Mode }) -> Result = case Mode of - unlimited -> mixed; - _ -> disk + disk_only -> disk; + _ -> mixed end, {reply, {ok, Result}, State #state { queues = [Pid | Qs] }}. -- cgit v1.2.1 From 6bee0b1347f380e1173f412d503f6d65c969bb42 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 13:32:44 +0100 Subject: Switched to tracking memory size of the queue at all times. Removed use of process_info(memory,self()) for reasons outlined in the bug comments. The annoying thing about using a 10% change as the threshold is that it means you get many many more updates when the queue is empty because the % change is much greater. --- src/rabbit_amqqueue_process.erl | 5 +- src/rabbit_mixed_queue.erl | 146 +++++++++++++++++++++------------------- 2 files changed, 77 insertions(+), 74 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b19ff7a0..0eff9e1b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -540,9 +540,8 @@ i(Item, _) -> report_memory(State = #q { old_memory_report = OldMem, mixed_state = MS }) -> - MSize = rabbit_mixed_queue:estimate_extra_memory(MS), - {memory, PSize} = process_info(self(), memory), - NewMem = case MSize + PSize of + MSize = rabbit_mixed_queue:estimate_queue_memory(MS), + NewMem = case MSize of 0 -> 1; %% avoid / 0 N -> N end, diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index e0f9d2f2..5c00b380 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -39,7 +39,7 @@ tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1]). --export([to_disk_only_mode/2, to_mixed_mode/2, estimate_extra_memory/1]). +-export([to_disk_only_mode/2, to_mixed_mode/2, estimate_queue_memory/1]). -record(mqstate, { mode, msg_buf, @@ -85,7 +85,7 @@ -spec(to_disk_only_mode/2 :: ([message()], mqstate()) -> okmqs()). -spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). --spec(estimate_extra_memory/1 :: (mqstate()) -> non_neg_integer). +-spec(estimate_queue_memory/1 :: (mqstate()) -> non_neg_integer). -endif. @@ -116,28 +116,25 @@ to_disk_only_mode(TxnMessages, State = %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. 
Msgs = queue:to_list(MsgBuf), - {Requeue, Size} = + Requeue = lists:foldl( fun ({Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}, - {RQueueAcc, SizeAcc}) -> - SizeAcc1 = SizeAcc + size_of_message(Msg), - RQueueAcc1 = - if OnDisk -> - {MsgId, IsDelivered, AckTag, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), - [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; - true -> - ok = if [] == RQueueAcc -> ok; - true -> - rabbit_disk_queue:requeue_with_seqs( - Q, lists:reverse(RQueueAcc)) - end, - ok = rabbit_disk_queue:publish( - Q, Msg, false), - [] - end, - {RQueueAcc1, SizeAcc1} - end, {[], 0}, Msgs), + RQueueAcc) -> + if OnDisk -> + {MsgId, IsDelivered, AckTag, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; + true -> + ok = if [] == RQueueAcc -> ok; + true -> + rabbit_disk_queue:requeue_with_seqs( + Q, lists:reverse(RQueueAcc)) + end, + ok = rabbit_disk_queue:publish( + Q, Msg, false), + [] + end + end, [], Msgs), ok = if [] == Requeue -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) @@ -153,8 +150,7 @@ to_disk_only_mode(TxnMessages, State = _ -> rabbit_disk_queue:tx_publish(Msg) end end, TxnMessages), - {ok, - State #mqstate { mode = disk, msg_buf = queue:new(), memory_size = Size }}. + {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> {ok, State}; @@ -184,7 +180,7 @@ to_mixed_mode(TxnMessages, State = end end, [], TxnMessages), ok = rabbit_disk_queue:tx_cancel(lists:reverse(Cancel)), - {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1, memory_size = 0 }}. + {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> @@ -223,16 +219,17 @@ publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, ok = rabbit_disk_queue:publish(Q, Msg, false), Size1 = Size + size_of_message(Msg), {ok, State #mqstate { length = Length + 1, memory_size = Size1 }}; -publish(Msg = #basic_message { is_persistent = IsPersistent }, - State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, - msg_buf = MsgBuf, length = Length }) -> +publish(Msg = #basic_message { is_persistent = IsPersistent }, State = + #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, + msg_buf = MsgBuf, length = Length, memory_size = Size }) -> OnDisk = IsDurable andalso IsPersistent, ok = if OnDisk -> rabbit_disk_queue:publish(Q, Msg, false); true -> ok end, + Size1 = Size + size_of_message(Msg), {ok, State #mqstate { msg_buf = queue:in({Msg, false, OnDisk}, MsgBuf), - length = Length + 1 }}. + length = Length + 1, memory_size = Size1 }}. %% Assumption here is that the queue is empty already (only called via %% attempt_immediate_delivery). 
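The bookkeeping being introduced here follows one rule: memory_size is the sum of size_of_message/1 over every message the queue still owns, so each publish adds a message's size and each deliver or cancel subtracts it. Reduced to a sketch, with the size function stubbed as the serialised term size that the tests also use:

    size_of_message_stub(Msg) -> size(term_to_binary(Msg)).

    %% A publish followed by a deliver of the same message must leave
    %% the running total unchanged.
    track(publish, Msg, Total) -> Total + size_of_message_stub(Msg);
    track(deliver, Msg, Total) -> Total - size_of_message_stub(Msg).
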
@@ -264,15 +261,16 @@ deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, {Msg = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), - Size = size_of_message(Msg), + QSize1 = QSize - size_of_message(Msg), AckTag1 = if IsPersistent andalso IsDurable -> AckTag; true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), noack end, {{Msg, IsDelivered, AckTag1, Remaining}, - State #mqstate { length = Length - 1, memory_size = QSize - Size }}; -deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, - msg_buf = MsgBuf, length = Length }) -> + State #mqstate { length = Length - 1, memory_size = QSize1 }}; +deliver(State = + #mqstate { mode = mixed, msg_buf = MsgBuf, is_durable = IsDurable, + queue = Q, length = Length, memory_size = QSize }) -> {{value, {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered, OnDisk}}, MsgBuf1} @@ -290,8 +288,9 @@ deliver(State = #mqstate { mode = mixed, queue = Q, is_durable = IsDurable, true -> noack end, Rem = Length - 1, + QSize1 = QSize - size_of_message(Msg), {{Msg, IsDelivered, AckTag, Rem}, - State #mqstate { msg_buf = MsgBuf1, length = Rem }}. + State #mqstate { msg_buf = MsgBuf1, length = Rem, memory_size = QSize1 }}. remove_noacks(Acks) -> lists:filter(fun (A) -> A /= noack end, Acks). @@ -303,17 +302,18 @@ ack(Acks, State = #mqstate { queue = Q }) -> {ok, State} end. -tx_publish(Msg, State = #mqstate { mode = disk, memory_size = Size }) -> +tx_publish(Msg, State = #mqstate { mode = disk, memory_size = QSize }) -> ok = rabbit_disk_queue:tx_publish(Msg), - {ok, State #mqstate { memory_size = Size + size_of_message(Msg) }}; -tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, - State = #mqstate { mode = mixed, is_durable = IsDurable }) + {ok, State #mqstate { memory_size = QSize + size_of_message(Msg) }}; +tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, State = + #mqstate { mode = mixed, is_durable = IsDurable, + memory_size = QSize }) when IsDurable andalso IsPersistent -> ok = rabbit_disk_queue:tx_publish(Msg), - {ok, State}; -tx_publish(_Msg, State = #mqstate { mode = mixed }) -> + {ok, State #mqstate { memory_size = QSize + size_of_message(Msg) }}; +tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize }) -> %% this message will reappear in the tx_commit, so ignore for now - {ok, State}. + {ok, State #mqstate { memory_size = QSize + size_of_message(Msg) }}. only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). @@ -353,37 +353,38 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, {ok, State #mqstate { msg_buf = MsgBuf1, length = Length + erlang:length(Publishes) }}. -only_persistent_msg_ids(Pubs) -> - lists:reverse( - lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> - if IsPersistent -> [Msg #basic_message.guid | Acc]; - true -> Acc - end - end, [], Pubs)). 
- -tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = TSize }) -> +tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = QSize }) -> {MsgIds, CSize} = lists:foldl( fun (Msg = #basic_message { guid = MsgId }, {MsgIdsAcc, CSizeAcc}) -> {[MsgId | MsgIdsAcc], CSizeAcc + size_of_message(Msg)} end, {[], 0}, Publishes), ok = rabbit_disk_queue:tx_cancel(lists:reverse(MsgIds)), - {ok, State #mqstate { memory_size = TSize - CSize }}; -tx_cancel(Publishes, - State = #mqstate { mode = mixed, is_durable = IsDurable }) -> + {ok, State #mqstate { memory_size = QSize - CSize }}; +tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable, + memory_size = QSize }) -> + {PersistentPubs, CSize} = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent, + guid = MsgId }, {Acc, CSizeAcc}) -> + CSizeAcc1 = CSizeAcc + size_of_message(Msg), + {case IsPersistent of + true -> [MsgId | Acc]; + _ -> Acc + end, CSizeAcc1} + end, {[], 0}, Publishes), ok = if IsDurable -> - rabbit_disk_queue:tx_cancel(only_persistent_msg_ids(Publishes)); + rabbit_disk_queue:tx_cancel(lists:reverse(PersistentPubs)); true -> ok end, - {ok, State}. + {ok, State #mqstate { memory_size = QSize - CSize }}. %% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, length = Length, - memory_size = TSize + memory_size = QSize }) -> %% here, we may have messages with no ack tags, because of the %% fact they are not persistent, but nevertheless we want to @@ -391,42 +392,44 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, {Requeue, CSize} = lists:foldl( fun ({Msg = #basic_message { is_persistent = IsPersistent }, - AckTag}, {RQ, SizeAcc}) + AckTag}, {RQ, CSizeAcc}) when IsPersistent andalso IsDurable -> - {[AckTag | RQ], SizeAcc + size_of_message(Msg)}; - ({Msg, _AckTag}, {RQ, SizeAcc}) -> + {[AckTag | RQ], CSizeAcc + size_of_message(Msg)}; + ({Msg, _AckTag}, {RQ, CSizeAcc}) -> ok = if RQ == [] -> ok; true -> rabbit_disk_queue:requeue( Q, lists:reverse(RQ)) end, _AckTag1 = rabbit_disk_queue:publish( Q, Msg, true), - {[], SizeAcc + size_of_message(Msg)} + {[], CSizeAcc + size_of_message(Msg)} end, {[], 0}, MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), {ok, State #mqstate { length = Length + erlang:length(MessagesWithAckTags), - memory_size = TSize + CSize + memory_size = QSize + CSize }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, - length = Length + length = Length, + memory_size = QSize }) -> - {PersistentPubs, MsgBuf1} = + {PersistentPubs, MsgBuf1, CSize} = lists:foldl( fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, - {Acc, MsgBuf2}) -> + {Acc, MsgBuf2, CSizeAcc}) -> OnDisk = IsDurable andalso IsPersistent, Acc1 = if OnDisk -> [AckTag | Acc]; true -> Acc end, - {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2)} - end, {[], MsgBuf}, MessagesWithAckTags), + CSizeAcc1 = CSizeAcc + size_of_message(Msg), + {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2), CSizeAcc1} + end, {[], MsgBuf, 0}, MessagesWithAckTags), ok = if [] == PersistentPubs -> ok; true -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) end, - {ok, State #mqstate {msg_buf = MsgBuf1, + {ok, State #mqstate {msg_buf = MsgBuf1, memory_size = QSize + CSize, length = Length + erlang:length(MessagesWithAckTags)}}. 
purge(State = #mqstate { queue = Q, mode = disk, length = Count }) -> @@ -434,14 +437,15 @@ purge(State = #mqstate { queue = Q, mode = disk, length = Count }) -> {Count, State #mqstate { length = 0, memory_size = 0 }}; purge(State = #mqstate { queue = Q, mode = mixed, length = Length }) -> rabbit_disk_queue:purge(Q), - {Length, State #mqstate { msg_buf = queue:new(), length = 0 }}. + {Length, + State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0 }}. delete_queue(State = #mqstate { queue = Q, mode = disk }) -> rabbit_disk_queue:delete_queue(Q), {ok, State #mqstate { length = 0, memory_size = 0 }}; delete_queue(State = #mqstate { queue = Q, mode = mixed }) -> rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { msg_buf = queue:new(), length = 0 }}. + {ok, State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0 }}. length(#mqstate { length = Length }) -> Length. @@ -449,5 +453,5 @@ length(#mqstate { length = Length }) -> is_empty(#mqstate { length = Length }) -> 0 == Length. -estimate_extra_memory(#mqstate { memory_size = Size }) -> +estimate_queue_memory(#mqstate { memory_size = Size }) -> 2 * Size. %% Magic number. Will probably need playing with. -- cgit v1.2.1 From 5a7765c27c7a7eeb0a586799897dde330a07ace2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 13:50:47 +0100 Subject: Added a time interval so that we report at a maximum of once per second. I guess the only reason for keeping the other counter which is the min number of messages the process has received between reporting memory, is that it's much faster to decrement that counter and ceq0 than do the time calculation for every single message. --- src/rabbit_amqqueue_process.erl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 0eff9e1b..2245e12f 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -38,6 +38,7 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(HIBERNATE_AFTER, 1000). -define(MEMORY_REPORT_INTERVAL, 500). +-define(MEMORY_REPORT_TIME_INTERVAL, 1000000). %% 1 second in microseconds -export([start_link/1]). @@ -110,7 +111,7 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> active_consumers = queue:new(), blocked_consumers = queue:new(), memory_report_counter = ?MEMORY_REPORT_INTERVAL, - old_memory_report = 1 + old_memory_report = {1, now()} }, ?HIBERNATE_AFTER}. terminate(_Reason, State) -> @@ -538,7 +539,7 @@ i(memory, _) -> i(Item, _) -> throw({bad_argument, Item}). -report_memory(State = #q { old_memory_report = OldMem, +report_memory(State = #q { old_memory_report = {OldMem, Then}, mixed_state = MS }) -> MSize = rabbit_mixed_queue:estimate_queue_memory(MS), NewMem = case MSize of @@ -546,10 +547,12 @@ report_memory(State = #q { old_memory_report = OldMem, N -> N end, State1 = State #q { memory_report_counter = ?MEMORY_REPORT_INTERVAL }, - case (NewMem / OldMem) > 1.1 orelse (OldMem / NewMem) > 1.1 of + Now = now(), + case ((NewMem / OldMem) > 1.1 orelse (OldMem / NewMem) > 1.1) andalso + (?MEMORY_REPORT_TIME_INTERVAL < timer:now_diff(Now, Then)) of true -> rabbit_queue_mode_manager:report_memory(self(), NewMem), - State1 #q { old_memory_report = NewMem }; + State1 #q { old_memory_report = {NewMem, Now} }; false -> State1 end. -- cgit v1.2.1 From 7867b5f7aed94ee6ee9eaabcffdfece2f2b65208 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 15:43:40 +0100 Subject: Added ability to manually set queue modes. 
Also altered manager so that if you are in all mixed mode, and then set one queue to disk, and then call increase_memory_footprint, you will indeed set that one queue back to mixed mode. And vice versa with mixed and decrease --- src/rabbit_amqqueue.erl | 13 +++++++++++-- src/rabbit_amqqueue_process.erl | 15 ++++++++++----- src/rabbit_control.erl | 11 +++++++++-- src/rabbit_mixed_queue.erl | 7 ++++++- src/rabbit_queue_mode_manager.erl | 3 +++ 5 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 9d3cead6..05a32a09 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([constrain_memory/2]). +-export([constrain_memory/2, set_mode/3]). -import(mnesia). -import(gen_server2). @@ -225,6 +225,12 @@ list(VHostPath) -> map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). +set_mode(VHostPath, Queue, ModeBin) + when is_binary(VHostPath) andalso is_binary(Queue) -> + Mode = list_to_atom(binary_to_list(ModeBin)), + with(rabbit_misc:r(VHostPath, queue, Queue), + fun(Q) -> gen_server2:cast(Q #amqqueue.pid, {set_mode, Mode}) end). + info(#amqqueue{ pid = QPid }) -> gen_server2:pcall(QPid, 9, info, infinity). @@ -318,7 +324,10 @@ unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 10, {unblock, ChPid}). constrain_memory(QPid, Constrain) -> - gen_server2:pcast(QPid, 10, {constrain, Constrain}). + gen_server2:pcast(QPid, 10, {set_mode, case Constrain of + true -> disk; + false -> mixed + end}). internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 2245e12f..5e607a46 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -89,7 +89,9 @@ acks_uncommitted, consumers, transactions, - memory]). + memory, + mode + ]). %%---------------------------------------------------------------------------- @@ -512,6 +514,8 @@ i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete; i(arguments, #q{q = #amqqueue{arguments = Arguments}}) -> Arguments; +i(mode, #q{ mixed_state = MS }) -> + rabbit_mixed_queue:info(MS); i(pid, _) -> self(); i(messages_ready, #q { mixed_state = MS }) -> @@ -821,15 +825,16 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} end)); -handle_cast({constrain, Constrain}, State = #q { mixed_state = MS }) -> +handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> PendingMessages = lists:flatten([Pending || #tx { pending_messages = Pending} <- all_tx_record()]), - {ok, MS1} = (case Constrain of - true -> fun rabbit_mixed_queue:to_disk_only_mode/2; - false -> fun rabbit_mixed_queue:to_mixed_mode/2 + {ok, MS1} = (case Mode of + disk -> fun rabbit_mixed_queue:to_disk_only_mode/2; + mixed -> fun rabbit_mixed_queue:to_mixed_mode/2 end)(PendingMessages, MS), noreply(State #q { mixed_state = MS1 }). 
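%% The {set_mode, Mode} clause above picks a conversion function by
%% mode and then applies it to the pending transaction messages and the
%% mixed-queue state. A stripped-down sketch of that select-then-apply
%% idiom; to_disk_only/2 and to_mixed/2 are placeholder stand-ins for
%% the rabbit_mixed_queue conversions.
set_mode_sketch(Mode, Pending, MS) ->
    ModeFun = case Mode of
                  disk  -> fun to_disk_only/2;
                  mixed -> fun to_mixed/2
              end,
    ModeFun(Pending, MS).
to_disk_only(_Pending, MS) -> {ok, MS}. %% placeholder conversion
to_mixed(_Pending, MS)     -> {ok, MS}. %% placeholder conversion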
+ handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 9c1553b8..291dfae0 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -140,6 +140,8 @@ Available commands: reduce_memory_footprint increase_memory_footprint + set_queue_mode (disk|mixed) + Quiet output mode is selected with the \"-q\" flag. Informational messages are suppressed when quiet mode is in effect. @@ -155,8 +157,8 @@ virtual host parameter for which to display results. The default value is \"/\". must be a member of the list [name, durable, auto_delete, arguments, node, messages_ready, messages_unacknowledged, messages_uncommitted, -messages, acks_uncommitted, consumers, transactions, memory]. The default is - to display name and (number of) messages. +messages, acks_uncommitted, consumers, transactions, memory, mode]. The default +is to display name and (number of) messages. must be a member of the list [name, type, durable, auto_delete, arguments]. The default is to display name and type. @@ -291,6 +293,11 @@ action(Command, Node, Args, Inform) -> {VHost, RemainingArgs} = parse_vhost_flag(Args), action(Command, Node, VHost, RemainingArgs, Inform). +action(set_queue_mode, Node, VHost, [Queue, Mode], Inform) -> + Inform("Setting queue mode to ~p for queue ~p in vhost ~p", + [Mode, Queue, VHost]), + call(Node, {rabbit_amqqueue, set_mode, [VHost, Queue, Mode]}); + action(set_permissions, Node, VHost, [Username, CPerm, WPerm, RPerm], Inform) -> Inform("Setting permissions for user ~p in vhost ~p", [Username, VHost]), call(Node, {rabbit_access_control, set_permissions, diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 5c00b380..e700d3d2 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -39,7 +39,8 @@ tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1]). --export([to_disk_only_mode/2, to_mixed_mode/2, estimate_queue_memory/1]). +-export([to_disk_only_mode/2, to_mixed_mode/2, estimate_queue_memory/1, + info/1]). -record(mqstate, { mode, msg_buf, @@ -86,6 +87,7 @@ -spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). -spec(estimate_queue_memory/1 :: (mqstate()) -> non_neg_integer). +-spec(info/1 :: (mqstate()) -> mode()). -endif. @@ -455,3 +457,6 @@ is_empty(#mqstate { length = Length }) -> estimate_queue_memory(#mqstate { memory_size = Size }) -> 2 * Size. %% Magic number. Will probably need playing with. + +info(#mqstate { mode = Mode }) -> + Mode. 
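%% Usage sketch for the new rabbitmqctl command (the queue name below
%% is illustrative and the default vhost is assumed; modes are limited
%% to disk and mixed):
%%
%%   rabbitmqctl set_queue_mode some_queue disk
%%   rabbitmqctl set_queue_mode some_queue mixed
%%   rabbitmqctl list_queues name mode memory
%%
%% The mode column served by info/1 above makes the effect of a
%% set_queue_mode call directly observable in list_queues.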
diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index d3a753a6..db174d24 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -99,6 +99,7 @@ handle_call({register, Pid}, _From, handle_cast({change_memory_footprint, true}, State = #state { mode = disk_only }) -> + constrain_queues(true, State #state.queues), {noreply, State}; handle_cast({change_memory_footprint, true}, State = #state { mode = ram_disk }) -> @@ -111,10 +112,12 @@ handle_cast({change_memory_footprint, true}, handle_cast({change_memory_footprint, false}, State = #state { mode = unlimited }) -> + constrain_queues(false, State #state.queues), {noreply, State}; handle_cast({change_memory_footprint, false}, State = #state { mode = ram_disk }) -> ok = rabbit_disk_queue:to_ram_disk_mode(), + constrain_queues(false, State #state.queues), {noreply, State #state { mode = unlimited }}; handle_cast({change_memory_footprint, false}, State = #state { mode = disk_only }) -> -- cgit v1.2.1 From e8e285189baca982b6a001004c2e92b5876c523b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 22 Jun 2009 17:25:49 +0100 Subject: whoops, missed off a priority pcast. --- src/rabbit_amqqueue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 05a32a09..4858a984 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -229,7 +229,7 @@ set_mode(VHostPath, Queue, ModeBin) when is_binary(VHostPath) andalso is_binary(Queue) -> Mode = list_to_atom(binary_to_list(ModeBin)), with(rabbit_misc:r(VHostPath, queue, Queue), - fun(Q) -> gen_server2:cast(Q #amqqueue.pid, {set_mode, Mode}) end). + fun(Q) -> gen_server2:pcast(Q #amqqueue.pid, 10, {set_mode, Mode}) end). info(#amqqueue{ pid = QPid }) -> gen_server2:pcall(QPid, 9, info, infinity). -- cgit v1.2.1 From 89c55fd18ad344b638c29f8a7cc6dac5634864de Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Jun 2009 12:07:05 +0100 Subject: Tidying up before starting to play with tokens. o) Removed redundant rubbish from manager o) Wired alarms straight to disk_queue (+ minor refactor) o) Removed reduce/increase_memory_footprint from control and amqqueue Testing shows that one of the reasons the memory estimates are off is because they only track messages in the queue - i.e. remove on deliver, not on ack. Watching ./scripts/rabbitmqctl list_queues name memory mode messages messages_ready messages_unacknowledged messages_uncommitted during various tests shows that acks are not getting through and so we actually need to track unacknowledged message size - i.e. remove on ack, not on deliver --- src/rabbit_amqqueue.erl | 10 ++---- src/rabbit_control.erl | 11 ------- src/rabbit_disk_queue.erl | 68 +++++++++++++++++++++++---------------- src/rabbit_queue_mode_manager.erl | 56 ++------------------------------ 4 files changed, 45 insertions(+), 100 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 4858a984..c045b3ca 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([constrain_memory/2, set_mode/3]). +-export([set_mode/3]). -import(mnesia). -import(gen_server2). @@ -104,7 +104,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). 
--spec(constrain_memory/2 :: (pid(), bool()) -> 'ok'). +-spec(set_mode/3 :: (vhost(), amqqueue(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -323,12 +323,6 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 10, {unblock, ChPid}). -constrain_memory(QPid, Constrain) -> - gen_server2:pcast(QPid, 10, {set_mode, case Constrain of - true -> disk; - false -> mixed - end}). - internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( fun () -> diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 291dfae0..ab5fe1bc 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -137,9 +137,6 @@ Available commands: list_bindings [-p ] list_connections [ ...] - reduce_memory_footprint - increase_memory_footprint - set_queue_mode (disk|mixed) Quiet output mode is selected with the \"-q\" flag. Informational messages @@ -281,14 +278,6 @@ action(list_connections, Node, Args, Inform) -> [ArgAtoms]), ArgAtoms); -action(reduce_memory_footprint, Node, _Args, Inform) -> - Inform("Reducing memory footprint", []), - call(Node, {rabbit_queue_mode_manager, reduce_memory_footprint, []}); - -action(increase_memory_footprint, Node, _Args, Inform) -> - Inform("Increasing memory footprint", []), - call(Node, {rabbit_queue_mode_manager, increase_memory_footprint, []}); - action(Command, Node, Args, Inform) -> {VHost, RemainingArgs} = parse_vhost_flag(Args), action(Command, Node, VHost, RemainingArgs, Inform). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index bf2de565..de31b238 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -46,7 +46,7 @@ -export([length/1, filesync/0, cache_info/0]). --export([stop/0, stop_and_obliterate/0, +-export([stop/0, stop_and_obliterate/0, change_memory_footprint/2, to_disk_only_mode/0, to_ram_disk_mode/0]). -include("rabbit.hrl"). @@ -269,6 +269,7 @@ -spec(length/1 :: (queue_name()) -> non_neg_integer()). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). +-spec(change_memory_footprint/2 :: (pid(), bool()) -> 'ok'). -endif. @@ -345,6 +346,9 @@ filesync() -> cache_info() -> gen_server2:call(?SERVER, cache_info, infinity). +change_memory_footprint(_Pid, Conserve) -> + gen_server2:pcast(?SERVER, 9, {change_memory_footprint, Conserve}). + %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -357,6 +361,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. 
process_flag(trap_exit, true), + ok = rabbit_alarm:register(self(), {?MODULE, change_memory_footprint, []}), Node = node(), ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node, @@ -457,32 +462,10 @@ handle_call(stop_vaporise, _From, State) -> State1 #dqstate { current_file_handle = undefined, read_file_handles = {dict:new(), gb_trees:empty()}}}; %% gen_server now calls terminate, which then calls shutdown -handle_call(to_disk_only_mode, _From, - State = #dqstate { operation_mode = disk_only }) -> - reply(ok, State); -handle_call(to_disk_only_mode, _From, - State = #dqstate { operation_mode = ram_disk, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> - rabbit_log:info("Converting disk queue to disk only mode~n", []), - {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), - disc_only_copies), - ok = dets:from_ets(MsgLocationDets, MsgLocationEts), - true = ets:delete_all_objects(MsgLocationEts), - reply(ok, State #dqstate { operation_mode = disk_only }); -handle_call(to_ram_disk_mode, _From, - State = #dqstate { operation_mode = ram_disk }) -> - reply(ok, State); -handle_call(to_ram_disk_mode, _From, - State = #dqstate { operation_mode = disk_only, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> - rabbit_log:info("Converting disk queue to ram disk mode~n", []), - {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), - disc_copies), - true = ets:from_dets(MsgLocationEts, MsgLocationDets), - ok = dets:delete_all_objects(MsgLocationDets), - reply(ok, State #dqstate { operation_mode = ram_disk }); +handle_call(to_disk_only_mode, _From, State) -> + reply(ok, to_disk_only_mode(State)); +handle_call(to_ram_disk_mode, _From, State) -> + reply(ok, to_ram_disk_mode(State)); handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), reply(Length, State); @@ -522,7 +505,12 @@ handle_cast({delete_queue, Q}, State) -> {ok, State1} = internal_delete_queue(Q, State), noreply(State1); handle_cast(filesync, State) -> - noreply(sync_current_file_handle(State)). + noreply(sync_current_file_handle(State)); +handle_cast({change_memory_footprint, Conserve}, State) -> + noreply((case Conserve of + true -> fun to_disk_only_mode/1; + false -> fun to_ram_disk_mode/1 + end)(State)). handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -563,6 +551,30 @@ code_change(_OldVsn, State, _Extra) -> %% ---- UTILITY FUNCTIONS ---- +to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> + State; +to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + rabbit_log:info("Converting disk queue to disk only mode~n", []), + {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), + disc_only_copies), + ok = dets:from_ets(MsgLocationDets, MsgLocationEts), + true = ets:delete_all_objects(MsgLocationEts), + State #dqstate { operation_mode = disk_only }. 
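%% to_disk_only_mode/1 above pairs two steps: flip the mnesia table to
%% disc_only_copies, then spill the ets index into dets and empty the
%% ets table. to_ram_disk_mode/1 (next hunk) is the exact inverse. A
%% free-standing sketch of just the ets <-> dets mirroring, assuming
%% Dets is an already-open dets table and Ets an existing ets table:
to_disk_sketch(Ets, Dets) ->
    ok = dets:from_ets(Dets, Ets),      %% copy every object out to disk
    true = ets:delete_all_objects(Ets),
    ok.
to_ram_sketch(Ets, Dets) ->
    true = ets:from_dets(Ets, Dets),    %% pull everything back into ram
    ok = dets:delete_all_objects(Dets),
    ok.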
+ +to_ram_disk_mode(State = #dqstate { operation_mode = ram_disk }) -> + State; +to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + rabbit_log:info("Converting disk queue to ram disk mode~n", []), + {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), + disc_copies), + true = ets:from_dets(MsgLocationEts, MsgLocationDets), + ok = dets:delete_all_objects(MsgLocationDets), + State #dqstate { operation_mode = ram_disk }. + noreply(NewState = #dqstate { current_dirty = true }) -> {noreply, start_commit_timer(NewState), 0}; noreply(NewState) -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index db174d24..cc10074c 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,10 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/1, change_memory_footprint/2, - reduce_memory_footprint/0, increase_memory_footprint/0, - report_memory/2 - ]). +-export([register/1, report_memory/2]). -define(SERVER, ?MODULE). @@ -52,9 +49,6 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/1 :: (pid()) -> {'ok', queue_mode()}). --spec(change_memory_footprint/2 :: (pid(), bool()) -> 'ok'). --spec(reduce_memory_footprint/0 :: () -> 'ok'). --spec(increase_memory_footprint/0 :: () -> 'ok'). -spec(report_memory/2 :: (pid(), non_neg_integer()) -> 'ok'). -endif. @@ -69,21 +63,11 @@ start_link() -> register(Pid) -> gen_server2:call(?SERVER, {register, Pid}). -change_memory_footprint(_Pid, Conserve) -> - gen_server2:cast(?SERVER, {change_memory_footprint, Conserve}). - -reduce_memory_footprint() -> - gen_server2:cast(?SERVER, {change_memory_footprint, true}). - -increase_memory_footprint() -> - gen_server2:cast(?SERVER, {change_memory_footprint, false}). - report_memory(Pid, Memory) -> gen_server2:cast(?SERVER, {report_memory, Pid, Memory}). init([]) -> process_flag(trap_exit, true), - ok = rabbit_alarm:register(self(), {?MODULE, change_memory_footprint, []}), {ok, #state { mode = unlimited, queues = dict:new() }}. @@ -97,36 +81,8 @@ handle_call({register, Pid}, _From, end, {reply, {ok, Result}, State #state { queues = dict:store(Pid, 0, Qs) }}. 
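%% The manager above keeps a dict mapping each registered queue pid to
%% its last reported byte count. A sketch of that bookkeeping,
%% including the 'DOWN' cleanup relied on below; that the pid is
%% monitored at registration is an assumption of this sketch:
register_sketch(Pid, Qs) ->
    _MRef = erlang:monitor(process, Pid),
    dict:store(Pid, 0, Qs).
report_sketch(Pid, Memory, Qs) ->
    dict:store(Pid, Memory, Qs).
down_sketch(Pid, Qs) ->
    dict:erase(Pid, Qs).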
-handle_cast({change_memory_footprint, true}, - State = #state { mode = disk_only }) -> - constrain_queues(true, State #state.queues), - {noreply, State}; -handle_cast({change_memory_footprint, true}, - State = #state { mode = ram_disk }) -> - constrain_queues(true, State #state.queues), - {noreply, State #state { mode = disk_only }}; -handle_cast({change_memory_footprint, true}, - State = #state { mode = unlimited }) -> - ok = rabbit_disk_queue:to_disk_only_mode(), - {noreply, State #state { mode = ram_disk }}; - -handle_cast({change_memory_footprint, false}, - State = #state { mode = unlimited }) -> - constrain_queues(false, State #state.queues), - {noreply, State}; -handle_cast({change_memory_footprint, false}, - State = #state { mode = ram_disk }) -> - ok = rabbit_disk_queue:to_ram_disk_mode(), - constrain_queues(false, State #state.queues), - {noreply, State #state { mode = unlimited }}; -handle_cast({change_memory_footprint, false}, - State = #state { mode = disk_only }) -> - constrain_queues(false, State #state.queues), - {noreply, State #state { mode = ram_disk }}; - -handle_cast({report_memory, Pid, Memory}, State = #state { queues = Qs }) -> - io:format("Queue ~w requested ~w bytes~n", [Pid, Memory]), - {noreply, State #state { queues = dict:store(Pid, Memory, Qs) }}. +handle_cast(_Any, State) -> + {noreply, State}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state { queues = Qs }) -> @@ -141,9 +97,3 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - -constrain_queues(Constrain, Qs) -> - dict:fold( - fun (QPid, _Mem, ok) -> - rabbit_amqqueue:constrain_memory(QPid, Constrain) - end, ok, Qs). -- cgit v1.2.1 From 06aaffd73a10a99c240853580c2f4d82e9100b8e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Jun 2009 14:24:24 +0100 Subject: only reduce memory size when messages are acked, not when they're delivered. --- src/rabbit_amqqueue_process.erl | 15 +++--- src/rabbit_mixed_queue.erl | 106 +++++++++++++++++++++----------------- src/rabbit_queue_mode_manager.erl | 3 +- 3 files changed, 67 insertions(+), 57 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5e607a46..f15a58cd 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -112,7 +112,7 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), - memory_report_counter = ?MEMORY_REPORT_INTERVAL, + memory_report_counter = 0, old_memory_report = {1, now()} }, ?HIBERNATE_AFTER}. @@ -263,7 +263,7 @@ deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, AutoAcks1 = case AckRequired of true -> AutoAcks; - false -> [AckTag | AutoAcks] + false -> [{Msg, AckTag} | AutoAcks] end, {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, State #q { mixed_state = MS1 }}. @@ -331,8 +331,8 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> -1 < Len. deliver_or_requeue_msgs_deliver( - false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> - {{Msg, true, noack}, {Len - 1, [AckTag|AcksAcc], MsgsWithAcks}, State}; + false, {Len, AcksAcc, [(MsgAckTag = {Msg, _}) | MsgsWithAcks]}, State) -> + {{Msg, true, noack}, {Len - 1, [MsgAckTag | AcksAcc], MsgsWithAcks}, State}; deliver_or_requeue_msgs_deliver( true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> {{Msg, true, AckTag}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. 
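%% Threading {Msg, AckTag} pairs, rather than bare ack tags, through
%% the deliver/requeue path is what later lets the mixed queue subtract
%% message sizes at ack time. A sketch of the pair-splitting this
%% enables, with binaries standing in for messages:
acks_only(MsgsWithAcks) ->
    [AckTag || {_Msg, AckTag} <- MsgsWithAcks].
sizes_total(MsgsWithAcks) ->
    lists:sum([size(Msg) || {Msg, _AckTag} <- MsgsWithAcks]).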
@@ -487,7 +487,7 @@ commit_transaction(Txn, State) -> {MsgWithAcks, Remaining} = collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), - [ AckTag || {_Msg, AckTag} <- MsgWithAcks ] + MsgWithAcks end, {ok, MS} = rabbit_mixed_queue:tx_commit( PendingMessagesOrdered, Acks, State #q.mixed_state), @@ -623,7 +623,7 @@ handle_call({basic_get, ChPid, NoAck}, _From, store_ch_record(C#cr{unacked_messages = NewUAM}), {ok, MS1}; false -> - rabbit_mixed_queue:ack([AckTag], MS1) + rabbit_mixed_queue:ack([{Msg, AckTag}], MS1) end, Message = {QName, self(), NextId, IsDelivered, Msg}, reply({ok, Remaining, Message}, @@ -771,9 +771,8 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), case Txn of none -> - Acks = [ AckTag || {_Msg, AckTag} <- MsgWithAcks ], {ok, MS} = - rabbit_mixed_queue:ack(Acks, State #q.mixed_state), + rabbit_mixed_queue:ack(MsgWithAcks, State #q.mixed_state), store_ch_record(C#cr{unacked_messages = Remaining}), noreply(State #q { mixed_state = MS }); _ -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index e700d3d2..24360003 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -185,12 +185,13 @@ to_mixed_mode(TxnMessages, State = {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable }) -> + is_durable = IsDurable, + memory_size = QSize }) -> %% iterate through the content on disk, ack anything which isn't %% persistent, accumulate everything else that is persistent and %% requeue it - {Acks, Requeue, Length} = - deliver_all_messages(Q, IsDurable, [], [], 0), + {Acks, Requeue, Length, ASize} = + deliver_all_messages(Q, IsDurable, [], [], 0, 0), ok = if Requeue == [] -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) @@ -198,22 +199,22 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, ok = if Acks == [] -> ok; true -> rabbit_disk_queue:ack(Q, lists:reverse(Acks)) end, - {ok, State #mqstate { length = Length }}. + {ok, State #mqstate { length = Length, memory_size = QSize - ASize }}. -deliver_all_messages(Q, IsDurable, Acks, Requeue, Length) -> +deliver_all_messages(Q, IsDurable, Acks, Requeue, Length, ASize) -> case rabbit_disk_queue:deliver(Q) of - empty -> {Acks, Requeue, Length}; - {#basic_message { is_persistent = IsPersistent }, + empty -> {Acks, Requeue, Length, ASize}; + {Msg = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered, AckTag, _Remaining} -> OnDisk = IsPersistent andalso IsDurable, - {Acks1, Requeue1, Length1} = - if OnDisk -> {Acks, - [{AckTag, {next, IsDelivered}} | Requeue], - Length + 1 - }; - true -> {[AckTag | Acks], Requeue, Length} + {Acks1, Requeue1, Length1, ASize1} = + if OnDisk -> { Acks, + [{AckTag, {next, IsDelivered}} | Requeue], + Length + 1, ASize }; + true -> { [AckTag | Acks], Requeue, Length, + ASize + size_of_message(Msg) } end, - deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1) + deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1, ASize1) end. publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, @@ -237,42 +238,43 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, State = %% attempt_immediate_delivery). 
publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, - State = #mqstate { mode = Mode, is_durable = IsDurable, - queue = Q, length = 0 }) + State = + #mqstate { mode = Mode, is_durable = IsDurable, + queue = Q, length = 0, memory_size = QSize }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> rabbit_disk_queue:publish(Q, Msg, false), + State1 = State #mqstate { memory_size = QSize + size_of_message(Msg) }, if IsDurable andalso IsPersistent -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), - {ok, AckTag, State}; + {ok, AckTag, State1}; true -> %% in this case, we don't actually care about the ack, so %% auto ack it (asynchronously). ok = rabbit_disk_queue:auto_ack_next_message(Q), - {ok, noack, State} + {ok, noack, State1} end; -publish_delivered(_Msg, State = #mqstate { mode = mixed, length = 0 }) -> - {ok, noack, State}. +publish_delivered(Msg, State = + #mqstate { mode = mixed, length = 0, memory_size = QSize }) -> + {ok, noack, State #mqstate { memory_size = QSize + size_of_message(Msg) }}. deliver(State = #mqstate { length = 0 }) -> {empty, State}; deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - length = Length, memory_size = QSize }) -> + length = Length }) -> {Msg = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered, AckTag, Remaining} = rabbit_disk_queue:deliver(Q), - QSize1 = QSize - size_of_message(Msg), AckTag1 = if IsPersistent andalso IsDurable -> AckTag; true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), noack end, {{Msg, IsDelivered, AckTag1, Remaining}, - State #mqstate { length = Length - 1, memory_size = QSize1 }}; -deliver(State = - #mqstate { mode = mixed, msg_buf = MsgBuf, is_durable = IsDurable, - queue = Q, length = Length, memory_size = QSize }) -> + State #mqstate { length = Length - 1 }}; +deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, + is_durable = IsDurable, length = Length }) -> {{value, {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered, OnDisk}}, MsgBuf1} @@ -290,18 +292,24 @@ deliver(State = true -> noack end, Rem = Length - 1, - QSize1 = QSize - size_of_message(Msg), {{Msg, IsDelivered, AckTag, Rem}, - State #mqstate { msg_buf = MsgBuf1, length = Rem, memory_size = QSize1 }}. - -remove_noacks(Acks) -> - lists:filter(fun (A) -> A /= noack end, Acks). - -ack(Acks, State = #mqstate { queue = Q }) -> - case remove_noacks(Acks) of - [] -> {ok, State}; - AckTags -> ok = rabbit_disk_queue:ack(Q, AckTags), - {ok, State} + State #mqstate { msg_buf = MsgBuf1, length = Rem }}. + +remove_noacks(MsgsWithAcks) -> + {AckTags, ASize} = + lists:foldl( + fun ({Msg, noack}, {AccAckTags, AccSize}) -> + {AccAckTags, size_of_message(Msg) + AccSize}; + ({Msg, AckTag}, {AccAckTags, AccSize}) -> + {[AckTag | AccAckTags], size_of_message(Msg) + AccSize} + end, {[], 0}, MsgsWithAcks), + {lists:reverse(AckTags), ASize}. + +ack(MsgsWithAcks, State = #mqstate { queue = Q, memory_size = QSize }) -> + case remove_noacks(MsgsWithAcks) of + {[], ASize} -> {ok, State #mqstate { memory_size = QSize - ASize }}; + {AckTags, ASize} -> ok = rabbit_disk_queue:ack(Q, AckTags), + {ok, State #mqstate { memory_size = QSize - ASize }} end. 
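%% A worked example of the remove_noacks/1 shape introduced above: even
%% noack entries contribute their size, they just yield no ack tag.
%% Binaries stand in for messages and {0, 1} for a disk-queue ack tag:
remove_noacks_example() ->
    MsgsWithAcks = [{<<"m1">>, noack}, {<<"m2">>, {0, 1}}],
    {AckTags, ASize} =
        lists:foldl(
          fun ({Msg, noack}, {Tags, Sz}) -> {Tags, Sz + size(Msg)};
              ({Msg, Tag}, {Tags, Sz})   -> {[Tag | Tags], Sz + size(Msg)}
          end, {[], 0}, MsgsWithAcks),
    {AckTags, ASize}. %% => {[{0,1}], 4}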
tx_publish(Msg, State = #mqstate { mode = disk, memory_size = QSize }) -> @@ -320,19 +328,20 @@ tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize }) -> only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). -tx_commit(Publishes, Acks, State = #mqstate { mode = disk, queue = Q, - length = Length }) -> - RealAcks = remove_noacks(Acks), +tx_commit(Publishes, MsgsWithAcks, + State = #mqstate { mode = disk, queue = Q, length = Length, + memory_size = QSize }) -> + {RealAcks, ASize} = remove_noacks(MsgsWithAcks), ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), RealAcks) end, - {ok, State #mqstate { length = Length + erlang:length(Publishes) }}; -tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, - msg_buf = MsgBuf, - is_durable = IsDurable, - length = Length - }) -> + {ok, State #mqstate { length = Length + erlang:length(Publishes), + memory_size = QSize - ASize }}; +tx_commit(Publishes, MsgsWithAcks, + State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + is_durable = IsDurable, length = Length, + memory_size = QSize }) -> {PersistentPubs, MsgBuf1} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, {Acc, MsgBuf2}) -> @@ -346,14 +355,15 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, end, {[], MsgBuf}, Publishes), %% foldl reverses, so re-reverse PersistentPubs to match %% requirements of rabbit_disk_queue (ascending SeqIds) - RealAcks = remove_noacks(Acks), + {RealAcks, ASize} = remove_noacks(MsgsWithAcks), ok = if ([] == PersistentPubs) andalso ([] == RealAcks) -> ok; true -> rabbit_disk_queue:tx_commit( Q, lists:reverse(PersistentPubs), RealAcks) end, {ok, State #mqstate { msg_buf = MsgBuf1, - length = Length + erlang:length(Publishes) }}. + length = Length + erlang:length(Publishes), + memory_size = QSize - ASize }}. tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = QSize }) -> {MsgIds, CSize} = diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index cc10074c..50f66063 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -81,7 +81,8 @@ handle_call({register, Pid}, _From, end, {reply, {ok, Result}, State #state { queues = dict:store(Pid, 0, Qs) }}. -handle_cast(_Any, State) -> +handle_cast(Any, State) -> + io:format("~w~n", [Any]), {noreply, State}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, -- cgit v1.2.1 From ebd89f3cf10cb755a450936ed767e437048ef100 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Jun 2009 14:28:51 +0100 Subject: without this, rabbit_disk_queue seems to refuse to start up if there are messages to be recovered, due to mnesia not being running fast enough. --- src/rabbit_misc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 2971e332..f38ee631 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -409,7 +409,7 @@ format_stderr(Fmt, Args) -> manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) -> Iterate(fun (App, Acc) -> case Do(App) of - ok -> [App | Acc]; + ok -> timer:sleep(100), [App | Acc]; {error, {SkipError, _}} -> Acc; {error, Reason} -> lists:foreach(Undo, Acc), -- cgit v1.2.1 From dc3a44a4492b5d80092e48199dc1007f89657334 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Jun 2009 15:01:26 +0100 Subject: acktags in ack or tx_commit do not need to be ordered. 
Messages in tx_cancel do not need to be ordered either. Hence removal of quite a lot of lists:reverse. --- src/rabbit_amqqueue_process.erl | 8 ++++---- src/rabbit_mixed_queue.erl | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 9fe6f50d..3425ebd2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -258,7 +258,7 @@ run_message_queue(State = #q { mixed_state = MS }) -> {{_IsEmpty1, AutoAcks}, State1} = deliver_queue(Funs, {IsEmpty, []}, State), {ok, MS1} = - rabbit_mixed_queue:ack(lists:reverse(AutoAcks), State1 #q.mixed_state), + rabbit_mixed_queue:ack(AutoAcks, State1 #q.mixed_state), State1 #q { mixed_state = MS1 }. attempt_immediate_delivery(none, _ChPid, Msg, State) -> @@ -303,7 +303,7 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = deliver_queue(Funs, {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, State), - {ok, MS} = rabbit_mixed_queue:ack(lists:reverse(AutoAcks), + {ok, MS} = rabbit_mixed_queue:ack(AutoAcks, NewState #q.mixed_state), case OutstandingMsgs of [] -> run_message_queue(NewState #q { mixed_state = MS }); @@ -462,7 +462,7 @@ commit_transaction(Txn, State) -> pending_acks = PendingAcks } = lookup_tx(Txn), PendingMessagesOrdered = lists:reverse(PendingMessages), - PendingAcksOrdered = lists:append(lists:reverse(PendingAcks)), + PendingAcksOrdered = lists:append(PendingAcks), Acks = case lookup_ch(ChPid) of not_found -> []; @@ -479,7 +479,7 @@ commit_transaction(Txn, State) -> rollback_transaction(Txn, State) -> #tx { pending_messages = PendingMessages } = lookup_tx(Txn), - {ok, MS} = rabbit_mixed_queue:tx_cancel(lists:reverse(PendingMessages), + {ok, MS} = rabbit_mixed_queue:tx_cancel(PendingMessages, State #q.mixed_state), erase_tx(Txn), State #q { mixed_state = MS }. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index bb9b90a3..4392a006 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -169,7 +169,7 @@ to_mixed_mode(TxnMessages, State = _ -> [Msg #basic_message.guid | Acc] end end, [], TxnMessages), - ok = rabbit_disk_queue:tx_cancel(lists:reverse(Cancel)), + ok = rabbit_disk_queue:tx_cancel(Cancel), {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, @@ -184,7 +184,7 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) end, ok = if Acks == [] -> ok; - true -> rabbit_disk_queue:ack(Q, lists:reverse(Acks)) + true -> rabbit_disk_queue:ack(Q, Acks) end, {ok, State #mqstate { length = Length }}. @@ -338,13 +338,12 @@ tx_commit(Publishes, Acks, State = #mqstate { mode = mixed, queue = Q, length = Length + erlang:length(Publishes) }}. only_persistent_msg_ids(Pubs) -> - lists:reverse( lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> if IsPersistent -> [Msg #basic_message.guid | Acc]; true -> Acc end - end, [], Pubs)). + end, [], Pubs). tx_cancel(Publishes, State = #mqstate { mode = disk }) -> ok = rabbit_disk_queue:tx_cancel(only_msg_ids(Publishes)), -- cgit v1.2.1 From 184a3c9d3e2e4df58f8202aacde0e4120091473d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Jun 2009 16:58:55 +0100 Subject: Right, whilst tx_commits were being coalesced, this was not done properly protect non-tx publishes from unnecessary syncs. 
As a result, performance, of say: rabbitmq-java-client/build/dist$ sh runjava.sh com.rabbitmq.examples.MulticastMain -r 3500 -s 12345 -f persistent -z 20 was horrible when compared to bug19662 - about 5 times slower. Careful consideration of when we want to set the timer, when to cancel, and when the timeouts should be set has led to this revision which maintains the tx performance and restores non-tx performance. --- src/rabbit_disk_queue.erl | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 192995b2..92e5ece8 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -515,10 +515,11 @@ handle_cast(filesync, State) -> handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; -handle_info(timeout, State = #dqstate { current_dirty = true }) -> +handle_info(timeout, State = #dqstate { timer_ref = TRef }) + when TRef /= undefined -> noreply(sync_current_file_handle(State)); handle_info(_Info, State) -> - {noreply, State}. + noreply(State). terminate(_Reason, State) -> shutdown(State). @@ -552,15 +553,23 @@ code_change(_OldVsn, State, _Extra) -> %% ---- UTILITY FUNCTIONS ---- -noreply(NewState = #dqstate { current_dirty = true }) -> +noreply(NewState = #dqstate { on_sync_froms = [], timer_ref = undefined }) -> + {noreply, NewState, infinity}; +noreply(NewState = #dqstate { timer_ref = undefined }) -> {noreply, start_commit_timer(NewState), 0}; +noreply(NewState = #dqstate { on_sync_froms = [] }) -> + {noreply, stop_commit_timer(NewState), infinity}; noreply(NewState) -> - {noreply, stop_commit_timer(NewState), infinity}. + {noreply, NewState, 0}. -reply(Reply, NewState = #dqstate { current_dirty = true }) -> +reply(Reply, NewState = #dqstate { on_sync_froms = [], timer_ref = undefined }) -> + {reply, Reply, NewState, infinity}; +reply(Reply, NewState = #dqstate { timer_ref = undefined }) -> {reply, Reply, start_commit_timer(NewState), 0}; +reply(Reply, NewState = #dqstate { on_sync_froms = [] }) -> + {reply, Reply, stop_commit_timer(NewState), infinity}; reply(Reply, NewState) -> - {reply, Reply, stop_commit_timer(NewState), infinity}. + {reply, Reply, NewState, 0}. form_filename(Name) -> filename:join(base_directory(), Name). @@ -705,12 +714,8 @@ sequence_lookup(Sequences, Q) -> start_commit_timer(State = #dqstate { timer_ref = undefined }) -> {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), - State #dqstate { timer_ref = TRef }; -start_commit_timer(State) -> - State. + State #dqstate { timer_ref = TRef }. -stop_commit_timer(State = #dqstate { timer_ref = undefined }) -> - State; stop_commit_timer(State = #dqstate { timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #dqstate { timer_ref = undefined }. -- cgit v1.2.1 From f5b1b57521a7c2a79f60879fd0895916f0697b27 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Jun 2009 17:29:21 +0100 Subject: Failed to either watch the logs or remember that the timer is cancelled on disk_queue exit --- src/rabbit_disk_queue.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 92e5ece8..ac58d89d 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -716,6 +716,8 @@ start_commit_timer(State = #dqstate { timer_ref = undefined }) -> {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), State #dqstate { timer_ref = TRef }. 
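%% The reply/noreply changes in this patch coalesce syncs with plain
%% gen_server timeouts: return a 0 timeout while a sync is still owed,
%% so handle_info(timeout, ...) fires as soon as the mailbox drains,
%% and infinity once everything is flushed. The skeleton of the idiom,
%% with pending/1 as a placeholder for the dirty-or-waiting-froms test:
noreply_sketch(State) ->
    case pending(State) of
        true  -> {noreply, State, 0};        %% flush soon via handle_info(timeout, ...)
        false -> {noreply, State, infinity}  %% nothing owed; sleep until next request
    end.
pending(_State) -> false. %% placeholder predicate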
+stop_commit_timer(State = #dqstate { timer_ref = undefined }) -> + State; stop_commit_timer(State = #dqstate { timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #dqstate { timer_ref = undefined }. -- cgit v1.2.1 From 04c2afe353a136ad9f96c70f055178a0e322e3e3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 24 Jun 2009 12:31:48 +0100 Subject: Removed the dumb timer:sleep, and after testing, properly sorted out the mnesia clustering details. This means that wait_for_tables now waits for _all_ tables which means the bug that was requiring the timer:sleep has gone away. The solution to the clustering issue was to make sure that tables which are local content only are created explicitly on each node before you call wait_for_tables. All tests pass. --- src/rabbit_misc.erl | 2 +- src/rabbit_mnesia.erl | 33 +++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index f38ee631..2971e332 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -409,7 +409,7 @@ format_stderr(Fmt, Args) -> manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) -> Iterate(fun (App, Acc) -> case Do(App) of - ok -> timer:sleep(100), [App | Acc]; + ok -> [App | Acc]; {error, {SkipError, _}} -> Acc; {error, Reason} -> lists:foreach(Undo, Acc), diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 6c583cb4..0201017c 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -158,12 +158,14 @@ replicated_table_definitions() -> not lists:member({local_content, true}, Attrs) ]. +non_replicated_table_definitions() -> + [{Tab, Attrs} || {Tab, Attrs} <- table_definitions(), + lists:member({local_content, true}, Attrs) + ]. + table_names() -> [Tab || {Tab, _} <- table_definitions()]. -replicated_table_names() -> - [Tab || {Tab, _} <- replicated_table_definitions()]. - dir() -> mnesia:system_info(directory). ensure_mnesia_dir() -> @@ -189,7 +191,7 @@ ensure_mnesia_not_running() -> check_schema_integrity() -> %%TODO: more thorough checks case catch [mnesia:table_info(Tab, version) - || Tab <- replicated_table_names()] of + || Tab <- table_names()] of {'EXIT', Reason} -> {error, Reason}; _ -> ok end. @@ -292,12 +294,13 @@ init_db(ClusterNodes) -> ok = create_schema() end; {ok, [_|_]} -> + TableCopyType = case IsDiskNode of + true -> disc; + false -> ram + end, + ok = create_local_non_replicated_table_copies(TableCopyType), ok = wait_for_tables(), - ok = create_local_table_copies( - case IsDiskNode of - true -> disc; - false -> ram - end); + ok = create_local_replicated_table_copies(TableCopyType); {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or @@ -348,7 +351,13 @@ create_tables() -> table_definitions()), ok. -create_local_table_copies(Type) -> +create_local_replicated_table_copies(Type) -> + create_local_table_copies(Type, replicated_table_definitions()). + +create_local_non_replicated_table_copies(Type) -> + create_local_table_copies(Type, non_replicated_table_definitions()). + +create_local_table_copies(Type, TableDefinitions) -> ok = create_local_table_copy(schema, disc_copies), lists:foreach( fun({Tab, TabDef}) -> @@ -384,7 +393,7 @@ create_local_table_copies(Type) -> end, ok = create_local_table_copy(Tab, StorageType) end, - table_definitions()), + TableDefinitions), ok. 
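%% The ordering the commit message describes: local_content tables must
%% be created on this node before mnesia:wait_for_tables/2 runs, since
%% no other node can supply them. A sketch of the guarded wait used
%% here:
wait_sketch(TableNames) ->
    case mnesia:wait_for_tables(TableNames, 30000) of
        ok                 -> ok;
        {timeout, BadTabs} -> throw({error, {timeout_waiting_for_tables, BadTabs}});
        {error, Reason}    -> throw({error, Reason})
    end.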
create_local_table_copy(Tab, Type) -> @@ -402,7 +411,7 @@ create_local_table_copy(Tab, Type) -> wait_for_tables() -> case check_schema_integrity() of ok -> - case mnesia:wait_for_tables(replicated_table_names(), 30000) of + case mnesia:wait_for_tables(table_names(), 30000) of ok -> ok; {timeout, BadTabs} -> throw({error, {timeout_waiting_for_tables, BadTabs}}); -- cgit v1.2.1 From 3e53df74f67765d9c63e87053f4dfaa3d24dba92 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 24 Jun 2009 13:16:47 +0100 Subject: memory size tracking was wrong on startup in disk mode for mixed_queue (and actually for mixed mode too, given that mixed mode starts up in disk mode, then converts, which maintains the same size). --- src/rabbit_mixed_queue.erl | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index fed95da3..88077f10 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -186,11 +186,11 @@ to_mixed_mode(TxnMessages, State = purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - memory_size = QSize }) -> + memory_size = 0 }) -> %% iterate through the content on disk, ack anything which isn't %% persistent, accumulate everything else that is persistent and %% requeue it - {Acks, Requeue, Length, ASize} = + {Acks, Requeue, Length, QSize} = deliver_all_messages(Q, IsDurable, [], [], 0, 0), ok = if Requeue == [] -> ok; true -> @@ -199,22 +199,21 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, ok = if Acks == [] -> ok; true -> rabbit_disk_queue:ack(Q, Acks) end, - {ok, State #mqstate { length = Length, memory_size = QSize - ASize }}. + {ok, State #mqstate { length = Length, memory_size = QSize }}. -deliver_all_messages(Q, IsDurable, Acks, Requeue, Length, ASize) -> +deliver_all_messages(Q, IsDurable, Acks, Requeue, Length, QSize) -> case rabbit_disk_queue:deliver(Q) of - empty -> {Acks, Requeue, Length, ASize}; + empty -> {Acks, Requeue, Length, QSize}; {Msg = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered, AckTag, _Remaining} -> OnDisk = IsPersistent andalso IsDurable, - {Acks1, Requeue1, Length1, ASize1} = + {Acks1, Requeue1, Length1, QSize1} = if OnDisk -> { Acks, [{AckTag, {next, IsDelivered}} | Requeue], - Length + 1, ASize }; - true -> { [AckTag | Acks], Requeue, Length, - ASize + size_of_message(Msg) } + Length + 1, QSize + size_of_message(Msg) }; + true -> { [AckTag | Acks], Requeue, Length, QSize } end, - deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1, ASize1) + deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1, QSize1) end. publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, -- cgit v1.2.1 From 73564f77ec9b76d1b0d93c1ebe4f1256f763e83f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 24 Jun 2009 15:42:30 +0100 Subject: Changed reports so that we get bytes gained and lost since the last report. Also, the sync version of publish is unnecessary as we were only ever using it in one place where we threw away the result. Thus even when publishing a message and marking it delivered in one up (as opposed to publish_delivered, which is quite different ;) ), we can make it cast, not call, as we don't need the acktag. Also, the memory accounting was wrong for requeue in mixed_queue because requeue doesn't actually change the memory sizes (memory goes up on (tx_)publish, and down on ack/tx_cancel. Requeue has no effect. Nor does deliver.). 
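A sketch of the gain/loss bookkeeping this introduces: the absolute
size moves on publish and ack, while the gain and loss counters only
ever grow and are zeroed once a report has been sent (record and
function names below are illustrative, not the real mqstate API):

    -record(mem, { size = 0, gain = 0, loss = 0 }).
    grow(N, M = #mem { size = S, gain = G }) ->
        M #mem { size = S + N, gain = G + N }.
    shrink(N, M = #mem { size = S, loss = L }) ->
        M #mem { size = S - N, loss = L + N }.
    reset(M) -> M #mem { gain = 0, loss = 0 }. %% cf. reset_counters/1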
--- src/rabbit_amqqueue_process.erl | 8 +- src/rabbit_disk_queue.erl | 14 +-- src/rabbit_mixed_queue.erl | 184 +++++++++++++++++++++++--------------- src/rabbit_queue_mode_manager.erl | 9 +- src/rabbit_tests.erl | 20 ++--- 5 files changed, 136 insertions(+), 99 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6b65a5a5..b6353bef 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -545,7 +545,8 @@ i(Item, _) -> report_memory(State = #q { old_memory_report = {OldMem, Then}, mixed_state = MS }) -> - MSize = rabbit_mixed_queue:estimate_queue_memory(MS), + {MSize, Gain, Loss} = + rabbit_mixed_queue:estimate_queue_memory(MS), NewMem = case MSize of 0 -> 1; %% avoid / 0 N -> N @@ -555,8 +556,9 @@ report_memory(State = #q { old_memory_report = {OldMem, Then}, case ((NewMem / OldMem) > 1.1 orelse (OldMem / NewMem) > 1.1) andalso (?MEMORY_REPORT_TIME_INTERVAL < timer:now_diff(Now, Then)) of true -> - rabbit_queue_mode_manager:report_memory(self(), NewMem), - State1 #q { old_memory_report = {NewMem, Now} }; + rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss), + State1 #q { old_memory_report = {NewMem, Now}, + mixed_state = rabbit_mixed_queue:reset_counters(MS) }; false -> State1 end. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d161a093..db1b314a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -279,10 +279,8 @@ start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []). -publish(Q, Message = #basic_message {}, false) -> - gen_server2:cast(?SERVER, {publish, Q, Message}); -publish(Q, Message = #basic_message {}, true) -> - gen_server2:call(?SERVER, {publish, Q, Message}, infinity). +publish(Q, Message = #basic_message {}, IsDelivered) -> + gen_server2:cast(?SERVER, {publish, Q, Message, IsDelivered}). deliver(Q) -> gen_server2:call(?SERVER, {deliver, Q}, infinity). @@ -427,10 +425,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> end, {ok, State1 #dqstate { current_file_handle = FileHdl }}. -handle_call({publish, Q, Message}, _From, State) -> - {ok, MsgSeqId, State1} = - internal_publish(Q, Message, next, true, State), - reply(MsgSeqId, State1); handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), reply(Result, State1); @@ -478,9 +472,9 @@ handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> handle_call(cache_info, _From, State = #dqstate { message_cache = Cache }) -> reply(ets:info(Cache), State). -handle_cast({publish, Q, Message}, State) -> +handle_cast({publish, Q, Message, IsDelivered}, State) -> {ok, _MsgSeqId, State1} = - internal_publish(Q, Message, next, false, State), + internal_publish(Q, Message, next, IsDelivered, State), noreply(State1); handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 88077f10..12fede17 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -40,14 +40,16 @@ length/1, is_empty/1, delete_queue/1]). -export([to_disk_only_mode/2, to_mixed_mode/2, estimate_queue_memory/1, - info/1]). + reset_counters/1, info/1]). -record(mqstate, { mode, msg_buf, queue, is_durable, length, - memory_size + memory_size, + memory_gain, + memory_loss } ). 
@@ -59,7 +61,9 @@ queue :: queue_name(), is_durable :: bool(), length :: non_neg_integer(), - memory_size :: non_neg_integer() + memory_size :: non_neg_integer(), + memory_gain :: non_neg_integer(), + memory_loss :: non_neg_integer() }). -type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). -type(okmqs() :: {'ok', mqstate()}). @@ -86,7 +90,9 @@ -spec(to_disk_only_mode/2 :: ([message()], mqstate()) -> okmqs()). -spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). --spec(estimate_queue_memory/1 :: (mqstate()) -> non_neg_integer). +-spec(estimate_queue_memory/1 :: (mqstate()) -> + {non_neg_integer, non_neg_integer, non_neg_integer}). +-spec(reset_counters/1 :: (mqstate()) -> (mqstate())). -spec(info/1 :: (mqstate()) -> mode()). -endif. @@ -94,7 +100,8 @@ init(Queue, IsDurable, disk) -> purge_non_persistent_messages( #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, - is_durable = IsDurable, length = 0, memory_size = 0 }); + is_durable = IsDurable, length = 0, memory_size = 0, + memory_gain = 0, memory_loss = 0 }); init(Queue, IsDurable, mixed) -> {ok, State} = init(Queue, IsDurable, disk), to_mixed_mode([], State). @@ -217,21 +224,24 @@ deliver_all_messages(Q, IsDurable, Acks, Requeue, Length, QSize) -> end. publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, - memory_size = Size }) -> + memory_size = QSize, memory_gain = Gain }) -> ok = rabbit_disk_queue:publish(Q, Msg, false), - Size1 = Size + size_of_message(Msg), - {ok, State #mqstate { length = Length + 1, memory_size = Size1 }}; + MsgSize = size_of_message(Msg), + {ok, State #mqstate { length = Length + 1, memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }}; publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, - msg_buf = MsgBuf, length = Length, memory_size = Size }) -> + msg_buf = MsgBuf, length = Length, memory_size = QSize, + memory_gain = Gain }) -> OnDisk = IsDurable andalso IsPersistent, ok = if OnDisk -> rabbit_disk_queue:publish(Q, Msg, false); true -> ok end, - Size1 = Size + size_of_message(Msg), + MsgSize = size_of_message(Msg), {ok, State #mqstate { msg_buf = queue:in({Msg, false, OnDisk}, MsgBuf), - length = Length + 1, memory_size = Size1 }}. + length = Length + 1, memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }}. %% Assumption here is that the queue is empty already (only called via %% attempt_immediate_delivery). @@ -239,10 +249,13 @@ publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, State = #mqstate { mode = Mode, is_durable = IsDurable, - queue = Q, length = 0, memory_size = QSize }) + queue = Q, length = 0, memory_size = QSize, + memory_gain = Gain }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> rabbit_disk_queue:publish(Q, Msg, false), - State1 = State #mqstate { memory_size = QSize + size_of_message(Msg) }, + MsgSize = size_of_message(Msg), + State1 = State #mqstate { memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }, if IsDurable andalso IsPersistent -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but @@ -256,8 +269,11 @@ publish_delivered(Msg = {ok, noack, State1} end; publish_delivered(Msg, State = - #mqstate { mode = mixed, length = 0, memory_size = QSize }) -> - {ok, noack, State #mqstate { memory_size = QSize + size_of_message(Msg) }}. 
+ #mqstate { mode = mixed, length = 0, memory_size = QSize, + memory_gain = Gain }) -> + MsgSize = size_of_message(Msg), + {ok, noack, State #mqstate { memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }}. deliver(State = #mqstate { length = 0 }) -> {empty, State}; @@ -304,43 +320,56 @@ remove_noacks(MsgsWithAcks) -> end, {[], 0}, MsgsWithAcks), {AckTags, ASize}. -ack(MsgsWithAcks, State = #mqstate { queue = Q, memory_size = QSize }) -> - case remove_noacks(MsgsWithAcks) of - {[], ASize} -> {ok, State #mqstate { memory_size = QSize - ASize }}; - {AckTags, ASize} -> ok = rabbit_disk_queue:ack(Q, AckTags), - {ok, State #mqstate { memory_size = QSize - ASize }} - end. +ack(MsgsWithAcks, State = #mqstate { queue = Q, memory_size = QSize, + memory_loss = Loss }) -> + ASize = case remove_noacks(MsgsWithAcks) of + {[], ASize1} -> ASize1; + {AckTags, ASize1} -> rabbit_disk_queue:ack(Q, AckTags), + ASize1 + end, + State1 = State #mqstate { memory_size = QSize - ASize, + memory_loss = Loss + ASize }, + {ok, State1}. -tx_publish(Msg, State = #mqstate { mode = disk, memory_size = QSize }) -> +tx_publish(Msg, State = #mqstate { mode = disk, memory_size = QSize, + memory_gain = Gain }) -> ok = rabbit_disk_queue:tx_publish(Msg), - {ok, State #mqstate { memory_size = QSize + size_of_message(Msg) }}; + MsgSize = size_of_message(Msg), + {ok, State #mqstate { memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }}; tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { mode = mixed, is_durable = IsDurable, - memory_size = QSize }) + memory_size = QSize, memory_gain = Gain }) when IsDurable andalso IsPersistent -> ok = rabbit_disk_queue:tx_publish(Msg), - {ok, State #mqstate { memory_size = QSize + size_of_message(Msg) }}; -tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize }) -> + MsgSize = size_of_message(Msg), + {ok, State #mqstate { memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }}; +tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize, + memory_gain = Gain }) -> %% this message will reappear in the tx_commit, so ignore for now - {ok, State #mqstate { memory_size = QSize + size_of_message(Msg) }}. + MsgSize = size_of_message(Msg), + {ok, State #mqstate { memory_size = QSize + MsgSize, + memory_gain = Gain + MsgSize }}. only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). 
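%% only_msg_ids/1 above could equally be written as a list
%% comprehension; shown for reference (uses the #basic_message{} record
%% from rabbit.hrl):
only_msg_ids_lc(Pubs) ->
    [Msg #basic_message.guid || Msg <- Pubs].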
tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = disk, queue = Q, length = Length, - memory_size = QSize }) -> + memory_size = QSize, memory_loss = Loss }) -> {RealAcks, ASize} = remove_noacks(MsgsWithAcks), ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), RealAcks) end, {ok, State #mqstate { length = Length + erlang:length(Publishes), - memory_size = QSize - ASize }}; + memory_size = QSize - ASize, + memory_loss = Loss + ASize }}; tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, length = Length, - memory_size = QSize }) -> + memory_size = QSize, memory_loss = Loss }) -> {PersistentPubs, MsgBuf1} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, {Acc, MsgBuf2}) -> @@ -360,20 +389,23 @@ tx_commit(Publishes, MsgsWithAcks, rabbit_disk_queue:tx_commit( Q, lists:reverse(PersistentPubs), RealAcks) end, - {ok, State #mqstate { msg_buf = MsgBuf1, + {ok, State #mqstate { msg_buf = MsgBuf1, memory_size = QSize - ASize, length = Length + erlang:length(Publishes), - memory_size = QSize - ASize }}. + memory_loss = Loss + ASize }}. -tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = QSize }) -> +tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = QSize, + memory_loss = Loss }) -> {MsgIds, CSize} = lists:foldl( fun (Msg = #basic_message { guid = MsgId }, {MsgIdsAcc, CSizeAcc}) -> {[MsgId | MsgIdsAcc], CSizeAcc + size_of_message(Msg)} end, {[], 0}, Publishes), ok = rabbit_disk_queue:tx_cancel(MsgIds), - {ok, State #mqstate { memory_size = QSize - CSize }}; + {ok, State #mqstate { memory_size = QSize - CSize, + memory_loss = Loss + CSize }}; tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable, - memory_size = QSize }) -> + memory_size = QSize, + memory_loss = Loss }) -> {PersistentPubs, CSize} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent, @@ -389,74 +421,78 @@ tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable, rabbit_disk_queue:tx_cancel(PersistentPubs); true -> ok end, - {ok, State #mqstate { memory_size = QSize - CSize }}. + {ok, State #mqstate { memory_size = QSize - CSize, + memory_loss = Loss + CSize }}. %% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - length = Length, - memory_size = QSize + length = Length }) -> %% here, we may have messages with no ack tags, because of the %% fact they are not persistent, but nevertheless we want to %% requeue them. This means publishing them delivered. 
- {Requeue, CSize} + Requeue = lists:foldl( - fun ({Msg = #basic_message { is_persistent = IsPersistent }, - AckTag}, {RQ, CSizeAcc}) - when IsPersistent andalso IsDurable -> - {[AckTag | RQ], CSizeAcc + size_of_message(Msg)}; - ({Msg, _AckTag}, {RQ, CSizeAcc}) -> - ok = if RQ == [] -> ok; - true -> rabbit_disk_queue:requeue( - Q, lists:reverse(RQ)) + fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) + when IsDurable andalso IsPersistent -> + [AckTag | RQ]; + ({Msg, _AckTag}, RQ) -> + ok = case RQ == [] of + true -> ok; + false -> rabbit_disk_queue:requeue( + Q, lists:reverse(RQ)) end, - _AckTag1 = rabbit_disk_queue:publish( - Q, Msg, true), - {[], CSizeAcc + size_of_message(Msg)} - end, {[], 0}, MessagesWithAckTags), + ok = rabbit_disk_queue:publish(Q, Msg, true), + [] + end, [], MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), - {ok, State #mqstate { length = Length + erlang:length(MessagesWithAckTags), - memory_size = QSize + CSize - }}; + {ok, + State #mqstate { length = Length + erlang:length(MessagesWithAckTags) }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, - length = Length, - memory_size = QSize + length = Length }) -> - {PersistentPubs, MsgBuf1, CSize} = + {PersistentPubs, MsgBuf1} = lists:foldl( fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, - {Acc, MsgBuf2, CSizeAcc}) -> + {Acc, MsgBuf2}) -> OnDisk = IsDurable andalso IsPersistent, Acc1 = if OnDisk -> [AckTag | Acc]; true -> Acc end, - CSizeAcc1 = CSizeAcc + size_of_message(Msg), - {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2), CSizeAcc1} - end, {[], MsgBuf, 0}, MessagesWithAckTags), + {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2)} + end, {[], MsgBuf}, MessagesWithAckTags), ok = if [] == PersistentPubs -> ok; true -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) end, - {ok, State #mqstate {msg_buf = MsgBuf1, memory_size = QSize + CSize, + {ok, State #mqstate {msg_buf = MsgBuf1, length = Length + erlang:length(MessagesWithAckTags)}}. -purge(State = #mqstate { queue = Q, mode = disk, length = Count }) -> +purge(State = #mqstate { queue = Q, mode = disk, length = Count, + memory_loss = Loss, memory_size = QSize }) -> Count = rabbit_disk_queue:purge(Q), - {Count, State #mqstate { length = 0, memory_size = 0 }}; -purge(State = #mqstate { queue = Q, mode = mixed, length = Length }) -> + {Count, State #mqstate { length = 0, memory_size = 0, + memory_loss = Loss + QSize }}; +purge(State = #mqstate { queue = Q, mode = mixed, length = Length, + memory_loss = Loss, memory_size = QSize }) -> rabbit_disk_queue:purge(Q), {Length, - State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0 }}. + State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, + memory_loss = Loss + QSize }}. -delete_queue(State = #mqstate { queue = Q, mode = disk }) -> +delete_queue(State = #mqstate { queue = Q, mode = disk, memory_size = QSize, + memory_loss = Loss }) -> rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { length = 0, memory_size = 0 }}; -delete_queue(State = #mqstate { queue = Q, mode = mixed }) -> + {ok, State #mqstate { length = 0, memory_size = 0, + memory_loss = Loss + QSize }}; +delete_queue(State = #mqstate { queue = Q, mode = mixed, memory_size = QSize, + memory_loss = Loss }) -> rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0 }}. 
+ {ok, State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, + memory_loss = Loss + QSize }}. length(#mqstate { length = Length }) -> Length. @@ -464,8 +500,12 @@ length(#mqstate { length = Length }) -> is_empty(#mqstate { length = Length }) -> 0 == Length. -estimate_queue_memory(#mqstate { memory_size = Size }) -> - 2 * Size. %% Magic number. Will probably need playing with. +estimate_queue_memory(#mqstate { memory_size = Size, memory_gain = Gain, + memory_loss = Loss }) -> + {Size, Gain, Loss}. + +reset_counters(State) -> + State #mqstate { memory_gain = 0, memory_loss = 0 }. info(#mqstate { mode = Mode }) -> Mode. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 50f66063..5a3b464d 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/1, report_memory/2]). +-export([register/1, report_memory/4]). -define(SERVER, ?MODULE). @@ -49,7 +49,8 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/1 :: (pid()) -> {'ok', queue_mode()}). --spec(report_memory/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(report_memory/4 :: (pid(), non_neg_integer(), + non_neg_integer(), non_neg_integer()) -> 'ok'). -endif. @@ -63,8 +64,8 @@ start_link() -> register(Pid) -> gen_server2:call(?SERVER, {register, Pid}). -report_memory(Pid, Memory) -> - gen_server2:cast(?SERVER, {report_memory, Pid, Memory}). +report_memory(Pid, Memory, Gain, Loss) -> + gen_server2:cast(?SERVER, {report_memory, Pid, Memory, Gain, Loss}). init([]) -> process_flag(trap_exit, true), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7d74968b..34a4fcb5 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1003,15 +1003,15 @@ rdq_test_mixed_queue_modes() -> end, MS4, lists:seq(1,10)), 30 = rabbit_mixed_queue:length(MS6), io:format("Published a mixture of messages; ~w~n", - [rabbit_mixed_queue:estimate_extra_memory(MS6)]), + [rabbit_mixed_queue:estimate_queue_memory(MS6)]), {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode([], MS6), 30 = rabbit_mixed_queue:length(MS7), io:format("Converted to disk only mode; ~w~n", - [rabbit_mixed_queue:estimate_extra_memory(MS7)]), + [rabbit_mixed_queue:estimate_queue_memory(MS7)]), {ok, MS8} = rabbit_mixed_queue:to_mixed_mode([], MS7), 30 = rabbit_mixed_queue:length(MS8), io:format("Converted to mixed mode; ~w~n", - [rabbit_mixed_queue:estimate_extra_memory(MS8)]), + [rabbit_mixed_queue:estimate_queue_memory(MS8)]), MS10 = lists:foldl( fun (N, MS9) -> @@ -1035,10 +1035,10 @@ rdq_test_mixed_queue_modes() -> lists:foldl( fun (N, {MS13, AcksAcc}) -> Rem = 10 - N, - {{#basic_message { is_persistent = true }, + {{Msg = #basic_message { is_persistent = true }, false, AckTag, Rem}, MS13a} = rabbit_mixed_queue:deliver(MS13), - {MS13a, [AckTag | AcksAcc]} + {MS13a, [{Msg, AckTag} | AcksAcc]} end, {MS12, []}, lists:seq(1,10)), 0 = rabbit_mixed_queue:length(MS14), {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14), @@ -1050,7 +1050,7 @@ rdq_test_mixed_queue_modes() -> rdq_start(), {ok, MS17} = rabbit_mixed_queue:init(q, true, mixed), 0 = rabbit_mixed_queue:length(MS17), - 0 = rabbit_mixed_queue:estimate_extra_memory(MS17), + {0,0,0} = rabbit_mixed_queue:estimate_queue_memory(MS17), io:format("Recovered queue~n"), rdq_stop(), passed. 
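
The intended call pattern for the counter API is: read the triple, report it, then zero the counters so the next report only reflects activity since this one. A sketch of how a queue process would drive it (this exact wrapper is not part of the patch; it only uses functions the patch defines):

    report_and_reset(MS) ->
        {Size, Gain, Loss} = rabbit_mixed_queue:estimate_queue_memory(MS),
        ok = rabbit_queue_mode_manager:report_memory(self(), Size, Gain, Loss),
        %% reset_counters/1 returns the updated mqstate directly
        rabbit_mixed_queue:reset_counters(MS).
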
@@ -1120,10 +1120,10 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc Rem = Len1 - (Msg #basic_message.guid) - 1, {{Msg, false, AckTag, Rem}, MS7a} = rabbit_mixed_queue:deliver(MS7), - {[AckTag | Acc], MS7a} + {[{Msg, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA ++ MsgsB), 0 = rabbit_mixed_queue:length(MS8), - rabbit_mixed_queue:ack(lists:reverse(AckTags), MS8); + rabbit_mixed_queue:ack(AckTags, MS8); cancel -> {ok, MS6} = rabbit_mixed_queue:tx_cancel(MsgsB, MS5), Len0 = rabbit_mixed_queue:length(MS6), @@ -1133,10 +1133,10 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc Rem = Len0 - (Msg #basic_message.guid) - 1, {{Msg, false, AckTag, Rem}, MS7a} = rabbit_mixed_queue:deliver(MS7), - {[AckTag | Acc], MS7a} + {[{Msg, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA), 0 = rabbit_mixed_queue:length(MS8), - rabbit_mixed_queue:ack(lists:reverse(AckTags), MS8) + rabbit_mixed_queue:ack(AckTags, MS8) end, 0 = rabbit_mixed_queue:length(MS9), passed. -- cgit v1.2.1 From 4a1eefe5e346495ce96f52f6c3666aa7d2db88dd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 24 Jun 2009 23:27:29 +0100 Subject: some more scaffolding for tokens --- src/rabbit_memsup_linux.erl | 72 +++++++++++++++++++++++++-------------- src/rabbit_queue_mode_manager.erl | 67 ++++++++++++++++++++++++++++-------- 2 files changed, 99 insertions(+), 40 deletions(-) diff --git a/src/rabbit_memsup_linux.erl b/src/rabbit_memsup_linux.erl index ffdc7e99..158df679 100644 --- a/src/rabbit_memsup_linux.erl +++ b/src/rabbit_memsup_linux.erl @@ -44,7 +44,13 @@ -define(DEFAULT_MEMORY_CHECK_INTERVAL, 1000). --record(state, {memory_fraction, alarmed, timeout, timer}). +-record(state, {memory_fraction, + alarmed, + timeout, + timer, + total_memory, + allocated_memory + }). %%---------------------------------------------------------------------------- @@ -69,10 +75,13 @@ update() -> init(_Args) -> Fraction = os_mon:get_env(memsup, system_memory_high_watermark), TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), - {ok, #state{alarmed = false, - memory_fraction = Fraction, - timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, - timer = TRef}}. + {ok, update(#state{alarmed = false, + memory_fraction = Fraction, + timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, + timer = TRef, + total_memory = undefined, + allocated_memory = undefined + })}. start_timer(Timeout) -> {ok, TRef} = timer:apply_interval(Timeout, ?MODULE, update, []), @@ -94,11 +103,33 @@ handle_call({set_check_interval, Timeout}, _From, State) -> {ok, cancel} = timer:cancel(State#state.timer), {reply, ok, State#state{timeout = Timeout, timer = start_timer(Timeout)}}; -handle_call(_Request, _From, State) -> +handle_call(get_memory_data, _From, + State = #state { total_memory = MemTotal, + allocated_memory = MemUsed }) -> + {reply, {MemTotal, MemUsed, undefined}, State}; + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(update, State) -> + {noreply, update(State)}; + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(_Info, State) -> {noreply, State}. -handle_cast(update, State = #state{alarmed = Alarmed, - memory_fraction = MemoryFraction}) -> +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
+ +%%---------------------------------------------------------------------------- + +update(State = #state{alarmed = Alarmed, + memory_fraction = MemoryFraction}) -> File = read_proc_file("/proc/meminfo"), Lines = string:tokens(File, "\n"), Dict = dict:from_list(lists:map(fun parse_line/1, Lines)), @@ -116,21 +147,8 @@ handle_cast(update, State = #state{alarmed = Alarmed, _ -> ok end, - {noreply, State#state{alarmed = NewAlarmed}}; - -handle_cast(_Request, State) -> - {noreply, State}. - -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, _State) -> - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%---------------------------------------------------------------------------- + State#state{alarmed = NewAlarmed, + total_memory = MemTotal, allocated_memory = MemUsed}. -define(BUFFER_SIZE, 1024). @@ -152,5 +170,9 @@ read_proc_file(IoDevice, Acc) -> %% A line looks like "FooBar: 123456 kB" parse_line(Line) -> - [Name, Value | _] = string:tokens(Line, ": "), - {list_to_atom(Name), list_to_integer(Value)}. + [Name, Value | Rest] = string:tokens(Line, ": "), + Value1 = case Rest of + [] -> list_to_integer(Value); %% no units + ["kB"] -> list_to_integer(Value) * 1024 + end, + {list_to_atom(Name), Value1}. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 5a3b464d..4ed56fd3 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -40,6 +40,12 @@ -export([register/1, report_memory/4]). +-define(TOTAL_TOKENS, 1000). +-define(LOW_WATER_MARK_FRACTION, 0.25). +-define(EXPIRY_INTERVAL_MICROSECONDS, 5000000). +-define(ACTIVITY_THRESHOLD, 10). +-define(INITIAL_TOKEN_ALLOCATION, 10). + -define(SERVER, ?MODULE). -ifdef(use_specs). @@ -54,8 +60,10 @@ -endif. --record(state, { mode, - queues +-record(state, { remaining_tokens, + mixed_queues, + disk_queues, + bytes_per_token }). start_link() -> @@ -69,26 +77,48 @@ report_memory(Pid, Memory, Gain, Loss) -> init([]) -> process_flag(trap_exit, true), - {ok, #state { mode = unlimited, - queues = dict:new() + %% todo, fix up this call as os_mon may not be running + {MemTotal, _MemUsed, _BigProc} = memsup:get_memory_data(), + {ok, #state { remaining_tokens = ?TOTAL_TOKENS, + mixed_queues = dict:new(), + disk_queues = sets:new(), + bytes_per_token = MemTotal / ?TOTAL_TOKENS }}. handle_call({register, Pid}, _From, - State = #state { queues = Qs, mode = Mode }) -> + State = #state { remaining_tokens = Remaining, + mixed_queues = Mixed, + disk_queues = Disk }) -> _MRef = erlang:monitor(process, Pid), - Result = case Mode of - disk_only -> disk; - _ -> mixed - end, - {reply, {ok, Result}, State #state { queues = dict:store(Pid, 0, Qs) }}. - -handle_cast(Any, State) -> - io:format("~w~n", [Any]), + {Result, State1} = + case Remaining >= ?INITIAL_TOKEN_ALLOCATION of + true -> + {mixed, State #state { remaining_tokens = + Remaining - ?INITIAL_TOKEN_ALLOCATION, + mixed_queues = dict:store + (Pid, {?INITIAL_TOKEN_ALLOCATION, now()}, + Mixed) }}; + + false -> + {disk, State #state { disk_queues = + sets:add_element(Pid, Disk) }} + end, + {reply, {ok, Result}, State1 }. + +handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost}, State) -> {noreply, State}. 
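
Worked through on two representative /proc/meminfo lines (values invented), the new parse_line/1 behaves as follows; note that string:tokens/2 treats ": " as a set of separator characters, not a literal substring:

    %% string:tokens("MemTotal: 2048 kB", ": ") -> ["MemTotal", "2048", "kB"]
    %%   parse_line("MemTotal: 2048 kB")  -> {'MemTotal', 2097152}  %% 2048 * 1024
    %% string:tokens("HugePages_Total: 0", ": ") -> ["HugePages_Total", "0"]
    %%   parse_line("HugePages_Total: 0") -> {'HugePages_Total', 0} %% no units
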
handle_info({'DOWN', _MRef, process, Pid, _Reason},
-            State = #state { queues = Qs }) ->
-    {noreply, State #state { queues = dict:erase(Pid, Qs) }};
+            State = #state { remaining_tokens = Remaining,
+                             mixed_queues = Mixed }) ->
+    State1 = case find_queue(Pid, State) of
+                 disk ->
+                     State;
+                 {mixed, {Tokens, _When}} ->
+                     State #state { remaining_tokens = Remaining + Tokens,
+                                    mixed_queues = dict:erase(Pid, Mixed) }
+             end,
+    {noreply, State1};
 handle_info({'EXIT', _Pid, Reason}, State) ->
     {stop, Reason, State};
 handle_info(_Info, State) ->
@@ -99,3 +129,10 @@ terminate(_Reason, State) ->
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
+
+find_queue(Pid, #state { disk_queues = Disk, mixed_queues = Mixed }) ->
+    case sets:is_element(Pid, Disk) of
+        true -> disk;
+        false -> {mixed, dict:fetch(Pid, Mixed)}
+    end.
+
-- 
cgit v1.2.1


From 9dde46a3958bcf4dab1fa0b2c8b7f2ae7ffde623 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 26 Jun 2009 18:19:34 +0100
Subject: Had been thinking about this optimisation for a while but someone
 mentioned it to me yesterday at the Erlang Factory conference.

When you sync, you know that everything up to the current state of the
file is sync'd. Given that we're always appending, we know that any
message before the current length of the file is available. Thus when
we're reading messages from the current write file, even if the file is
dirty, we don't need to sync unless the message we're reading is beyond
the length of the file at the last sync.

This can be very effective: for example, if there are a few hundred
messages in the queue and you're then reading and writing to the queue
at the same rate, rather than doing a sync for every read we now only
sync once per size of queue (altitude or ramp size). Sure enough, my
publish_one_in_one_out_receive(1000) (altitude of 1000, then 5000 @ one
in, one out) reduces from 6089 calls to fsync to 21, and from 15.4
seconds to 3.6.

It's also possible to apply the same optimisation in tx_commit - not
only do we now return immediately if the current file is not dirty or
if none of the messages in the txn are in the current file, but we can
also return immediately if the current file is dirty and messages are
in the current file, but they're all below the last sync file size.
Surprisingly, very little extra code was needed.
---
 src/rabbit_disk_queue.erl | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index ac58d89d..2a7505a7 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -86,7 +86,8 @@
          read_file_handles,       %% file handles for reading (LRU)
          read_file_handles_limit, %% how many file handles can we open?
          on_sync_froms,           %% list of committers to run on sync (reversed)
-         timer_ref                %% TRef for our interval timer
+         timer_ref,               %% TRef for our interval timer
+         last_sync_offset         %% current_offset at the last time we sync'd
         }).
 
 %% The components:
@@ -394,7 +395,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
                 read_file_handles = {dict:new(), gb_trees:empty()},
                 read_file_handles_limit = ReadFileHandlesLimit,
                 on_sync_froms = [],
-                timer_ref = undefined
+                timer_ref = undefined,
+                last_sync_offset = 0
                },
     {ok, State1 = #dqstate { current_file_name = CurrentName,
                              current_offset = Offset } } =
@@ -648,13 +650,14 @@ determine_next_read_id(CurrentRead, CurrentWrite, NextWrite)
        when NextWrite >= CurrentWrite ->
     CurrentRead.
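
The whole optimisation reduces to one predicate, restated here in isolation (argument names assumed; the real test lives inline in get_read_handle, in the hunk below):

    %% an fsync is needed before a read only when all three hold: the read
    %% is from the file currently being appended to, that file has unsynced
    %% writes, and the wanted offset lies at or beyond the last synced length
    needs_sync(File, Offset, CurName, IsDirty, LastSyncOffset) ->
        File =:= CurName andalso IsDirty andalso Offset >= LastSyncOffset.

    %% e.g. with last_sync_offset = 4096 on a dirty current file "5.rdq":
    %%   needs_sync("5.rdq", 1000, "5.rdq", true, 4096) -> false (skip fsync)
    %%   needs_sync("5.rdq", 5000, "5.rdq", true, 4096) -> true  (fsync first)
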
-get_read_handle(File, State = +get_read_handle(File, Offset, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, read_file_handles_limit = ReadFileHandlesLimit, current_file_name = CurName, - current_dirty = IsDirty + current_dirty = IsDirty, + last_sync_offset = SyncOffset }) -> - State1 = if CurName =:= File andalso IsDirty -> + State1 = if CurName =:= File andalso IsDirty andalso Offset >= SyncOffset -> sync_current_file_handle(State); true -> State end, @@ -727,15 +730,19 @@ sync_current_file_handle(State = #dqstate { current_dirty = false, State; sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl, current_dirty = IsDirty, - on_sync_froms = Froms + current_offset = CurOffset, + on_sync_froms = Froms, + last_sync_offset = SyncOffset }) -> - ok = case IsDirty of - true -> file:sync(CurHdl); - false -> ok - end, + SyncOffset1 = case IsDirty of + true -> ok = file:sync(CurHdl), + CurOffset; + false -> SyncOffset + end, lists:map(fun (From) -> gen_server2:reply(From, ok) end, lists:reverse(Froms)), - State #dqstate { current_dirty = false, on_sync_froms = [] }. + State #dqstate { current_dirty = false, on_sync_froms = [], + last_sync_offset = SyncOffset1 }. %% ---- INTERNAL RAW FUNCTIONS ---- @@ -776,7 +783,7 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> end, case ReadMsg of true -> - {FileHdl, State1} = get_read_handle(File, State), + {FileHdl, State1} = get_read_handle(File, Offset, State), {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), {ok, {MsgId, MsgBody, BodySize, Delivered, {MsgId, ReadSeqId}}, @@ -883,7 +890,8 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, State = #dqstate { sequences = Sequences, current_file_name = CurFile, current_dirty = IsDirty, - on_sync_froms = SyncFroms + on_sync_froms = SyncFroms, + last_sync_offset = SyncOffset }) -> {PubList, PubAcc, ReadSeqId, Length} = case PubMsgSeqIds of @@ -909,7 +917,7 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, lists:foldl( fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, {InCurFileAcc, ExpectedSeqId}) -> - [{MsgId, _RefCount, File, _Offset, + [{MsgId, _RefCount, File, Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), SeqId1 = adjust_last_msg_seq_id( Q, ExpectedSeqId, SeqId, write), @@ -924,7 +932,8 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, next_seq_id = NextSeqId1 }, write), - {InCurFileAcc orelse File =:= CurFile, + {InCurFileAcc orelse (File =:= CurFile andalso + Offset >= SyncOffset), NextSeqId1} end, {false, PubAcc}, PubList), {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), @@ -1126,7 +1135,8 @@ maybe_roll_to_new_file(Offset, State2 = State1 #dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, - current_offset = 0 + current_offset = 0, + last_sync_offset = 0 }, {ok, compact(sets:from_list([CurName]), State2)}; maybe_roll_to_new_file(_, State) -> -- cgit v1.2.1 From abd24e5e027da9262831c4962efbdfccc89b02fb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 29 Jun 2009 18:01:10 +0100 Subject: mmmm. It maybe sort of works. 
Needs work though --- src/rabbit.erl | 4 +- src/rabbit_amqqueue.erl | 12 ++- src/rabbit_amqqueue_process.erl | 76 +++++++++---------- src/rabbit_disk_queue.erl | 16 ++-- src/rabbit_mixed_queue.erl | 2 +- src/rabbit_queue_mode_manager.erl | 156 ++++++++++++++++++++++++++++++-------- 6 files changed, 181 insertions(+), 85 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 7d5e2a79..95872388 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -149,9 +149,7 @@ start(normal, []) -> end}, {"disk queue", fun () -> - ok = start_child(rabbit_disk_queue), - %% TODO, CHANGE ME, waiting on bug 20980 - ok = rabbit_disk_queue:to_ram_disk_mode() + ok = start_child(rabbit_disk_queue) end}, {"recovery", fun () -> diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index c045b3ca..92272f0c 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([set_mode/3]). +-export([set_mode/3, set_mode/2, report_memory/1]). -import(mnesia). -import(gen_server2). @@ -105,10 +105,12 @@ -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). -spec(set_mode/3 :: (vhost(), amqqueue(), ('disk' | 'mixed')) -> 'ok'). +-spec(set_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). +-spec(report_memory/1 :: (pid()) -> 'ok'). -endif. @@ -229,7 +231,13 @@ set_mode(VHostPath, Queue, ModeBin) when is_binary(VHostPath) andalso is_binary(Queue) -> Mode = list_to_atom(binary_to_list(ModeBin)), with(rabbit_misc:r(VHostPath, queue, Queue), - fun(Q) -> gen_server2:pcast(Q #amqqueue.pid, 10, {set_mode, Mode}) end). + fun(Q) -> set_mode(Q #amqqueue.pid, Mode) end). + +set_mode(QPid, Mode) -> + gen_server2:pcast(QPid, 10, {set_mode, Mode}). + +report_memory(QPid) -> + gen_server2:cast(QPid, report_memory). info(#amqqueue{ pid = QPid }) -> gen_server2:pcall(QPid, 9, info, infinity). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b6353bef..2bd170a2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -37,8 +37,7 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(HIBERNATE_AFTER, 1000). --define(MEMORY_REPORT_INTERVAL, 500). --define(MEMORY_REPORT_TIME_INTERVAL, 1000000). %% 1 second in microseconds +-define(MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds -export([start_link/1]). @@ -58,8 +57,7 @@ next_msg_id, active_consumers, blocked_consumers, - memory_report_counter, - old_memory_report + memory_report_timer }). -record(consumer, {tag, ack_required}). @@ -112,8 +110,7 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), - memory_report_counter = 0, - old_memory_report = {1, now()} + memory_report_timer = start_memory_timer() }, ?HIBERNATE_AFTER}. terminate(_Reason, State) -> @@ -124,6 +121,7 @@ terminate(_Reason, State) -> rollback_transaction(Txn, State1) end, State, all_tx()), rabbit_mixed_queue:delete_queue(NewState #q.mixed_state), + stop_memory_timer(NewState), ok = rabbit_amqqueue:internal_delete(QName). 
code_change(_OldVsn, State, _Extra) -> @@ -131,16 +129,30 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -reply(Reply, NewState = #q { memory_report_counter = 0 }) -> - {reply, Reply, report_memory(NewState), ?HIBERNATE_AFTER}; -reply(Reply, NewState = #q { memory_report_counter = C }) -> - {reply, Reply, NewState #q { memory_report_counter = C - 1 }, - ?HIBERNATE_AFTER}. - -noreply(NewState = #q { memory_report_counter = 0}) -> - {noreply, report_memory(NewState), ?HIBERNATE_AFTER}; -noreply(NewState = #q { memory_report_counter = C}) -> - {noreply, NewState #q { memory_report_counter = C - 1 }, ?HIBERNATE_AFTER}. +reply(Reply, NewState = #q { memory_report_timer = undefined }) -> + {reply, Reply, start_memory_timer(NewState), ?HIBERNATE_AFTER}; +reply(Reply, NewState) -> + {reply, Reply, NewState, ?HIBERNATE_AFTER}. + +noreply(NewState = #q { memory_report_timer = undefined }) -> + {noreply, start_memory_timer(NewState), ?HIBERNATE_AFTER}; +noreply(NewState) -> + {noreply, NewState, ?HIBERNATE_AFTER}. + +start_memory_timer() -> + {ok, TRef} = timer:apply_interval(?MEMORY_REPORT_TIME_INTERVAL, + rabbit_amqqueue, report_memory, [self()]), + TRef. +start_memory_timer(State = #q { memory_report_timer = undefined }) -> + State #q { memory_report_timer = start_memory_timer() }; +start_memory_timer(State) -> + State. + +stop_memory_timer(State = #q { memory_report_timer = undefined }) -> + State; +stop_memory_timer(State = #q { memory_report_timer = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #q { memory_report_timer = undefined }. lookup_ch(ChPid) -> case get({ch, ChPid}) of @@ -543,24 +555,15 @@ i(memory, _) -> i(Item, _) -> throw({bad_argument, Item}). -report_memory(State = #q { old_memory_report = {OldMem, Then}, - mixed_state = MS }) -> +report_memory(State = #q { mixed_state = MS }) -> {MSize, Gain, Loss} = rabbit_mixed_queue:estimate_queue_memory(MS), NewMem = case MSize of 0 -> 1; %% avoid / 0 N -> N end, - State1 = State #q { memory_report_counter = ?MEMORY_REPORT_INTERVAL }, - Now = now(), - case ((NewMem / OldMem) > 1.1 orelse (OldMem / NewMem) > 1.1) andalso - (?MEMORY_REPORT_TIME_INTERVAL < timer:now_diff(Now, Then)) of - true -> - rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss), - State1 #q { old_memory_report = {NewMem, Now}, - mixed_state = rabbit_mixed_queue:reset_counters(MS) }; - false -> State1 - end. + rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss), + State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS) }. %--------------------------------------------------------------------------- @@ -834,8 +837,10 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> disk -> fun rabbit_mixed_queue:to_disk_only_mode/2; mixed -> fun rabbit_mixed_queue:to_mixed_mode/2 end)(PendingMessages, MS), - noreply(State #q { mixed_state = MS1 }). - + noreply(State #q { mixed_state = MS1 }); + +handle_cast(report_memory, State) -> + noreply(report_memory(State)). 
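
Pieced together, the reporting loop this commit sets up runs as follows (a summary of the calls above, using only functions that appear in this patch):

    %% timer (every ?MEMORY_REPORT_TIME_INTERVAL ms)
    %%     -> rabbit_amqqueue:report_memory(QPid)                       (cast)
    %% queue process: handle_cast(report_memory)
    %%     -> rabbit_queue_mode_manager:report_memory(self(), Size, Gain, Loss)
    %%     -> rabbit_mixed_queue:reset_counters(MS)
    %% mode manager: handle_cast({report_memory, ...})
    %%     -> may cast back rabbit_amqqueue:set_mode(QPid, disk | mixed)
    %% queue process: handle_cast({set_mode, Mode})
    %%     -> rabbit_mixed_queue:to_disk_only_mode/2 or to_mixed_mode/2
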
handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -853,16 +858,11 @@ handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_ch_down(DownPid, State); -handle_info(timeout, State = #q { memory_report_counter = Count }) - when Count == ?MEMORY_REPORT_INTERVAL -> - %% Have to do the +1 because the timeout below, with noreply, will -1 +handle_info(timeout, State) -> %% TODO: Once we drop support for R11B-5, we can change this to %% {noreply, State, hibernate}; - proc_lib:hibernate(gen_server2, enter_loop, [?MODULE, [], State]); - -handle_info(timeout, State) -> - State1 = report_memory(State), - noreply(State1 #q { memory_report_counter = 1 + ?MEMORY_REPORT_INTERVAL }); + State1 = stop_memory_timer(report_memory(State)), + proc_lib:hibernate(gen_server2, enter_loop, [?MODULE, [], State1]); handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 4333f667..8db8f249 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -46,7 +46,7 @@ -export([length/1, filesync/0, cache_info/0]). --export([stop/0, stop_and_obliterate/0, change_memory_footprint/2, +-export([stop/0, stop_and_obliterate/0, conserve_memory/2, to_disk_only_mode/0, to_ram_disk_mode/0]). -include("rabbit.hrl"). @@ -270,7 +270,7 @@ -spec(length/1 :: (queue_name()) -> non_neg_integer()). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). --spec(change_memory_footprint/2 :: (pid(), bool()) -> 'ok'). +-spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). -endif. @@ -345,8 +345,8 @@ filesync() -> cache_info() -> gen_server2:call(?SERVER, cache_info, infinity). -change_memory_footprint(_Pid, Conserve) -> - gen_server2:pcast(?SERVER, 9, {change_memory_footprint, Conserve}). +conserve_memory(_Pid, Conserve) -> + gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}). %% ---- GEN-SERVER INTERNAL API ---- @@ -360,11 +360,11 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. 
process_flag(trap_exit, true), - ok = rabbit_alarm:register(self(), {?MODULE, change_memory_footprint, []}), + ok = rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), Node = node(), ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node, - disc_only_copies) of + disc_copies) of {atomic, ok} -> ok; {aborted, {already_exists, rabbit_disk_queue, Node, disc_only_copies}} -> ok; @@ -391,7 +391,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, - operation_mode = disk_only, + operation_mode = ram_disk, file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), sequences = ets:new(?SEQUENCE_ETS_NAME, @@ -502,7 +502,7 @@ handle_cast({delete_queue, Q}, State) -> noreply(State1); handle_cast(filesync, State) -> noreply(sync_current_file_handle(State)); -handle_cast({change_memory_footprint, Conserve}, State) -> +handle_cast({conserve_memory, Conserve}, State) -> noreply((case Conserve of true -> fun to_disk_only_mode/1; false -> fun to_ram_disk_mode/1 diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 12fede17..d171cf18 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -502,7 +502,7 @@ is_empty(#mqstate { length = Length }) -> estimate_queue_memory(#mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> - {Size, Gain, Loss}. + {2*Size, Gain, Loss}. reset_counters(State) -> State #mqstate { memory_gain = 0, memory_loss = 0 }. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 4ed56fd3..3a55833b 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -42,8 +42,7 @@ -define(TOTAL_TOKENS, 1000). -define(LOW_WATER_MARK_FRACTION, 0.25). --define(EXPIRY_INTERVAL_MICROSECONDS, 5000000). --define(ACTIVITY_THRESHOLD, 10). +-define(ACTIVITY_THRESHOLD, 25). -define(INITIAL_TOKEN_ALLOCATION, 10). -define(SERVER, ?MODULE). @@ -60,10 +59,10 @@ -endif. --record(state, { remaining_tokens, +-record(state, { available_tokens, + available_etokens, mixed_queues, - disk_queues, - bytes_per_token + tokens_per_byte }). start_link() -> @@ -78,44 +77,131 @@ report_memory(Pid, Memory, Gain, Loss) -> init([]) -> process_flag(trap_exit, true), %% todo, fix up this call as os_mon may not be running - {MemTotal, _MemUsed, _BigProc} = memsup:get_memory_data(), - {ok, #state { remaining_tokens = ?TOTAL_TOKENS, + {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), + MemAvail = MemTotal - MemUsed, + Avail = ceil(?TOTAL_TOKENS * (1 - ?LOW_WATER_MARK_FRACTION)), + EAvail = ?TOTAL_TOKENS - Avail, + {ok, #state { available_tokens = Avail, + available_etokens = EAvail, mixed_queues = dict:new(), - disk_queues = sets:new(), - bytes_per_token = MemTotal / ?TOTAL_TOKENS + tokens_per_byte = ?TOTAL_TOKENS / MemAvail }}. 
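
To make the token arithmetic concrete, a worked example with invented figures (?TOTAL_TOKENS = 1000, ?LOW_WATER_MARK_FRACTION = 0.25, and say 800 MB reported free at startup):

    %% Avail  = ceil(1000 * (1 - 0.25))  = 750 tokens freely allocatable
    %% EAvail = 1000 - 750               = 250 tokens held back as reserve
    %% TPB    = 1000 / 800000000         = 1.25e-6 tokens per byte
    %%
    %% a queue reporting Memory = 4000000 bytes of messages then costs
    %% Req    = ceil(4000000 * 1.25e-6)  = 5 tokens
    %%
    %% versus the flat ?INITIAL_TOKEN_ALLOCATION = 10 granted on registration
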
handle_call({register, Pid}, _From, - State = #state { remaining_tokens = Remaining, - mixed_queues = Mixed, - disk_queues = Disk }) -> + State = #state { available_tokens = Avail, + mixed_queues = Mixed }) -> _MRef = erlang:monitor(process, Pid), {Result, State1} = - case Remaining >= ?INITIAL_TOKEN_ALLOCATION of + case ?INITIAL_TOKEN_ALLOCATION > Avail of true -> - {mixed, State #state { remaining_tokens = - Remaining - ?INITIAL_TOKEN_ALLOCATION, - mixed_queues = dict:store - (Pid, {?INITIAL_TOKEN_ALLOCATION, now()}, - Mixed) }}; - + {disk, State}; false -> - {disk, State #state { disk_queues = - sets:add_element(Pid, Disk) }} + {mixed, State #state { mixed_queues = dict:store + (Pid, {?INITIAL_TOKEN_ALLOCATION, 0}, Mixed) }} end, - {reply, {ok, Result}, State1 }. - -handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost}, State) -> - {noreply, State}. + {reply, {ok, Result}, State1}. + +handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost}, + State = #state { available_tokens = Avail, + available_etokens = EAvail, + tokens_per_byte = TPB, + mixed_queues = Mixed + }) -> + Req = ceil(Memory * TPB), + io:format("~w : ~w ~w ~n", [Pid, Memory, Req]), + LowRate = (BytesGained < ?ACTIVITY_THRESHOLD) + andalso (BytesLost < ?ACTIVITY_THRESHOLD), + io:format("~w ~w~n", [O, LowRate]), + State1 = + case find_queue(Pid, State) of + disk -> + case Req > Avail orelse (2*Req) > (Avail + EAvail) orelse + LowRate of + true -> State; %% remain as disk queue + false -> + %% go to mixed, allocate double Req, and use Extra + rabbit_amqqueue:set_mode(Pid, mixed), + Alloc = lists:min([2*Req, Avail]), + EAlloc = (2*Req) - Alloc, + State #state { available_tokens = Avail - Alloc, + available_etokens = EAvail - EAlloc, + mixed_queues = dict:store + (Pid, {Alloc, EAlloc}, Mixed) + } + end; + {mixed, {OAlloc, OEAlloc}} -> + io:format("~w ; ~w ~w ~n", [Pid, OAlloc, OEAlloc]), + Avail1 = Avail + OAlloc, + EAvail1 = EAvail + OEAlloc, + case Req > (OAlloc + OEAlloc) of + true -> %% getting bigger + case Req > Avail1 of + true -> %% go to disk + rabbit_amqqueue:set_mode(Pid, disk), + State #state { available_tokens = Avail1, + available_etokens = EAvail1, + mixed_queues = + dict:erase(Pid, Mixed) }; + false -> %% request not too big, stay mixed + State #state { available_tokens = Avail1 - Req, + available_etokens = EAvail1, + mixed_queues = dict:store + (Pid, {Req, 0}, Mixed) } + end; + false -> %% getting smaller (or staying same) + case 0 =:= OEAlloc of + true -> + case Req > Avail1 orelse LowRate of + true -> %% go to disk + rabbit_amqqueue:set_mode(Pid, disk), + State #state { available_tokens = Avail1, + available_etokens = EAvail1, + mixed_queues = + dict:erase(Pid, Mixed) }; + false -> %% request not too big, stay mixed + State #state { available_tokens = Avail1 - Req, + available_etokens = EAvail1, + mixed_queues = dict:store + (Pid, {Req, 0}, Mixed) } + end; + false -> + case Req > Avail1 of + true -> + EReq = Req - Avail1, + case EReq > EAvail1 of + true -> %% go to disk + rabbit_amqqueue:set_mode(Pid, disk), + State #state { available_tokens = Avail1, + available_etokens = EAvail1, + mixed_queues = + dict:erase(Pid, Mixed) }; + false -> %% request not too big, stay mixed + State #state { available_tokens = 0, + available_etokens = EAvail1 - EReq, + mixed_queues = dict:store + (Pid, {Avail1, EReq}, Mixed) } + end; + false -> %% request not too big, stay mixed + State #state { available_tokens = Avail1 - Req, + available_etokens = EAvail1, + mixed_queues = dict:store + (Pid, {Req, 0}, 
Mixed) } + end + end + end + end, + {noreply, State1}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #state { remaining_tokens = Remaining, + State = #state { available_tokens = Avail, + available_etokens = EAvail, mixed_queues = Mixed }) -> State1 = case find_queue(Pid, State) of disk -> State; - {mixed, {Tokens, _When}} -> - State #state { remaining_tokens = Remaining + Tokens, + {mixed, {Alloc, EAlloc}} -> + State #state { available_tokens = Avail + Alloc, + available_etokens = EAvail + EAlloc, mixed_queues = dict:erase(Pid, Mixed) } end, {noreply, State1}; @@ -130,9 +216,13 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -find_queue(Pid, #state { disk_queues = Disk, mixed_queues = Mixed }) -> - case sets:is_element(Pid, Disk) of - true -> disk; - false -> {mixed, dict:fetch(Pid, Mixed)} +find_queue(Pid, #state { mixed_queues = Mixed }) -> + case dict:find(Pid, Mixed) of + {ok, Value} -> {mixed, Value}; + error -> disk end. - + +ceil(N) when N - trunc(N) > 0 -> + 1 + trunc(N); +ceil(N) -> + N. -- cgit v1.2.1 From 962c5a6c94dcf7c8492ef916abf857547f4f5501 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 30 Jun 2009 11:00:35 +0100 Subject: doh! --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 8db8f249..cf8ddba0 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -367,7 +367,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> disc_copies) of {atomic, ok} -> ok; {aborted, {already_exists, rabbit_disk_queue, Node, - disc_only_copies}} -> ok; + disc_copies}} -> ok; E -> E end, ok = filelib:ensure_dir(form_filename("nothing")), -- cgit v1.2.1 From 0e4cd86d69221ffb3f9dd76dcd850927850add91 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 30 Jun 2009 12:52:50 +0100 Subject: just adding timing to the dump test --- src/rabbit_tests.erl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 34a4fcb5..f5447fe3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -694,7 +694,7 @@ test_disk_queue() -> passed = rdq_test_startup_with_queue_gaps(), passed = rdq_test_redeliver(), passed = rdq_test_purge(), - passed = rdq_test_dump_queue(), + passed = rdq_test_dump_queue(1000), passed = rdq_test_mixed_queue_modes(), passed = rdq_test_mode_conversion_mid_txn(), rdq_virgin(), @@ -940,11 +940,10 @@ rdq_test_purge() -> rdq_stop(), passed. 
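
timer:tc(M, F, A) runs apply(M, F, A) and returns {Microseconds, Result}, which is why the hunk below can keep the existing assertion on the dumped list while also timing the call:

    {Micros, QList} = timer:tc(rabbit_disk_queue, dump_queue, [q]),
    %% QList is still the full dump; Micros is the elapsed wall-clock time
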
-rdq_test_dump_queue() -> +rdq_test_dump_queue(Total) -> rdq_virgin(), rdq_start(), Msg = <<0:(8*256)>>, - Total = 1000, All = lists:seq(1,Total), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), @@ -953,9 +952,9 @@ rdq_test_dump_queue() -> Size = size(term_to_binary(Message)), {Message, Size, false, {N, (N-1)}, (N-1)} end || N <- All], - QList = rabbit_disk_queue:dump_queue(q), + {Micros, QList} = timer:tc(rabbit_disk_queue, dump_queue, [q]), rdq_stop(), - io:format("dump ok undelivered~n", []), + io:format("dump ok undelivered (~w micros)~n", [Micros]), rdq_start(), lists:foreach( fun (N) -> @@ -972,8 +971,8 @@ rdq_test_dump_queue() -> Size = size(term_to_binary(Message)), {Message, Size, true, {N, (N-1)}, (N-1)} end || N <- All], - QList2 = rabbit_disk_queue:dump_queue(q), - io:format("dump ok post delivery + restart~n", []), + {Micros2, QList2} = timer:tc(rabbit_disk_queue, dump_queue, [q]), + io:format("dump ok post delivery + restart (~w micros)~n", [Micros2]), rdq_stop(), passed. -- cgit v1.2.1 From 7c8f61b697d43622e4e71f97e085a328a06d0b14 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 30 Jun 2009 16:24:50 +0100 Subject: changed disk -> mixed mode so that messages stay on disk and don't get read. This means the conversion is much faster than it was which is a good thing, at the cost of slower initial delivery. --- src/rabbit_disk_queue.erl | 39 ++++++++++++++------------ src/rabbit_mixed_queue.erl | 69 ++++++++++++++++++++++++++++++---------------- src/rabbit_tests.erl | 13 +++------ 3 files changed, 71 insertions(+), 50 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index cf8ddba0..990e5917 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -259,9 +259,7 @@ (queue_name(), [{{msg_id(), seq_id()}, {seq_id_or_next(), bool()}}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). --spec(dump_queue/1 :: (queue_name()) -> - [{msg_id(), binary(), non_neg_integer(), bool(), - {msg_id(), seq_id()}, seq_id()}]). +-spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), bool()}]). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -1163,20 +1161,27 @@ internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {[], State}; [{Q, ReadSeq, WriteSeq, _Length}] -> - {QList, {WriteSeq, State3}} = - rabbit_misc:unfold( - fun ({SeqId, _State1}) when SeqId == WriteSeq -> - false; - ({SeqId, State1}) -> - {ok, {Message, Size, Delivered, {MsgId, SeqId}}, - NextReadSeqId, State2} = - internal_read_message(Q, SeqId, true, true, - State1), - {true, - {Message, Size, Delivered, {MsgId, SeqId}, SeqId}, - {NextReadSeqId, State2}} - end, {ReadSeq, State}), - {lists:reverse(QList), State3} + Objs = + mnesia:dirty_match_object( + rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = {Q, '_'}, + msg_id = '_', + is_delivered = '_', + next_seq_id = '_' + }), + {Msgs, WriteSeq} = + lists:foldl( + fun (#dq_msg_loc { queue_and_seq_id = {_, Seq}, + msg_id = MsgId, + is_delivered = Delivered, + next_seq_id = NextSeq }, + {Acc, Seq}) -> + {[{MsgId, Delivered} | Acc], NextSeq}; + (#dq_msg_loc { queue_and_seq_id = {_, Seq} }, + {[], RSeq}) when Seq < RSeq -> + {[], RSeq} + end, {[], ReadSeq}, lists:keysort(2, Objs)), + {lists:reverse(Msgs), State} end. 
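
The fold in the new internal_dump_queue/2 walks the seq-sorted records, skips anything the reader has already consumed, then chains along next_seq_id. A small worked example (invented contents; ReadSeq = 2, WriteSeq = 4):

    %% seq 0, next 1  -> second clause: Seq < ReadSeq and Acc = [], skipped
    %% seq 1, next 2  -> skipped likewise
    %% seq 2, next 3  -> first clause matches {Acc, 2}: Acc = [{m2, false}]
    %% seq 3, next 4  -> first clause matches {Acc, 3}: Acc = [{m3, true}, ...]
    %% foldl returns {[{m3, true}, {m2, false}], 4}; the 4 is asserted against
    %% WriteSeq, and the list is reversed before being handed out
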
internal_delete_non_durable_queues( diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index d171cf18..3c60d25f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -129,11 +129,12 @@ to_disk_only_mode(TxnMessages, State = lists:foldl( fun ({Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}, RQueueAcc) -> - if OnDisk -> + case OnDisk of + true -> {MsgId, IsDelivered, AckTag, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; - true -> + false -> ok = if [] == RQueueAcc -> ok; true -> rabbit_disk_queue:requeue_with_seqs( @@ -142,7 +143,11 @@ to_disk_only_mode(TxnMessages, State = ok = rabbit_disk_queue:publish( Q, Msg, false), [] - end + end; + ({MsgId, IsDelivered}, RQueueAcc) -> + {MsgId, IsDelivered, AckTag, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + [ {AckTag, {next, IsDelivered}} | RQueueAcc ] end, [], Msgs), ok = if [] == Requeue -> ok; true -> @@ -170,12 +175,8 @@ to_mixed_mode(TxnMessages, State = %% load up a new queue with everything that's on disk. %% don't remove non-persistent messages that happen to be on disk QList = rabbit_disk_queue:dump_queue(Q), - {MsgBuf1, Length} = - lists:foldl( - fun ({Msg, _Size, IsDelivered, _AckTag, _SeqId}, - {Buf, L}) -> - {queue:in({Msg, IsDelivered, true}, Buf), L+1} - end, {queue:new(), 0}, QList), + Length = erlang:length(QList), + MsgBuf = queue:from_list(QList), %% remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_cancel/2 which is why @@ -189,7 +190,7 @@ to_mixed_mode(TxnMessages, State = end end, [], TxnMessages), ok = rabbit_disk_queue:tx_cancel(Cancel), - {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf1 }}. + {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf }}. 
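
From this commit on, a mixed queue's msg_buf can hold entries of two shapes, and the deliver/1 changes below dispatch on which one they find:

    %% {Msg = #basic_message {...}, IsDelivered, OnDisk}
    %%     body held in RAM; OnDisk records whether a disk copy also exists
    %% {MsgId, IsDelivered}
    %%     body not in RAM (loaded via dump_queue); deliver/1 has to fetch
    %%     it with rabbit_disk_queue:deliver(Q) before handing it out
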
purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, @@ -290,21 +291,41 @@ deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, State #mqstate { length = Length - 1 }}; deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, is_durable = IsDurable, length = Length }) -> - {{value, {Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered, OnDisk}}, MsgBuf1} + {{value, Value}, MsgBuf1} = queue:out(MsgBuf), - AckTag = - if OnDisk -> - if IsPersistent andalso IsDurable -> - {MsgId, IsDelivered, AckTag1, _PersistRem} = - rabbit_disk_queue:phantom_deliver(Q), - AckTag1; - true -> - ok = rabbit_disk_queue:auto_ack_next_message(Q), - noack - end; - true -> noack + {Msg, IsDelivered, AckTag} = + case Value of + {Msg1 = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered1, OnDisk} -> + AckTag1 = + case OnDisk of + true -> + case IsPersistent andalso IsDurable of + true -> + {MsgId, IsDelivered1, AckTag2, _PersistRem} + = rabbit_disk_queue:phantom_deliver(Q), + AckTag2; + false -> + ok = rabbit_disk_queue:auto_ack_next_message + (Q), + noack + end; + false -> noack + end, + {Msg1, IsDelivered1, AckTag1}; + {MsgId, IsDelivered1} -> + {Msg1 = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + _Size, IsDelivered1, AckTag1, _PersistRem} + = rabbit_disk_queue:deliver(Q), + AckTag2 = + case IsPersistent andalso IsDurable of + true -> AckTag1; + false -> rabbit_disk_queue:ack(Q, [AckTag1]), + noack + end, + {Msg1, IsDelivered1, AckTag2} end, Rem = Length - 1, {{Msg, IsDelivered, AckTag, Rem}, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f5447fe3..b80bdab2 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -948,14 +948,12 @@ rdq_test_dump_queue(Total) -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), - QList = [begin Message = rdq_message(N, Msg), - Size = size(term_to_binary(Message)), - {Message, Size, false, {N, (N-1)}, (N-1)} - end || N <- All], + QList = [{N, false} || N <- All], {Micros, QList} = timer:tc(rabbit_disk_queue, dump_queue, [q]), rdq_stop(), io:format("dump ok undelivered (~w micros)~n", [Micros]), - rdq_start(), + {Micros1, _} = timer:tc(rabbit_tests, rdq_start, []), + io:format("restarted (~w micros)~n", [Micros1]), lists:foreach( fun (N) -> Remaining = Total - N, @@ -967,10 +965,7 @@ rdq_test_dump_queue(Total) -> rdq_stop(), io:format("dump ok post delivery~n", []), rdq_start(), - QList2 = [begin Message = rdq_message(N, Msg), - Size = size(term_to_binary(Message)), - {Message, Size, true, {N, (N-1)}, (N-1)} - end || N <- All], + QList2 = [{N, true} || N <- All], {Micros2, QList2} = timer:tc(rabbit_disk_queue, dump_queue, [q]), io:format("dump ok post delivery + restart (~w micros)~n", [Micros2]), rdq_stop(), -- cgit v1.2.1 From 20a4eb8757c7536b406affb14d76b3e537e6a7bf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 30 Jun 2009 17:49:32 +0100 Subject: and now clustering seems to work again... 
--- src/rabbit_mnesia.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 0201017c..3681af0a 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -298,9 +298,10 @@ init_db(ClusterNodes) -> true -> disc; false -> ram end, - ok = create_local_non_replicated_table_copies(TableCopyType), - ok = wait_for_tables(), - ok = create_local_replicated_table_copies(TableCopyType); + ok = create_local_table_copy(schema, disc_copies), + ok = create_local_non_replicated_table_copies(disc), + ok = create_local_replicated_table_copies(TableCopyType), + ok = wait_for_tables(); {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or @@ -358,7 +359,6 @@ create_local_non_replicated_table_copies(Type) -> create_local_table_copies(Type, non_replicated_table_definitions()). create_local_table_copies(Type, TableDefinitions) -> - ok = create_local_table_copy(schema, disc_copies), lists:foreach( fun({Tab, TabDef}) -> HasDiscCopies = -- cgit v1.2.1 From 267af553fb4af94c6a3a3f8e99b7ec4963be4fa5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 30 Jun 2009 18:04:11 +0100 Subject: Well, this seems to work. --- src/rabbit_mnesia.erl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 3681af0a..149501f8 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -166,6 +166,9 @@ non_replicated_table_definitions() -> table_names() -> [Tab || {Tab, _} <- table_definitions()]. +replicated_table_names() -> + [Tab || {Tab, _} <- replicated_table_definitions()]. + dir() -> mnesia:system_info(directory). ensure_mnesia_dir() -> @@ -298,10 +301,10 @@ init_db(ClusterNodes) -> true -> disc; false -> ram end, + ok = wait_for_replicated_tables(), ok = create_local_table_copy(schema, disc_copies), ok = create_local_non_replicated_table_copies(disc), - ok = create_local_replicated_table_copies(TableCopyType), - ok = wait_for_tables(); + ok = create_local_replicated_table_copies(TableCopyType); {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or @@ -408,10 +411,16 @@ create_local_table_copy(Tab, Type) -> end, ok. -wait_for_tables() -> +wait_for_replicated_tables() -> + wait_for_tables(replicated_table_names()). + +wait_for_tables() -> + wait_for_tables(table_names()). + +wait_for_tables(TableNames) -> case check_schema_integrity() of ok -> - case mnesia:wait_for_tables(table_names(), 30000) of + case mnesia:wait_for_tables(TableNames, 30000) of ok -> ok; {timeout, BadTabs} -> throw({error, {timeout_waiting_for_tables, BadTabs}}); -- cgit v1.2.1 From a3b1dd4d2ff83b20fdb8f25a9420f94837233991 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 1 Jul 2009 17:53:53 +0100 Subject: Well after all that pain, simply doing the disk queue tests first seems to solve the problems. I don't quite buy this though, as all I was doing was stopping and starting the app so I don't understand why this was affecting the clustering configuration or causing issues _much_ further down the test line. But still, it seems to be repeatedly passing for me atm. 
--- src/rabbit_tests.erl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b80bdab2..05a6393b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -47,6 +47,7 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> + passed = test_disk_queue(), passed = test_priority_queue(), passed = test_parsing(), passed = test_topic_matching(), @@ -56,7 +57,6 @@ all_tests() -> passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), - passed = test_disk_queue(), passed. test_priority_queue() -> @@ -698,8 +698,6 @@ test_disk_queue() -> passed = rdq_test_mixed_queue_modes(), passed = rdq_test_mode_conversion_mid_txn(), rdq_virgin(), - ok = control_action(stop_app, []), - ok = control_action(start_app, []), passed. benchmark_disk_queue() -> -- cgit v1.2.1 From 65b6aa354fd5e3410edb817e15d8c5bfa9d1d1c0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 1 Jul 2009 18:48:35 +0100 Subject: When converting to disk mode, use tx_publish and tx_commit instead of publish. This massively reduces the number of sync calls to disk_queue, potentially to one, if every message in the queue is non persistent (or the queue is non durable). --- src/rabbit_mixed_queue.erl | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 3c60d25f..23696f27 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -125,30 +125,37 @@ to_disk_only_mode(TxnMessages, State = %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. Msgs = queue:to_list(MsgBuf), - Requeue = + {Requeue, TxPublish} = lists:foldl( fun ({Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}, - RQueueAcc) -> + {RQueueAcc, TxPublishAcc}) -> case OnDisk of true -> + ok = rabbit_disk_queue:tx_commit(Q, TxPublishAcc, []), {MsgId, IsDelivered, AckTag, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), - [ {AckTag, {next, IsDelivered}} | RQueueAcc ]; + {[ {AckTag, {next, IsDelivered}} | RQueueAcc ], []}; false -> ok = if [] == RQueueAcc -> ok; true -> rabbit_disk_queue:requeue_with_seqs( Q, lists:reverse(RQueueAcc)) end, - ok = rabbit_disk_queue:publish( - Q, Msg, false), - [] + ok = rabbit_disk_queue:tx_publish(Msg), + {[], [ MsgId | TxPublishAcc ]} end; - ({MsgId, IsDelivered}, RQueueAcc) -> + ({MsgId, IsDelivered}, {RQueueAcc, TxPublishAcc}) -> + ok = if [] == TxPublishAcc -> ok; + true -> rabbit_disk_queue:tx_commit(Q, TxPublishAcc, + []) + end, {MsgId, IsDelivered, AckTag, _PersistRemaining} = rabbit_disk_queue:phantom_deliver(Q), - [ {AckTag, {next, IsDelivered}} | RQueueAcc ] - end, [], Msgs), + {[ {AckTag, {next, IsDelivered}} | RQueueAcc ], []} + end, {[], []}, Msgs), + ok = if [] == TxPublish -> ok; + true -> rabbit_disk_queue:tx_commit(Q, TxPublish, []) + end, ok = if [] == Requeue -> ok; true -> rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) -- cgit v1.2.1 From 241c2fad09648dca3087c73df1d275b6efe9aada Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 2 Jul 2009 16:29:13 +0100 Subject: Sorted out the timer versus hibernate binary backoff. The trick is to use apply_after, not apply interval, and then after reporting memory use, don't set a new timer going (but do set a new timer going on every other message (other than timeouts)). 
This means that if nothing is going on, after a memory report, the process can wait as long as it needs to before the hibernate timeout fires. --- src/rabbit_amqqueue_process.erl | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index e847b34c..bac7cfb5 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -134,18 +134,24 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- reply(Reply, NewState = #q { memory_report_timer = undefined }) -> - reply(Reply, start_memory_timer(NewState)); -reply(Reply, NewState = #q { hibernated_at = undefined }) -> - {reply, Reply, NewState, NewState #q.hibernate_after}; + reply1(Reply, start_memory_timer(NewState)); reply(Reply, NewState) -> + reply1(Reply, NewState). + +reply1(Reply, NewState = #q { hibernated_at = undefined }) -> + {reply, Reply, NewState, NewState #q.hibernate_after}; +reply1(Reply, NewState) -> NewState1 = adjust_hibernate_after(NewState), {reply, Reply, NewState1, NewState1 #q.hibernate_after}. noreply(NewState = #q { memory_report_timer = undefined }) -> - noreply(start_memory_timer(NewState)); -noreply(NewState = #q { hibernated_at = undefined }) -> - {noreply, NewState, NewState #q.hibernate_after}; + noreply1(start_memory_timer(NewState)); noreply(NewState) -> + noreply1(NewState). + +noreply1(NewState = #q { hibernated_at = undefined }) -> + {noreply, NewState, NewState #q.hibernate_after}; +noreply1(NewState) -> NewState1 = adjust_hibernate_after(NewState), {noreply, NewState1, NewState1 #q.hibernate_after}. @@ -174,8 +180,8 @@ adjust_hibernate_after(State = #q { hibernated_at = Then, end. start_memory_timer() -> - {ok, TRef} = timer:apply_interval(?MEMORY_REPORT_TIME_INTERVAL, - rabbit_amqqueue, report_memory, [self()]), + {ok, TRef} = timer:apply_after(?MEMORY_REPORT_TIME_INTERVAL, + rabbit_amqqueue, report_memory, [self()]), TRef. start_memory_timer(State = #q { memory_report_timer = undefined }) -> State #q { memory_report_timer = start_memory_timer() }; @@ -597,7 +603,8 @@ report_memory(State = #q { mixed_state = MS }) -> N -> N end, rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss), - State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS) }. + State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS), + memory_report_timer = undefined }. %--------------------------------------------------------------------------- @@ -874,7 +881,7 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> noreply(State #q { mixed_state = MS1 }); handle_cast(report_memory, State) -> - noreply(report_memory(State)). + noreply1(report_memory(State)). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> -- cgit v1.2.1 From 6726cc2f1990861bd924ce3477c10512c4721543 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 2 Jul 2009 17:35:48 +0100 Subject: well if we're going to not actually pull messages off disk when going to mixed mode, we may as well do it really lazily and not bother with any communication with the disk_queue. We just have a token in the queue which indicates how many messages we are expecting to get from the disk queue. This makes disk -> mixed almost instantaneous. This also means that performance is not initially brilliant. 
Maybe we need some way of the queue knowing that both it and the disk_queue are idle and deciding to prefetch. Even batching could work well. It's an endless trade off between getting operations to happen quickly and being able to get good performance. Dunno what the third thing is, probably not necessary, as you can't even have both of those, let alone pick 2 from 3! --- src/rabbit_disk_queue.erl | 36 +----------------------------------- src/rabbit_mixed_queue.erl | 43 +++++++++++++++++++++++++++---------------- src/rabbit_tests.erl | 34 +--------------------------------- 3 files changed, 29 insertions(+), 84 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 990e5917..dc328792 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -41,7 +41,7 @@ -export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, - dump_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1 + delete_non_durable_queues/1, auto_ack_next_message/1 ]). -export([length/1, filesync/0, cache_info/0]). @@ -259,7 +259,6 @@ (queue_name(), [{{msg_id(), seq_id()}, {seq_id_or_next(), bool()}}]) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). --spec(dump_queue/1 :: (queue_name()) -> [{msg_id(), bool()}]). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). @@ -315,9 +314,6 @@ purge(Q) -> delete_queue(Q) -> gen_server2:cast(?SERVER, {delete_queue, Q}). -dump_queue(Q) -> - gen_server2:call(?SERVER, {dump_queue, Q}, infinity). - delete_non_durable_queues(DurableQueues) -> gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). @@ -463,9 +459,6 @@ handle_call(to_ram_disk_mode, _From, State) -> handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), reply(Length, State); -handle_call({dump_queue, Q}, _From, State) -> - {Result, State1} = internal_dump_queue(Q, State), - reply(Result, State1); handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), reply(ok, State1); @@ -1157,33 +1150,6 @@ internal_delete_queue(Q, State) -> end), {ok, State2}. -internal_dump_queue(Q, State = #dqstate { sequences = Sequences }) -> - case ets:lookup(Sequences, Q) of - [] -> {[], State}; - [{Q, ReadSeq, WriteSeq, _Length}] -> - Objs = - mnesia:dirty_match_object( - rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, '_'}, - msg_id = '_', - is_delivered = '_', - next_seq_id = '_' - }), - {Msgs, WriteSeq} = - lists:foldl( - fun (#dq_msg_loc { queue_and_seq_id = {_, Seq}, - msg_id = MsgId, - is_delivered = Delivered, - next_seq_id = NextSeq }, - {Acc, Seq}) -> - {[{MsgId, Delivered} | Acc], NextSeq}; - (#dq_msg_loc { queue_and_seq_id = {_, Seq} }, - {[], RSeq}) when Seq < RSeq -> - {[], RSeq} - end, {[], ReadSeq}, lists:keysort(2, Objs)), - {lists:reverse(Msgs), State} - end. 
- internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> ets:foldl( diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 23696f27..f6ef355d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -144,14 +144,20 @@ to_disk_only_mode(TxnMessages, State = ok = rabbit_disk_queue:tx_publish(Msg), {[], [ MsgId | TxPublishAcc ]} end; - ({MsgId, IsDelivered}, {RQueueAcc, TxPublishAcc}) -> + ({disk, Count}, {RQueueAcc, TxPublishAcc}) -> ok = if [] == TxPublishAcc -> ok; true -> rabbit_disk_queue:tx_commit(Q, TxPublishAcc, []) end, - {MsgId, IsDelivered, AckTag, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), - {[ {AckTag, {next, IsDelivered}} | RQueueAcc ], []} + {RQueueAcc1, 0} = + rabbit_misc:unfold( + fun (0) -> false; + (N) -> + {_MsgId, IsDelivered, AckTag, _PersistRemaining} + = rabbit_disk_queue:phantom_deliver(Q), + {true, {AckTag, {next, IsDelivered}}, N - 1} + end, Count), + {RQueueAcc1 ++ RQueueAcc, []} end, {[], []}, Msgs), ok = if [] == TxPublish -> ok; true -> rabbit_disk_queue:tx_commit(Q, TxPublish, []) @@ -179,11 +185,13 @@ to_mixed_mode(TxnMessages, State = #mqstate { mode = disk, queue = Q, length = Length, is_durable = IsDurable }) -> rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), - %% load up a new queue with everything that's on disk. - %% don't remove non-persistent messages that happen to be on disk - QList = rabbit_disk_queue:dump_queue(Q), - Length = erlang:length(QList), - MsgBuf = queue:from_list(QList), + %% load up a new queue with a token that says how many messages + %% are on disk + %% don't actually do anything to the disk + MsgBuf = case Length of + 0 -> queue:new(); + _ -> queue:from_list([{disk, Length}]) + end, %% remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_cancel/2 which is why @@ -300,7 +308,7 @@ deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, is_durable = IsDurable, length = Length }) -> {{value, Value}, MsgBuf1} = queue:out(MsgBuf), - {Msg, IsDelivered, AckTag} = + {Msg, IsDelivered, AckTag, MsgBuf2} = case Value of {Msg1 = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -320,10 +328,9 @@ deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, end; false -> noack end, - {Msg1, IsDelivered1, AckTag1}; - {MsgId, IsDelivered1} -> - {Msg1 = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, + {Msg1, IsDelivered1, AckTag1, MsgBuf1}; + {disk, Rem1} -> + {Msg1 = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered1, AckTag1, _PersistRem} = rabbit_disk_queue:deliver(Q), AckTag2 = @@ -332,11 +339,15 @@ deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, false -> rabbit_disk_queue:ack(Q, [AckTag1]), noack end, - {Msg1, IsDelivered1, AckTag2} + MsgBuf3 = case Rem1 of + 1 -> MsgBuf1; + _ -> queue:in_r({disk, Rem1 - 1}, MsgBuf1) + end, + {Msg1, IsDelivered1, AckTag2, MsgBuf3} end, Rem = Length - 1, {{Msg, IsDelivered, AckTag, Rem}, - State #mqstate { msg_buf = MsgBuf1, length = Rem }}. + State #mqstate { msg_buf = MsgBuf2, length = Rem }}. 
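%% [The {disk, Count} placeholder consumed by deliver/1 above, sketched
%% in isolation: the buffer holds one counting token instead of Count
%% per-message entries, and each delivery decrements it. Illustrative
%% sketch only; FetchFun stands in for rabbit_disk_queue:deliver/1 and
%% is not part of the real module.]
out_sketch(MsgBuf, FetchFun) ->
    case queue:out(MsgBuf) of
        {empty, MsgBuf1} ->
            {empty, MsgBuf1};
        {{value, {disk, 1}}, MsgBuf1} ->
            %% the last message the disk queue owes us
            {FetchFun(), MsgBuf1};
        {{value, {disk, Count}}, MsgBuf1} ->
            %% fetch one, push the decremented token back on the front
            {FetchFun(), queue:in_r({disk, Count - 1}, MsgBuf1)};
        {{value, Msg}, MsgBuf1} ->
            %% an ordinary in-RAM entry
            {Msg, MsgBuf1}
    end.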
remove_noacks(MsgsWithAcks) -> {AckTags, ASize} = diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 0b70be0c..b56d71c8 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -690,11 +690,10 @@ delete_log_handlers(Handlers) -> test_disk_queue() -> rdq_stop(), rdq_virgin(), - passed = rdq_stress_gc(10000), + passed = rdq_stress_gc(1000), passed = rdq_test_startup_with_queue_gaps(), passed = rdq_test_redeliver(), passed = rdq_test_purge(), - passed = rdq_test_dump_queue(1000), passed = rdq_test_mixed_queue_modes(), passed = rdq_test_mode_conversion_mid_txn(), rdq_virgin(), @@ -938,37 +937,6 @@ rdq_test_purge() -> rdq_stop(), passed. -rdq_test_dump_queue(Total) -> - rdq_virgin(), - rdq_start(), - Msg = <<0:(8*256)>>, - All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], - rabbit_disk_queue:tx_commit(q, All, []), - io:format("Publish done~n", []), - QList = [{N, false} || N <- All], - {Micros, QList} = timer:tc(rabbit_disk_queue, dump_queue, [q]), - rdq_stop(), - io:format("dump ok undelivered (~w micros)~n", [Micros]), - {Micros1, _} = timer:tc(rabbit_tests, rdq_start, []), - io:format("restarted (~w micros)~n", [Micros1]), - lists:foreach( - fun (N) -> - Remaining = Total - N, - {Message, _TSize, false, _SeqId, Remaining} = - rabbit_disk_queue:deliver(q), - ok = rdq_match_message(Message, N, Msg, 256) - end, All), - [] = rabbit_disk_queue:dump_queue(q), - rdq_stop(), - io:format("dump ok post delivery~n", []), - rdq_start(), - QList2 = [{N, true} || N <- All], - {Micros2, QList2} = timer:tc(rabbit_disk_queue, dump_queue, [q]), - io:format("dump ok post delivery + restart (~w micros)~n", [Micros2]), - rdq_stop(), - passed. - rdq_test_mixed_queue_modes() -> rdq_virgin(), rdq_start(), -- cgit v1.2.1 From 1ab2a563106ede804eb0968e2eabb2df8980cfd0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 2 Jul 2009 17:42:17 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index f6ef355d..26fa029d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -146,8 +146,8 @@ to_disk_only_mode(TxnMessages, State = end; ({disk, Count}, {RQueueAcc, TxPublishAcc}) -> ok = if [] == TxPublishAcc -> ok; - true -> rabbit_disk_queue:tx_commit(Q, TxPublishAcc, - []) + true -> + rabbit_disk_queue:tx_commit(Q, TxPublishAcc, []) end, {RQueueAcc1, 0} = rabbit_misc:unfold( -- cgit v1.2.1 From 3660c5754ad4cda1899f7af346f727d44f3094a9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 2 Jul 2009 18:20:05 +0100 Subject: wip, dnc. --- src/rabbit_amqqueue_process.erl | 8 +-- src/rabbit_queue_mode_manager.erl | 108 ++++++++++++++++++++++++++++++-------- 2 files changed, 89 insertions(+), 27 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index bac7cfb5..ebee301e 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -595,14 +595,14 @@ i(memory, _) -> i(Item, _) -> throw({bad_argument, Item}). 
-report_memory(State = #q { mixed_state = MS }) -> +report_memory(Hibernating, State = #q { mixed_state = MS }) -> {MSize, Gain, Loss} = rabbit_mixed_queue:estimate_queue_memory(MS), NewMem = case MSize of 0 -> 1; %% avoid / 0 N -> N end, - rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss), + rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss, Hibernating), State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS), memory_report_timer = undefined }. @@ -881,7 +881,7 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> noreply(State #q { mixed_state = MS1 }); handle_cast(report_memory, State) -> - noreply1(report_memory(State)). + noreply1(report_memory(false, State)). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -902,7 +902,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State) -> %% TODO: Once we drop support for R11B-5, we can change this to %% {noreply, State, hibernate}; - State1 = (stop_memory_timer(report_memory(State))) + State1 = (stop_memory_timer(report_memory(true, State))) #q { hibernated_at = now() }, proc_lib:hibernate(gen_server2, enter_loop, [?MODULE, [], State1]); diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 3a55833b..39524978 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/1, report_memory/4]). +-export([register/1, report_memory/5]). -define(TOTAL_TOKENS, 1000). -define(LOW_WATER_MARK_FRACTION, 0.25). @@ -54,15 +54,18 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/1 :: (pid()) -> {'ok', queue_mode()}). --spec(report_memory/4 :: (pid(), non_neg_integer(), - non_neg_integer(), non_neg_integer()) -> 'ok'). +-spec(report_memory/5 :: (pid(), non_neg_integer(), + non_neg_integer(), non_neg_integer(), bool()) -> + 'ok'). -endif. -record(state, { available_tokens, available_etokens, mixed_queues, - tokens_per_byte + tokens_per_byte, + low_rate, + hibernated }). start_link() -> @@ -71,8 +74,9 @@ start_link() -> register(Pid) -> gen_server2:call(?SERVER, {register, Pid}). -report_memory(Pid, Memory, Gain, Loss) -> - gen_server2:cast(?SERVER, {report_memory, Pid, Memory, Gain, Loss}). +report_memory(Pid, Memory, Gain, Loss, Hibernating) -> + gen_server2:cast(?SERVER, + {report_memory, Pid, Memory, Gain, Loss, Hibernating}). init([]) -> process_flag(trap_exit, true), @@ -84,7 +88,9 @@ init([]) -> {ok, #state { available_tokens = Avail, available_etokens = EAvail, mixed_queues = dict:new(), - tokens_per_byte = ?TOTAL_TOKENS / MemAvail + tokens_per_byte = ?TOTAL_TOKENS / MemAvail, + low_rate = sets:new(), + hibernated = sets:new() }}. handle_call({register, Pid}, _From, @@ -101,7 +107,7 @@ handle_call({register, Pid}, _From, end, {reply, {ok, Result}, State1}. 
-handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost}, +handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State = #state { available_tokens = Avail, available_etokens = EAvail, tokens_per_byte = TPB, @@ -137,11 +143,11 @@ handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost}, true -> %% getting bigger case Req > Avail1 of true -> %% go to disk - rabbit_amqqueue:set_mode(Pid, disk), - State #state { available_tokens = Avail1, - available_etokens = EAvail1, - mixed_queues = - dict:erase(Pid, Mixed) }; + attempt_free_from_idle(Req, Pid, + State #state { available_tokens = Avail1, + available_etokens = EAvail1, + mixed_queues = + dict:erase(Pid, Mixed) }); false -> %% request not too big, stay mixed State #state { available_tokens = Avail1 - Req, available_etokens = EAvail1, @@ -153,11 +159,11 @@ handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost}, true -> case Req > Avail1 orelse LowRate of true -> %% go to disk - rabbit_amqqueue:set_mode(Pid, disk), - State #state { available_tokens = Avail1, - available_etokens = EAvail1, - mixed_queues = - dict:erase(Pid, Mixed) }; + attempt_free_from_idle(Req, Pid, + State #state { available_tokens = Avail1, + available_etokens = EAvail1, + mixed_queues = + dict:erase(Pid, Mixed) }); false -> %% request not too big, stay mixed State #state { available_tokens = Avail1 - Req, available_etokens = EAvail1, @@ -170,11 +176,11 @@ handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost}, EReq = Req - Avail1, case EReq > EAvail1 of true -> %% go to disk - rabbit_amqqueue:set_mode(Pid, disk), - State #state { available_tokens = Avail1, - available_etokens = EAvail1, - mixed_queues = - dict:erase(Pid, Mixed) }; + attempt_free_from_idle(Req, Pid, + State #state { available_tokens = Avail1, + available_etokens = EAvail1, + mixed_queues = + dict:erase(Pid, Mixed) }); false -> %% request not too big, stay mixed State #state { available_tokens = 0, available_etokens = EAvail1 - EReq, @@ -226,3 +232,59 @@ ceil(N) when N - trunc(N) > 0 -> 1 + trunc(N); ceil(N) -> N. + +attempt_free_from_idle(Req, Pid, State = #state { available_tokens = Avail, + available_etokens = EAvail, + low_rate = Lazy, + hibernated = Sleepy, + mixed_queues = Mixed }) -> + case Req > Avail of + true -> + {Sleepy1, Freed, EFreed, State1} = free_upto(Req, sets:to_list(Sleepy), State), + case Req > Avail + Freed of + true -> + {Lazy1, Freed1, EFreed1, State2} = free_upto(Req, sets:to_list(Lazy), State1), + case Req > Avail + Freed + Freed1 of + true -> + rabbit_amqqueue:set_mode(Pid, disk), + State2 #state { available_tokens = Avail + Freed + Freed1, + available_etokens = EAvail + EFreed + EFreed1, + low_rate = Lazy1, + hibernated = Sleepy1, + mixed_queues = dict:erase(Pid, Mixed) + }; + false -> + State2 #state { available_tokens = Avail + Freed + Freed1 - Req, + available_etokens = EAvail + EFreed + EFreed1, + low_rate = Lazy1, + hibernated = Sleepy1, + mixed_queues = dict:store(Pid, {Req, 0}, Mixed) + } + end; + false -> + State1 #state { available_tokens = Avail + Freed - Req, + available_etokens = EAvail + EFreed, + hibernated = Sleepy1, + mixed_queues = dict:store(Pid, {Req, 0}, Mixed) + } + end; + false -> + State #state { mixed_queues = dict:store(Pid, {Req, 0}, Mixed) } + end. + +free_upto(Req, List, State) -> + free_upto(Req, List, 0, 0, State). 
+ +free_upto(_Req, [], Freed, EFreed, State) -> + {[], Freed, EFreed, State}; +free_upto(Req, [Pid|Pids], Freed, EFreed, State = #state { available_tokens = Avail, + mixed_queues = Mixed }) -> + {mixed, {Alloc, EAlloc}} = find_queue(Pid, State), + rabbit_amqqueue:set_mode(Pid, disk), + State1 = State #state { mixed_queues = dict:erase(Pid, Mixed) }, + case Req > Avail + Freed + Alloc of + true -> + free_upto(Req, Pids, Freed + Alloc, EFreed + EAlloc, State1); + false -> + {Pids, Freed + Alloc, EFreed + EAlloc, State1} + end. -- cgit v1.2.1 From 32b124c56a25bf503ec84fc010f9e5f03de4ea11 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 3 Jul 2009 10:24:33 +0100 Subject: report memory: a) when we're not hibernating, every 10 seconds b) immediately prior to hibernating c) as soon as we stop hibernating --- src/rabbit_amqqueue_process.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index ebee301e..986546dc 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -141,7 +141,7 @@ reply(Reply, NewState) -> reply1(Reply, NewState = #q { hibernated_at = undefined }) -> {reply, Reply, NewState, NewState #q.hibernate_after}; reply1(Reply, NewState) -> - NewState1 = adjust_hibernate_after(NewState), + NewState1 = report_memory(false, adjust_hibernate_after(NewState)), {reply, Reply, NewState1, NewState1 #q.hibernate_after}. noreply(NewState = #q { memory_report_timer = undefined }) -> @@ -152,7 +152,7 @@ noreply(NewState) -> noreply1(NewState = #q { hibernated_at = undefined }) -> {noreply, NewState, NewState #q.hibernate_after}; noreply1(NewState) -> - NewState1 = adjust_hibernate_after(NewState), + NewState1 = report_memory(false, adjust_hibernate_after(NewState)), {noreply, NewState1, NewState1 #q.hibernate_after}. adjust_hibernate_after(State = #q { hibernated_at = undefined }) -> @@ -603,8 +603,7 @@ report_memory(Hibernating, State = #q { mixed_state = MS }) -> N -> N end, rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss, Hibernating), - State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS), - memory_report_timer = undefined }. + State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS) }. %--------------------------------------------------------------------------- @@ -881,7 +880,8 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> noreply(State #q { mixed_state = MS1 }); handle_cast(report_memory, State) -> - noreply1(report_memory(false, State)). + noreply1 + ((report_memory(false, State)) #q { memory_report_timer = undefined }). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> -- cgit v1.2.1 From ea69d80c5adc8c23afbb70de02ef3051f7459a63 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 3 Jul 2009 17:43:52 +0100 Subject: Reworked. Because the disk->mixed transition doesn't eat up any ram, there is no need for the emergency tokens, nor any need for the weird doubling. So it's basically got much simpler. We hold two queues, one of hibernating queues (ordered by when they hibernated) and another priority_queue of lowrate queues (ordered by the amount of memory allocated to them). We evict to disk from the hibernated and then the lowrate queues in their relevant orders. Seems to work. Oh and disk_queue is now managed by the tokens too. 
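A minimal illustration of the eviction order just described, using the queue module and the priority_queue additions in the diff below (the pids and token counts are invented):

evict_order_sketch() ->
    Sleepy = queue:from_list([pid_a, pid_b]),  %% pid_a hibernated first
    Lazy = priority_queue:in(
             small_q, 30,
             priority_queue:in(big_q, 120, priority_queue:new())),
    %% hibernated queues are evicted first, oldest sleeper first...
    {{value, pid_a}, Sleepy1} = queue:out(Sleepy),
    {{value, pid_b}, _} = queue:out(Sleepy1),
    %% ...then lowrate queues, largest allocation first
    {{value, big_q, 120}, Lazy1} = priority_queue:pout(Lazy),
    {{value, small_q, 30}, _} = priority_queue:pout(Lazy1),
    ok.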
--- src/priority_queue.erl | 58 +++++- src/rabbit_amqqueue_process.erl | 7 +- src/rabbit_disk_queue.erl | 104 +++++++--- src/rabbit_misc.erl | 10 +- src/rabbit_mixed_queue.erl | 2 +- src/rabbit_mnesia.erl | 2 +- src/rabbit_queue_mode_manager.erl | 406 ++++++++++++++++++++++---------------- src/rabbit_tests.erl | 58 +++++- 8 files changed, 435 insertions(+), 212 deletions(-) diff --git a/src/priority_queue.erl b/src/priority_queue.erl index 732757c4..96838099 100644 --- a/src/priority_queue.erl +++ b/src/priority_queue.erl @@ -55,7 +55,8 @@ -module(priority_queue). --export([new/0, is_queue/1, is_empty/1, len/1, to_list/1, in/2, in/3, out/1]). +-export([new/0, is_queue/1, is_empty/1, len/1, to_list/1, in/2, in/3, + out/1, pout/1, join/2]). %%---------------------------------------------------------------------------- @@ -73,6 +74,8 @@ -spec(in/2 :: (any(), pqueue()) -> pqueue()). -spec(in/3 :: (any(), priority(), pqueue()) -> pqueue()). -spec(out/1 :: (pqueue()) -> {empty | {value, any()}, pqueue()}). +-spec(pout/1 :: (pqueue()) -> {empty | {value, any(), priority()}, pqueue()}). +-spec(join/2 :: (pqueue(), pqueue()) -> pqueue()). -endif. @@ -147,6 +150,59 @@ out({pqueue, [{P, Q} | Queues]}) -> end, {R, NewQ}. +pout({queue, [], []}) -> + {empty, {queue, [], []}}; +pout({queue, _, _} = Q) -> + {{value, V}, Q1} = out(Q), + {{value, V, 0}, Q1}; +pout({pqueue, [{P, Q} | Queues]}) -> + {{value, V}, Q1} = out(Q), + NewQ = case is_empty(Q1) of + true -> case Queues of + [] -> {queue, [], []}; + [{0, OnlyQ}] -> OnlyQ; + [_|_] -> {pqueue, Queues} + end; + false -> {pqueue, [{P, Q1} | Queues]} + end, + {{value, V, -P}, NewQ}. + +join(A, {queue, [], []}) -> + A; +join({queue, [], []}, B) -> + B; +join({queue, AIn, AOut}, {queue, BIn, BOut}) -> + {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; +join(A = {queue, _, _}, {pqueue, BPQ}) -> + {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, BPQ), + Post1 = case Post of + [] -> [ {0, A} ]; + [ {0, ZeroQueue} | Rest ] -> [ {0, join(A, ZeroQueue)} | Rest ]; + _ -> [ {0, A} | Post ] + end, + {pqueue, Pre ++ Post1}; +join({pqueue, APQ}, B = {queue, _, _}) -> + {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, APQ), + Post1 = case Post of + [] -> [ {0, B} ]; + [ {0, ZeroQueue} | Rest ] -> [ {0, join(ZeroQueue, B)} | Rest ]; + _ -> [ {0, B} | Post ] + end, + {pqueue, Pre ++ Post1}; +join({pqueue, APQ}, {pqueue, BPQ}) -> + {pqueue, merge(APQ, BPQ, [])}. + +merge([], BPQ, Acc) -> + lists:reverse(Acc, BPQ); +merge(APQ, [], Acc) -> + lists:reverse(Acc, APQ); +merge([{P, A}|As], [{P, B}|Bs], Acc) -> + merge(As, Bs, [ {P, join(A, B)} | Acc ]); +merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB -> + merge(As, Bs, [ {PA, A} | Acc ]); +merge(As = [{_, _}|_], [{PB, B}|Bs], Acc) -> + merge(As, Bs, [ {PB, B} | Acc ]). 
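%% [Usage sketch for the new pout/1 and join/2 above, consistent with
%% the tests added later in this series; the atoms are arbitrary.
%% join/2 treats unprioritised entries as priority 0 and preserves FIFO
%% order within a priority; pout/1 also returns the entry's priority.]
join_pout_sketch() ->
    Empty = priority_queue:new(),
    QFoo = priority_queue:in(foo, Empty),      %% priority 0
    QBar = priority_queue:in(bar, 1, Empty),   %% priority 1
    Joined = priority_queue:join(QFoo, QBar),
    {{value, bar, 1}, Joined1} = priority_queue:pout(Joined),
    {{value, foo, 0}, _} = priority_queue:pout(Joined1),
    ok.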
+ r2f([]) -> {queue, [], []}; r2f([_] = R) -> {queue, [], R}; r2f([X,Y]) -> {queue, [X], [Y]}; diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 986546dc..6b196951 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -102,7 +102,8 @@ start_link(Q) -> init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - {ok, Mode} = rabbit_queue_mode_manager:register(self()), + {ok, Mode} = rabbit_queue_mode_manager:register + (self(), rabbit_amqqueue, set_mode, [self()]), {ok, MS} = rabbit_mixed_queue:init(QName, Durable, Mode), {ok, #q{q = Q, owner = none, @@ -141,7 +142,7 @@ reply(Reply, NewState) -> reply1(Reply, NewState = #q { hibernated_at = undefined }) -> {reply, Reply, NewState, NewState #q.hibernate_after}; reply1(Reply, NewState) -> - NewState1 = report_memory(false, adjust_hibernate_after(NewState)), + NewState1 = adjust_hibernate_after(NewState), {reply, Reply, NewState1, NewState1 #q.hibernate_after}. noreply(NewState = #q { memory_report_timer = undefined }) -> @@ -152,7 +153,7 @@ noreply(NewState) -> noreply1(NewState = #q { hibernated_at = undefined }) -> {noreply, NewState, NewState #q.hibernate_after}; noreply1(NewState) -> - NewState1 = report_memory(false, adjust_hibernate_after(NewState)), + NewState1 = adjust_hibernate_after(NewState), {noreply, NewState1, NewState1 #q.hibernate_after}. adjust_hibernate_after(State = #q { hibernated_at = undefined }) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index dc328792..6674ce0e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -46,23 +46,24 @@ -export([length/1, filesync/0, cache_info/0]). --export([stop/0, stop_and_obliterate/0, conserve_memory/2, - to_disk_only_mode/0, to_ram_disk_mode/0]). +-export([stop/0, stop_and_obliterate/0, report_memory/0, + set_mode/1, to_disk_only_mode/0, to_ram_disk_mode/0]). -include("rabbit.hrl"). --define(WRITE_OK_SIZE_BITS, 8). --define(WRITE_OK, 255). --define(INTEGER_SIZE_BYTES, 8). --define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). --define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). --define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). --define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). --define(CACHE_ETS_NAME, rabbit_disk_queue_cache). --define(FILE_EXTENSION, ".rdq"). --define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_EXTENSION_DETS, ".dets"). --define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK, 255). +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). +-define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). +-define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). +-define(CACHE_ETS_NAME, rabbit_disk_queue_cache). +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). +-define(FILE_EXTENSION_DETS, ".dets"). +-define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). +-define(MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds -define(SERVER, ?MODULE). 
@@ -89,7 +90,9 @@ on_sync_froms, %% list of commiters to run on sync (reversed) timer_ref, %% TRef for our interval timer last_sync_offset, %% current_offset at the last time we sync'd - message_cache %% ets message cache + message_cache, %% ets message cache + memory_report_timer, %% TRef for the memory report timer + wordsize %% bytes in a word on this platform }). %% The components: @@ -267,7 +270,8 @@ -spec(length/1 :: (queue_name()) -> non_neg_integer()). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). --spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). +-spec(report_memory/0 :: () -> 'ok'). +-spec(set_mode/1 :: ('disk' | 'mixed') -> 'ok'). -endif. @@ -339,8 +343,11 @@ filesync() -> cache_info() -> gen_server2:call(?SERVER, cache_info, infinity). -conserve_memory(_Pid, Conserve) -> - gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}). +report_memory() -> + gen_server2:cast(?SERVER, report_memory). + +set_mode(Mode) -> + gen_server2:cast(?SERVER, {set_mode, Mode}). %% ---- GEN-SERVER INTERNAL API ---- @@ -354,7 +361,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. process_flag(trap_exit, true), - ok = rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), + {ok, Mode} = rabbit_queue_mode_manager:register + (self(), rabbit_disk_queue, set_mode, []), Node = node(), ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node, @@ -381,6 +389,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), + {ok, TRef} = timer:apply_interval(?MEMORY_REPORT_TIME_INTERVAL, + rabbit_disk_queue, report_memory, []), + + InitName = "0" ++ ?FILE_EXTENSION, State = #dqstate { msg_location_dets = MsgLocationDets, @@ -402,7 +414,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> timer_ref = undefined, last_sync_offset = 0, message_cache = ets:new(?CACHE_ETS_NAME, - [set, private]) + [set, private]), + memory_report_timer = TRef, + wordsize = erlang:system_info(wordsize) }, {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = @@ -419,7 +433,11 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> false -> %% new file, so preallocate ok = preallocate(FileHdl, FileSizeLimit, Offset) end, - {ok, State1 #dqstate { current_file_handle = FileHdl }}. + State2 = State1 #dqstate { current_file_handle = FileHdl }, + {ok, case Mode of + mixed -> State2; + disk -> to_disk_only_mode(State2) + end}. handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), @@ -493,11 +511,15 @@ handle_cast({delete_queue, Q}, State) -> noreply(State1); handle_cast(filesync, State) -> noreply(sync_current_file_handle(State)); -handle_cast({conserve_memory, Conserve}, State) -> - noreply((case Conserve of - true -> fun to_disk_only_mode/1; - false -> fun to_ram_disk_mode/1 - end)(State)). +handle_cast({set_mode, Mode}, State) -> + noreply((case Mode of + disk -> fun to_disk_only_mode/1; + mixed -> fun to_ram_disk_mode/1 + end)(State)); +handle_cast(report_memory, State) -> + Bytes = memory_use(State), + rabbit_queue_mode_manager:report_memory(self(), Bytes), + noreply(State). 
handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -513,10 +535,12 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge} + read_file_handles = {ReadHdls, _ReadHdlsAge}, + memory_report_timer = TRef }) -> - State1 = stop_commit_timer(State), %% deliberately ignoring return codes here + timer:cancel(TRef), + State1 = stop_commit_timer(State), dets:close(MsgLocationDets), file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)), @@ -531,7 +555,8 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, end, ok, ReadHdls), State1 #dqstate { current_file_handle = undefined, current_dirty = false, - read_file_handles = {dict:new(), gb_trees:empty()} + read_file_handles = {dict:new(), gb_trees:empty()}, + memory_report_timer = undefined }. code_change(_OldVsn, State, _Extra) -> @@ -539,6 +564,27 @@ code_change(_OldVsn, State, _Extra) -> %% ---- UTILITY FUNCTIONS ---- +memory_use(#dqstate { operation_mode = ram_disk, + file_summary = FileSummary, + sequences = Sequences, + msg_location_ets = MsgLocationEts, + wordsize = WordSize + }) -> + WordSize * (mnesia:table_info(rabbit_disk_queue, memory) + + ets:info(MsgLocationEts, memory) + + ets:info(FileSummary, memory) + + ets:info(Sequences, memory)); +memory_use(#dqstate { operation_mode = disk_only, + file_summary = FileSummary, + sequences = Sequences, + msg_location_dets = MsgLocationDets, + wordsize = WordSize + }) -> + (WordSize * (ets:info(FileSummary, memory) + + ets:info(Sequences, memory))) + + mnesia:table_info(rabbit_disk_queue, memory) + + dets:info(MsgLocationDets, memory). + to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> State; to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 2971e332..e66eb6b0 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -52,7 +52,7 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). --export([unfold/2]). +-export([unfold/2, ceil/1]). -import(mnesia). -import(lists). @@ -115,7 +115,8 @@ -spec(start_applications/1 :: ([atom()]) -> 'ok'). -spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). - +-spec(ceil/1 :: (number()) -> number()). + -endif. %%---------------------------------------------------------------------------- @@ -442,3 +443,8 @@ unfold(Fun, Acc, Init) -> {true, E, I} -> unfold(Fun, [E|Acc], I); false -> {Acc, Init} end. + +ceil(N) when N - trunc(N) > 0 -> + 1 + trunc(N); +ceil(N) -> + N. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 26fa029d..d9c46898 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -541,7 +541,7 @@ is_empty(#mqstate { length = Length }) -> estimate_queue_memory(#mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> - {2*Size, Gain, Loss}. + {Size, Gain, Loss}. reset_counters(State) -> State #mqstate { memory_gain = 0, memory_loss = 0 }. 
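%% [Aside on memory_use/1 above: ets:info(Tab, memory) and
%% mnesia:table_info(Tab, memory) report sizes in machine words, hence
%% the multiplication by erlang:system_info(wordsize). A standalone
%% illustration with a throwaway table:]
ets_bytes_sketch() ->
    T = ets:new(example_tab, [set, private]),
    true = ets:insert(T, {example_key, <<"example value">>}),
    Bytes = ets:info(T, memory) * erlang:system_info(wordsize),
    true = ets:delete(T),
    Bytes.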
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 149501f8..b40294f6 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -150,7 +150,7 @@ table_definitions() -> {type, set}, {local_content, true}, {attributes, record_info(fields, dq_msg_loc)}, - {disc_only_copies, [node()]}]} + {disc_copies, [node()]}]} ]. replicated_table_definitions() -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 39524978..30695404 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,10 +38,9 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/1, report_memory/5]). +-export([register/4, report_memory/2, report_memory/5, info/0]). -define(TOTAL_TOKENS, 1000). --define(LOW_WATER_MARK_FRACTION, 0.25). -define(ACTIVITY_THRESHOLD, 25). -define(INITIAL_TOKEN_ALLOCATION, 10). @@ -53,7 +52,7 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(register/1 :: (pid()) -> {'ok', queue_mode()}). +-spec(register/4 :: (pid(), atom(), atom(), list()) -> {'ok', queue_mode()}). -spec(report_memory/5 :: (pid(), non_neg_integer(), non_neg_integer(), non_neg_integer(), bool()) -> 'ok'). @@ -61,153 +60,153 @@ -endif. -record(state, { available_tokens, - available_etokens, mixed_queues, + callbacks, tokens_per_byte, - low_rate, - hibernated + lowrate, + hibernate }). start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). -register(Pid) -> - gen_server2:call(?SERVER, {register, Pid}). +register(Pid, Module, Function, Args) -> + gen_server2:call(?SERVER, {register, Pid, Module, Function, Args}). + +report_memory(Pid, Memory) -> + report_memory(Pid, Memory, undefined, undefined, false). report_memory(Pid, Memory, Gain, Loss, Hibernating) -> gen_server2:cast(?SERVER, {report_memory, Pid, Memory, Gain, Loss, Hibernating}). +info() -> + gen_server2:call(?SERVER, info). + init([]) -> process_flag(trap_exit, true), %% todo, fix up this call as os_mon may not be running {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), - MemAvail = MemTotal - MemUsed, - Avail = ceil(?TOTAL_TOKENS * (1 - ?LOW_WATER_MARK_FRACTION)), - EAvail = ?TOTAL_TOKENS - Avail, - {ok, #state { available_tokens = Avail, - available_etokens = EAvail, + MemAvail = (MemTotal - MemUsed) / 3, %% magic + {ok, #state { available_tokens = ?TOTAL_TOKENS, mixed_queues = dict:new(), + callbacks = dict:new(), tokens_per_byte = ?TOTAL_TOKENS / MemAvail, - low_rate = sets:new(), - hibernated = sets:new() + lowrate = priority_queue:new(), + hibernate = queue:new() }}. 
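%% [How the manager turns a queue's byte report into a token request
%% (as in the report_memory handler below): tokens_per_byte from init/1
%% scales reported bytes into the fixed ?TOTAL_TOKENS space, and
%% rabbit_misc:ceil/1 rounds up. All numbers here are invented for
%% illustration.]
token_request_sketch() ->
    TotalTokens = 1000,              %% ?TOTAL_TOKENS
    MemAvail = 400 * 1024 * 1024,    %% what memsup might report free
    TPB = TotalTokens / MemAvail,    %% tokens_per_byte
    ReportedBytes = 3 * 1024 * 1024, %% a queue reporting 3MB of use
    8 = rabbit_misc:ceil(TPB * ReportedBytes), %% 7.5 rounds up to 8
    ok.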
-handle_call({register, Pid}, _From, - State = #state { available_tokens = Avail, - mixed_queues = Mixed }) -> +handle_call({register, Pid, Module, Function, Args}, _From, + State = #state { callbacks = Callbacks }) -> _MRef = erlang:monitor(process, Pid), - {Result, State1} = + State1 = State #state { callbacks = dict:store + (Pid, {Module, Function, Args}, Callbacks) }, + State2 = #state { available_tokens = Avail, + mixed_queues = Mixed } = + free_upto(Pid, ?INITIAL_TOKEN_ALLOCATION, State1), + {Result, State3} = case ?INITIAL_TOKEN_ALLOCATION > Avail of true -> - {disk, State}; + {disk, State2}; false -> - {mixed, State #state { mixed_queues = dict:store - (Pid, {?INITIAL_TOKEN_ALLOCATION, 0}, Mixed) }} + {mixed, State2 #state { + available_tokens = + Avail - ?INITIAL_TOKEN_ALLOCATION, + mixed_queues = dict:store + (Pid, {?INITIAL_TOKEN_ALLOCATION, active}, Mixed) }} end, - {reply, {ok, Result}, State1}. + {reply, {ok, Result}, State3}; + +handle_call(info, _From, State) -> + State1 = #state { available_tokens = Avail, + mixed_queues = Mixed, + lowrate = Lazy, + hibernate = Sleepy } = + free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying + {reply, [{ available_tokens, Avail }, + { mixed_queues, dict:to_list(Mixed) }, + { lowrate_queues, priority_queue:to_list(Lazy) }, + { hibernated_queues, queue:to_list(Sleepy) }], State1}. + -handle_cast(O = {report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, - State = #state { available_tokens = Avail, - available_etokens = EAvail, - tokens_per_byte = TPB, - mixed_queues = Mixed - }) -> - Req = ceil(Memory * TPB), - io:format("~w : ~w ~w ~n", [Pid, Memory, Req]), - LowRate = (BytesGained < ?ACTIVITY_THRESHOLD) - andalso (BytesLost < ?ACTIVITY_THRESHOLD), - io:format("~w ~w~n", [O, LowRate]), - State1 = - case find_queue(Pid, State) of - disk -> - case Req > Avail orelse (2*Req) > (Avail + EAvail) orelse - LowRate of - true -> State; %% remain as disk queue - false -> - %% go to mixed, allocate double Req, and use Extra - rabbit_amqqueue:set_mode(Pid, mixed), - Alloc = lists:min([2*Req, Avail]), - EAlloc = (2*Req) - Alloc, - State #state { available_tokens = Avail - Alloc, - available_etokens = EAvail - EAlloc, - mixed_queues = dict:store - (Pid, {Alloc, EAlloc}, Mixed) - } - end; - {mixed, {OAlloc, OEAlloc}} -> - io:format("~w ; ~w ~w ~n", [Pid, OAlloc, OEAlloc]), +handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, + State = #state { mixed_queues = Mixed, + available_tokens = Avail, + callbacks = Callbacks, + tokens_per_byte = TPB }) -> + Req = rabbit_misc:ceil(TPB * Memory), + LowRate = case {BytesGained, BytesLost} of + {undefined, _} -> false; + {_, undefined} -> false; + {G, L} -> G < ?ACTIVITY_THRESHOLD andalso + L < ?ACTIVITY_THRESHOLD + end, + {StateN = #state { lowrate = Lazy, hibernate = Sleepy }, ActivityNew} = + case find_queue(Pid, Mixed) of + {mixed, {OAlloc, _OActivity}} -> Avail1 = Avail + OAlloc, - EAvail1 = EAvail + OEAlloc, - case Req > (OAlloc + OEAlloc) of - true -> %% getting bigger - case Req > Avail1 of - true -> %% go to disk - attempt_free_from_idle(Req, Pid, - State #state { available_tokens = Avail1, - available_etokens = EAvail1, - mixed_queues = - dict:erase(Pid, Mixed) }); - false -> %% request not too big, stay mixed - State #state { available_tokens = Avail1 - Req, - available_etokens = EAvail1, - mixed_queues = dict:store - (Pid, {Req, 0}, Mixed) } - end; - false -> %% getting smaller (or staying same) - case 0 =:= OEAlloc of - true -> - case Req > Avail1 
orelse LowRate of - true -> %% go to disk - attempt_free_from_idle(Req, Pid, - State #state { available_tokens = Avail1, - available_etokens = EAvail1, - mixed_queues = - dict:erase(Pid, Mixed) }); - false -> %% request not too big, stay mixed - State #state { available_tokens = Avail1 - Req, - available_etokens = EAvail1, - mixed_queues = dict:store - (Pid, {Req, 0}, Mixed) } - end; - false -> - case Req > Avail1 of - true -> - EReq = Req - Avail1, - case EReq > EAvail1 of - true -> %% go to disk - attempt_free_from_idle(Req, Pid, - State #state { available_tokens = Avail1, - available_etokens = EAvail1, - mixed_queues = - dict:erase(Pid, Mixed) }); - false -> %% request not too big, stay mixed - State #state { available_tokens = 0, - available_etokens = EAvail1 - EReq, - mixed_queues = dict:store - (Pid, {Avail1, EReq}, Mixed) } - end; - false -> %% request not too big, stay mixed - State #state { available_tokens = Avail1 - Req, - available_etokens = EAvail1, - mixed_queues = dict:store - (Pid, {Req, 0}, Mixed) } - end - end + State1 = #state { available_tokens = Avail2, + mixed_queues = Mixed1 } = + free_upto(Pid, Req, + State #state { available_tokens = Avail1 }), + case Req > Avail2 of + true -> %% nowt we can do, send to disk + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, Args ++ [disk]), + {State1 #state { mixed_queues = + dict:erase(Pid, Mixed1) }, + disk}; + false -> %% keep mixed + Activity = if Hibernating -> hibernate; + LowRate -> lowrate; + true -> active + end, + {State1 #state + { mixed_queues = + dict:store(Pid, {Req, Activity}, Mixed1), + available_tokens = Avail2 - Req }, + Activity} + end; + disk -> + State1 = #state { available_tokens = Avail1, + mixed_queues = Mixed1 } = + free_upto(Pid, Req, State), + case Req > Avail1 of + true -> %% not enough space, stay as disk + {State1, disk}; + false -> %% can go to mixed mode + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, Args ++ [mixed]), + Activity = if Hibernating -> hibernate; + LowRate -> lowrate; + true -> active + end, + {State1 #state { + mixed_queues = + dict:store(Pid, {Req, Activity}, Mixed1), + available_tokens = Avail1 - Req }, + disk} end end, - {noreply, State1}. + StateN1 = + case ActivityNew of + active -> StateN; + disk -> StateN; + lowrate -> StateN #state { lowrate = + priority_queue:in(Pid, Req, Lazy) }; + hibernate -> StateN #state { hibernate = + queue:in(Pid, Sleepy) } + end, + {noreply, StateN1}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state { available_tokens = Avail, - available_etokens = EAvail, mixed_queues = Mixed }) -> - State1 = case find_queue(Pid, State) of + State1 = case find_queue(Pid, Mixed) of disk -> State; - {mixed, {Alloc, EAlloc}} -> + {mixed, {Alloc, _Activity}} -> State #state { available_tokens = Avail + Alloc, - available_etokens = EAvail + EAlloc, mixed_queues = dict:erase(Pid, Mixed) } end, {noreply, State1}; @@ -222,69 +221,140 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -find_queue(Pid, #state { mixed_queues = Mixed }) -> +find_queue(Pid, Mixed) -> case dict:find(Pid, Mixed) of {ok, Value} -> {mixed, Value}; error -> disk end. -ceil(N) when N - trunc(N) > 0 -> - 1 + trunc(N); -ceil(N) -> - N. +tidy_and_sum_lazy(IgnorePid, Lazy, Mixed) -> + tidy_and_sum_lazy(sets:add_element(IgnorePid, sets:new()), + Lazy, Mixed, 0, priority_queue:new()). 
-attempt_free_from_idle(Req, Pid, State = #state { available_tokens = Avail, - available_etokens = EAvail, - low_rate = Lazy, - hibernated = Sleepy, - mixed_queues = Mixed }) -> - case Req > Avail of - true -> - {Sleepy1, Freed, EFreed, State1} = free_upto(Req, sets:to_list(Sleepy), State), - case Req > Avail + Freed of +tidy_and_sum_lazy(DupCheckSet, Lazy, Mixed, FreeAcc, LazyAcc) -> + case priority_queue:pout(Lazy) of + {empty, Lazy} -> {FreeAcc, LazyAcc}; + {{value, Pid, Alloc}, Lazy1} -> + case sets:is_element(Pid, DupCheckSet) of true -> - {Lazy1, Freed1, EFreed1, State2} = free_upto(Req, sets:to_list(Lazy), State1), - case Req > Avail + Freed + Freed1 of - true -> - rabbit_amqqueue:set_mode(Pid, disk), - State2 #state { available_tokens = Avail + Freed + Freed1, - available_etokens = EAvail + EFreed + EFreed1, - low_rate = Lazy1, - hibernated = Sleepy1, - mixed_queues = dict:erase(Pid, Mixed) - }; - false -> - State2 #state { available_tokens = Avail + Freed + Freed1 - Req, - available_etokens = EAvail + EFreed + EFreed1, - low_rate = Lazy1, - hibernated = Sleepy1, - mixed_queues = dict:store(Pid, {Req, 0}, Mixed) - } - end; + tidy_and_sum_lazy(DupCheckSet, Lazy1, Mixed, FreeAcc, + LazyAcc); false -> - State1 #state { available_tokens = Avail + Freed - Req, - available_etokens = EAvail + EFreed, - hibernated = Sleepy1, - mixed_queues = dict:store(Pid, {Req, 0}, Mixed) - } - end; - false -> - State #state { mixed_queues = dict:store(Pid, {Req, 0}, Mixed) } + DupCheckSet1 = sets:add_element(Pid, DupCheckSet), + case find_queue(Pid, Mixed) of + {mixed, {Alloc, lowrate}} -> + tidy_and_sum_lazy(DupCheckSet1, Lazy1, Mixed, + FreeAcc + Alloc, priority_queue:in + (Pid, Alloc, LazyAcc)); + _ -> + tidy_and_sum_lazy(DupCheckSet1, Lazy1, Mixed, + FreeAcc, LazyAcc) + end + end + end. + +tidy_and_sum_sleepy(IgnorePid, Sleepy, Mixed) -> + tidy_and_sum_sleepy(sets:add_element(IgnorePid, sets:new()), + Sleepy, Mixed, 0, queue:new()). + +tidy_and_sum_sleepy(DupCheckSet, Sleepy, Mixed, FreeAcc, SleepyAcc) -> + case queue:out(Sleepy) of + {empty, Sleepy} -> {FreeAcc, SleepyAcc}; + {{value, Pid}, Sleepy1} -> + case sets:is_element(Pid, DupCheckSet) of + true -> + tidy_and_sum_sleepy(DupCheckSet, Sleepy1, Mixed, FreeAcc, + SleepyAcc); + false -> + DupCheckSet1 = sets:add_element(Pid, DupCheckSet), + case find_queue(Pid, Mixed) of + {mixed, {Alloc, hibernate}} -> + tidy_and_sum_sleepy(DupCheckSet1, Sleepy1, Mixed, + FreeAcc + Alloc, queue:in + (Pid, SleepyAcc)); + _ -> tidy_and_sum_sleepy(DupCheckSet1, Sleepy1, Mixed, + FreeAcc, SleepyAcc) + end + end + end. + +free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req) -> + free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req, + priority_queue:new()). + +free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req, LazyAcc) -> + case priority_queue:pout(Lazy) of + {empty, Lazy} -> {priority_queue:join(Lazy, LazyAcc), Mixed, Req}; + {{value, IgnorePid, Alloc}, Lazy1} -> + free_upto_lazy(IgnorePid, Callbacks, Lazy1, Mixed, Req, + priority_queue:in(IgnorePid, Alloc, LazyAcc)); + {{value, Pid, Alloc}, Lazy1} -> + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, Args ++ [disk]), + Mixed1 = dict:erase(Pid, Mixed), + case Req > Alloc of + true -> free_upto_lazy(IgnorePid, Callbacks, Lazy1, Mixed1, + Req - Alloc, LazyAcc); + false -> {priority_queue:join(Lazy1, LazyAcc), Mixed1, + Req - Alloc} + end + end. 
+ +free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req) -> + free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req, queue:new()). + +free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req, SleepyAcc) -> + case queue:out(Sleepy) of + {empty, Sleepy} -> {queue:join(Sleepy, SleepyAcc), Mixed, Req}; + {{value, IgnorePid}, Sleepy1} -> + free_upto_sleepy(IgnorePid, Callbacks, Sleepy1, Mixed, Req, + queue:in(IgnorePid, SleepyAcc)); + {{value, Pid}, Sleepy1} -> + {Alloc, hibernate} = dict:fetch(Pid, Mixed), + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, Args ++ [disk]), + Mixed1 = dict:erase(Pid, Mixed), + case Req > Alloc of + true -> free_upto_sleepy(IgnorePid, Callbacks, Sleepy1, Mixed1, + Req - Alloc, SleepyAcc); + false -> {queue:join(Sleepy1, SleepyAcc), Mixed1, Req - Alloc} + end end. -free_upto(Req, List, State) -> - free_upto(Req, List, 0, 0, State). - -free_upto(_Req, [], Freed, EFreed, State) -> - {[], Freed, EFreed, State}; -free_upto(Req, [Pid|Pids], Freed, EFreed, State = #state { available_tokens = Avail, - mixed_queues = Mixed }) -> - {mixed, {Alloc, EAlloc}} = find_queue(Pid, State), - rabbit_amqqueue:set_mode(Pid, disk), - State1 = State #state { mixed_queues = dict:erase(Pid, Mixed) }, - case Req > Avail + Freed + Alloc of +free_upto(Pid, Req, State = #state { available_tokens = Avail, + mixed_queues = Mixed, + callbacks = Callbacks, + lowrate = Lazy, + hibernate = Sleepy }) -> + case Req > Avail of true -> - free_upto(Req, Pids, Freed + Alloc, EFreed + EAlloc, State1); - false -> - {Pids, Freed + Alloc, EFreed + EAlloc, State1} + {SleepySum, Sleepy1} = tidy_and_sum_sleepy(Pid, Sleepy, Mixed), + case Req > Avail + SleepySum of + true -> %% not enough in sleepy, have a look in lazy too + {LazySum, Lazy1} = tidy_and_sum_lazy(Pid, Lazy, Mixed), + case Req > Avail + SleepySum + LazySum of + true -> %% can't free enough, just return tidied state + State #state { lowrate = Lazy1, + hibernate = Sleepy1 }; + false -> %% need to free all of sleepy, and some of lazy + {Sleepy2, Mixed1, ReqRem} = + free_upto_sleepy + (Pid, Callbacks, Sleepy1, Mixed, Req), + {Lazy2, Mixed2, ReqRem1} = + free_upto_lazy(Pid, Callbacks, Lazy1, Mixed1, + ReqRem), + State #state { available_tokens = + Avail + (Req - ReqRem1), + mixed_queues = Mixed2, + lowrate = Lazy2, + hibernate = Sleepy2 } + end; + false -> %% enough available in sleepy, don't touch lazy + {Sleepy2, Mixed1, ReqRem} = + free_upto_sleepy(Pid, Callbacks, Sleepy1, Mixed, Req), + State #state { available_tokens = Avail + (Req - ReqRem), + mixed_queues = Mixed1, + hibernate = Sleepy2 } + end; + false -> State end. 
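%% [The registration contract the manager relies on throughout: a
%% client passes an M-F-A at registration, and the manager later calls
%% erlang:apply(M, F, A ++ [Mode]) with Mode = disk | mixed to push the
%% client between modes. A hypothetical conforming client, for
%% illustration only:]
register_sketch() ->
    {ok, InitialMode} = rabbit_queue_mode_manager:register(
                          self(), ?MODULE, set_mode, [self()]),
    InitialMode.           %% disk or mixed, depending on free tokens

set_mode(Pid, Mode) when Mode =:= disk; Mode =:= mixed ->
    %% rabbit proper casts into the owning gen_server2 at this point
    Pid ! {set_mode, Mode},
    ok.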
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b56d71c8..f1082850 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -65,7 +65,7 @@ test_priority_queue() -> %% empty Q Q = priority_queue:new(), - {true, true, 0, [], []} = test_priority_queue(Q), + {true, true, 0, [], [], []} = test_priority_queue(Q), %% 1-4 element no-priority Q true = lists:all(fun (X) -> X =:= passed end, @@ -74,21 +74,57 @@ test_priority_queue() -> %% 1-element priority Q Q1 = priority_queue:in(foo, 1, priority_queue:new()), - {true, false, 1, [{1, foo}], [foo]} = test_priority_queue(Q1), + {true, false, 1, [{1, foo}], [foo], [{foo, 1}]} = test_priority_queue(Q1), %% 2-element same-priority Q Q2 = priority_queue:in(bar, 1, Q1), - {true, false, 2, [{1, foo}, {1, bar}], [foo, bar]} = + {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [{foo, 1}, {bar, 1}]} = test_priority_queue(Q2), %% 2-element different-priority Q Q3 = priority_queue:in(bar, 2, Q1), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo]} = + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [{bar, 2}, {foo, 1}]} = test_priority_queue(Q3), %% 1-element negative priority Q Q4 = priority_queue:in(foo, -1, priority_queue:new()), - {true, false, 1, [{-1, foo}], [foo]} = test_priority_queue(Q4), + {true, false, 1, [{-1, foo}], [foo], [{foo, -1}]} = test_priority_queue(Q4), + + %% merge 2 * 1-element no-priority Qs + Q5 = priority_queue:join(priority_queue:in(foo, Q), + priority_queue:in(bar, Q)), + {true, false, 2, [{0, foo}, {0, bar}], [foo, bar], [{foo, 0}, {bar, 0}]} = + test_priority_queue(Q5), + + %% merge 1-element no-priority Q with 1-element priority Q + Q6 = priority_queue:join(priority_queue:in(foo, Q), + priority_queue:in(bar, 1, Q)), + {true, false, 2, [{1, bar}, {0, foo}], [bar, foo], [{bar, 1}, {foo, 0}]} = + test_priority_queue(Q6), + + %% merge 1-element priority Q with 1-element no-priority Q + Q7 = priority_queue:join(priority_queue:in(foo, 1, Q), + priority_queue:in(bar, Q)), + {true, false, 2, [{1, foo}, {0, bar}], [foo, bar], [{foo, 1}, {bar, 0}]} = + test_priority_queue(Q7), + + %% merge 2 * 1-element same-priority Qs + Q8 = priority_queue:join(priority_queue:in(foo, 1, Q), + priority_queue:in(bar, 1, Q)), + {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [{foo, 1}, {bar, 1}]} = + test_priority_queue(Q8), + + %% merge 2 * 1-element different-priority Qs + Q9 = priority_queue:join(priority_queue:in(foo, 1, Q), + priority_queue:in(bar, 2, Q)), + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [{bar, 2}, {foo, 1}]} = + test_priority_queue(Q9), + + %% merge 2 * 1-element different-priority Qs (other way around) + Q10 = priority_queue:join(priority_queue:in(bar, 2, Q), + priority_queue:in(foo, 1, Q)), + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [{bar, 2}, {foo, 1}]} = + test_priority_queue(Q10), passed. @@ -101,18 +137,26 @@ priority_queue_out_all(Q) -> {{value, V}, Q1} -> [V | priority_queue_out_all(Q1)] end. +priority_queue_pout_all(Q) -> + case priority_queue:pout(Q) of + {empty, _} -> []; + {{value, V, P}, Q1} -> [{V, P} | priority_queue_pout_all(Q1)] + end. + test_priority_queue(Q) -> {priority_queue:is_queue(Q), priority_queue:is_empty(Q), priority_queue:len(Q), priority_queue:to_list(Q), - priority_queue_out_all(Q)}. + priority_queue_out_all(Q), + priority_queue_pout_all(Q)}. 
test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), ToListRes = [{0, X} || X <- Items], - {true, false, N, ToListRes, Items} = test_priority_queue(Q), + POutAllRes = [{X, 0} || X <- Items], + {true, false, N, ToListRes, Items, POutAllRes} = test_priority_queue(Q), passed. test_parsing() -> -- cgit v1.2.1 From 59cfa91f75386e99b5735a92161f3a9b48b6f2dd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 6 Jul 2009 11:50:28 +0100 Subject: testing shows these values work well. The whole thing works pretty well. Obviously, converting a mixed queue to disk does take some time and the values are deliberately set low to save memory because on this transition, the disk_queue mailbox will go insane and eat lots of memory very quickly. But it seems about the right balance. I'll add documentation next --- src/rabbit_disk_queue.erl | 2 +- src/rabbit_mixed_queue.erl | 2 +- src/rabbit_queue_mode_manager.erl | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 6674ce0e..84fbd760 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -518,7 +518,7 @@ handle_cast({set_mode, Mode}, State) -> end)(State)); handle_cast(report_memory, State) -> Bytes = memory_use(State), - rabbit_queue_mode_manager:report_memory(self(), Bytes), + rabbit_queue_mode_manager:report_memory(self(), 2 * Bytes), noreply(State). handle_info({'EXIT', _Pid, Reason}, State) -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index d9c46898..df241a6d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -541,7 +541,7 @@ is_empty(#mqstate { length = Length }) -> estimate_queue_memory(#mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> - {Size, Gain, Loss}. + {4 * Size, Gain, Loss}. reset_counters(State) -> State #mqstate { memory_gain = 0, memory_loss = 0 }. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 30695404..ea4633e6 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -87,7 +87,7 @@ init([]) -> process_flag(trap_exit, true), %% todo, fix up this call as os_mon may not be running {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), - MemAvail = (MemTotal - MemUsed) / 3, %% magic + MemAvail = MemTotal - MemUsed, {ok, #state { available_tokens = ?TOTAL_TOKENS, mixed_queues = dict:new(), callbacks = dict:new(), @@ -128,7 +128,7 @@ handle_call(info, _From, State) -> { lowrate_queues, priority_queue:to_list(Lazy) }, { hibernated_queues, queue:to_list(Sleepy) }], State1}. - + handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State = #state { mixed_queues = Mixed, available_tokens = Avail, -- cgit v1.2.1 From 7ca957f21494b401fdcb74d6af73b6e06016259e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 6 Jul 2009 12:29:50 +0100 Subject: Added documentation --- src/rabbit_queue_mode_manager.erl | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index ea4633e6..ba371538 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -53,6 +53,7 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/4 :: (pid(), atom(), atom(), list()) -> {'ok', queue_mode()}). +-spec(report_memory/2 :: (pid(), non_neg_integer()) -> 'ok'). 
-spec(report_memory/5 :: (pid(), non_neg_integer(), non_neg_integer(), non_neg_integer(), bool()) -> 'ok'). @@ -67,6 +68,71 @@ hibernate }). +%% Token-credit based memory management + +%% Start off by working out the amount of memory available in the +%% system (RAM). Then, work out how many tokens each byte corresponds +%% to. This is the tokens_per_byte field. When a process registers, it +%% must provide an M-F-A triple to a function that needs one further +%% argument, which is the new mode. This will either be 'mixed' or +%% 'disk'. +%% +%% Processes then report their own memory usage, in bytes, and the +%% manager takes care of the rest. +%% +%% There are a finite number of tokens in the system. These are +%% allocated to processes as they are requested. We keep track of +%% processes which have hibernated, and processes that are doing only +%% a low rate of work. When a request for memory can't be satisfied, +%% we try to evict processes first from the hibernated group, and +%% then from the lowrate group. The hibernated group is a simple +%% queue, and so is implicitly sorted by the order in which processes +%% were added to the queue. This means that when removing from the +%% queue, we evict the sleepiest pid first. The lowrate group is a +%% priority queue, where the priority is the amount of memory +%% allocated. Thus when we remove from the queue, we first remove the +%% queue with the most memory allocated. +%% +%% If the request still can't be satisfied after evicting to disk +%% everyone from those two groups (and note that we check first +%% whether or not freeing them would make available enough tokens to +%% satisfy the request rather than just sending all those queues to +%% disk and then going "whoops, didn't help after all"), then we send +%% the requesting process to disk. +%% +%% If a process has been sent to disk, it continues making +%% requests. As soon as a request can be satisfied (and this can +%% include sending other processes to disk in the way described +%% above), it will be told to come back into mixed mode. +%% +%% Note that the lowrate and hibernate groups can get very out of +%% date. This is fine, and kinda unavoidable given the absence of +%% useful APIs for queues. Thus we allow them to get out of date +%% (processes will be left in there when they change groups, +%% duplicates can appear, dead processes are not pruned etc etc etc), +%% and when we go through the groups, summing up their amount of +%% memory, we tidy up at that point. +%% +%% A process which is not evicted to disk and is requesting a smaller +%% amount of RAM than its last request will always be satisfied. A +%% mixed-mode process that is busy but consuming an unchanging amount +%% of RAM will never be sent to disk. The disk_queue is also managed +%% in the same way. This means that a queue that has gone back to +%% being mixed after being in disk mode now has its messages counted +%% twice as they are counted both in the request made by the queue +%% (even though they may not yet be in RAM) and also by the +%% disk_queue. This means that the threshold for going mixed -> disk +%% is above the threshold for going disk -> mixed. This is actually +%% fairly sensible as it reduces the risk of any oscillations +%% occurring. +%% +%% The queue process deliberately reports 4 times its estimated RAM +%% usage, and the disk_queue 2 times. In practice, this seems to work +%% well.
Note that we are deliberately running out of tokens a little +%% early because of the fact that the mixed -> disk transition can +%% transiently eat a lot of memory and take some time (flushing a few +%% million messages to disk is never going to be instantaneous). + start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). -- cgit v1.2.1 From 5aa647dd08ddb504897f9244aa46ba9eb9ed567f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 7 Jul 2009 17:28:12 +0100 Subject: Lots of tuning and testing. Totally rewrote to_disk_only_mode in mixed_queue so that it does batching. This means that it won't just flood the disk_queue with a billion messages, thus exhausting memory. Instead it does batching and uses tx_commit to demarcate the batches. This means the conversion happens as quickly as possible and does not exhaust memory. Dropped the memory alarms to 0.8. This is a good idea because converting queues between modes transiently takes a fair chunk of memory, and leaving the alarms up at 0.95 was proving too high, making the mode transitions exhaust RAM and swap to buggery. However, there is a problem when going to disk mode in the mixed queue where messages in the queue are already on disk. A million calls to phantom_deliver is not a good idea, and locks a CPU core at 100% for a very long time. --- scripts/rabbitmq-server | 2 +- scripts/rabbitmq-server.bat | 2 +- scripts/rabbitmq-service.bat | 2 +- src/rabbit_amqqueue_process.erl | 4 +- src/rabbit_disk_queue.erl | 44 +++++++++++++---- src/rabbit_mixed_queue.erl | 106 ++++++++++++++++++++++++---------------- 6 files changed, 103 insertions(+), 57 deletions(-) diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 0aa09bd8..70e0c66b 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -96,7 +96,7 @@ exec erl \ -os_mon start_memsup false \ -os_mon start_os_sup false \ -os_mon memsup_system_only true \ - -os_mon system_memory_high_watermark 0.95 \ + -os_mon system_memory_high_watermark 0.8 \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ -mnesia dump_log_write_threshold 10000 \ ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat index 1cf6c6ba..22dc10c6 100755 --- a/scripts/rabbitmq-server.bat +++ b/scripts/rabbitmq-server.bat @@ -125,7 +125,7 @@ if "%RABBITMQ_MNESIA_DIR%"=="" ( -os_mon start_memsup false ^ -os_mon start_os_sup false ^ -os_mon memsup_system_only true ^ --os_mon system_memory_high_watermark 0.95 ^ +-os_mon system_memory_high_watermark 0.8 ^ -mnesia dir \""%RABBITMQ_MNESIA_DIR%"\" ^ -mnesia dump_log_write_threshold 10000 ^ %CLUSTER_CONFIG% ^ diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat index 29be1742..6b997a25 100755 --- a/scripts/rabbitmq-service.bat +++ b/scripts/rabbitmq-service.bat @@ -175,7 +175,7 @@ set ERLANG_SERVICE_ARGUMENTS= ^ -os_mon start_memsup false ^ -os_mon start_os_sup false ^ -os_mon memsup_system_only true ^ --os_mon system_memory_high_watermark 0.95 ^ +-os_mon system_memory_high_watermark 0.8 ^ -mnesia dir \""%RABBITMQ_MNESIA_DIR%"\" ^ %CLUSTER_CONFIG% ^ %RABBITMQ_SERVER_START_ARGS% ^ diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 11841220..aab336ca 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -841,7 +841,9 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> noreply(State #q { mixed_state = MS1 }); handle_cast(report_memory, State) -> - {noreply, (report_memory(false, State)) #q {
memory_report_timer = undefined }, binary}. + %% deliberately don't call noreply/2 as we don't want to restart the timer + {noreply, (report_memory(false, State)) + #q { memory_report_timer = undefined }, binary}. handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 84fbd760..86a47c38 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -92,7 +92,9 @@ last_sync_offset, %% current_offset at the last time we sync'd message_cache, %% ets message cache memory_report_timer, %% TRef for the memory report timer - wordsize %% bytes in a word on this platform + wordsize, %% bytes in a word on this platform + mnesia_bytes_per_record, %% bytes per record in mnesia in ram_disk mode + ets_bytes_per_record %% bytes per record in msg_location_ets }). %% The components: @@ -416,7 +418,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> message_cache = ets:new(?CACHE_ETS_NAME, [set, private]), memory_report_timer = TRef, - wordsize = erlang:system_info(wordsize) + wordsize = erlang:system_info(wordsize), + mnesia_bytes_per_record = undefined, + ets_bytes_per_record = undefined }, {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = @@ -518,7 +522,7 @@ handle_cast({set_mode, Mode}, State) -> end)(State)); handle_cast(report_memory, State) -> Bytes = memory_use(State), - rabbit_queue_mode_manager:report_memory(self(), 2 * Bytes), + rabbit_queue_mode_manager:report_memory(self(), 2.5 * Bytes), noreply(State). handle_info({'EXIT', _Pid, Reason}, State) -> @@ -568,34 +572,51 @@ memory_use(#dqstate { operation_mode = ram_disk, file_summary = FileSummary, sequences = Sequences, msg_location_ets = MsgLocationEts, + message_cache = Cache, wordsize = WordSize }) -> WordSize * (mnesia:table_info(rabbit_disk_queue, memory) + ets:info(MsgLocationEts, memory) + ets:info(FileSummary, memory) + + ets:info(Cache, memory) + ets:info(Sequences, memory)); memory_use(#dqstate { operation_mode = disk_only, file_summary = FileSummary, sequences = Sequences, msg_location_dets = MsgLocationDets, - wordsize = WordSize - }) -> + message_cache = Cache, + wordsize = WordSize, + mnesia_bytes_per_record = MnesiaBytesPerRecord, + ets_bytes_per_record = EtsBytesPerRecord }) -> + MnesiaSizeEstimate = + mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord, + MsgLocationSizeEstimate = + dets:info(MsgLocationDets, size) * EtsBytesPerRecord, (WordSize * (ets:info(FileSummary, memory) + + ets:info(Cache, memory) + ets:info(Sequences, memory))) + - mnesia:table_info(rabbit_disk_queue, memory) + - dets:info(MsgLocationDets, memory). + round(MnesiaSizeEstimate) + + round(MsgLocationSizeEstimate). 
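%% An illustrative aside, not part of the patch: in disk_only mode the
%% real memory cost of the disc_only_copies mnesia table and the dets
%% table cannot simply be read off, so the code above multiplies the
%% current record counts by the bytes-per-record figures captured at
%% the ram_disk -> disk_only transition. A minimal sketch of that
%% arithmetic, with hypothetical figures:

disk_only_estimate_example() ->
    MnesiaBytesPerRecord = 120.5, %% captured when leaving ram_disk mode
    EtsBytesPerRecord = 85.0,     %% likewise, from msg_location_ets
    MnesiaRecords = 300000,       %% mnesia:table_info(rabbit_disk_queue, size)
    DetsRecords = 300000,         %% dets:info(MsgLocationDets, size)
    round(MnesiaRecords * MnesiaBytesPerRecord)
        + round(DetsRecords * EtsBytesPerRecord).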
to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> State; to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> + msg_location_ets = MsgLocationEts, + wordsize = WordSize }) -> rabbit_log:info("Converting disk queue to disk only mode~n", []), + MnesiaMemoryBytes = WordSize * mnesia:table_info(rabbit_disk_queue, memory), + MnesiaSize = lists:max([1, mnesia:table_info(rabbit_disk_queue, size)]), + EtsMemoryBytes = WordSize * ets:info(MsgLocationEts, memory), + EtsSize = lists:max([1, ets:info(MsgLocationEts, size)]), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), - State #dqstate { operation_mode = disk_only }. + garbage_collect(), + State #dqstate { operation_mode = disk_only, + mnesia_bytes_per_record = MnesiaMemoryBytes / MnesiaSize, + ets_bytes_per_record = EtsMemoryBytes / EtsSize }. to_ram_disk_mode(State = #dqstate { operation_mode = ram_disk }) -> State; @@ -607,7 +628,10 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), - State #dqstate { operation_mode = ram_disk }. + garbage_collect(), + State #dqstate { operation_mode = ram_disk, + mnesia_bytes_per_record = undefined, + ets_bytes_per_record = undefined }. noreply(NewState = #dqstate { on_sync_froms = [], timer_ref = undefined }) -> {noreply, NewState, infinity}; diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index df241a6d..f4154727 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -53,6 +53,8 @@ } ). +-define(TO_DISK_MAX_FLUSH_SIZE, 100000). + -ifdef(use_specs). -type(mode() :: ( 'disk' | 'mixed' )). @@ -124,48 +126,7 @@ to_disk_only_mode(TxnMessages, State = %% message on disk. %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. - Msgs = queue:to_list(MsgBuf), - {Requeue, TxPublish} = - lists:foldl( - fun ({Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}, - {RQueueAcc, TxPublishAcc}) -> - case OnDisk of - true -> - ok = rabbit_disk_queue:tx_commit(Q, TxPublishAcc, []), - {MsgId, IsDelivered, AckTag, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), - {[ {AckTag, {next, IsDelivered}} | RQueueAcc ], []}; - false -> - ok = if [] == RQueueAcc -> ok; - true -> - rabbit_disk_queue:requeue_with_seqs( - Q, lists:reverse(RQueueAcc)) - end, - ok = rabbit_disk_queue:tx_publish(Msg), - {[], [ MsgId | TxPublishAcc ]} - end; - ({disk, Count}, {RQueueAcc, TxPublishAcc}) -> - ok = if [] == TxPublishAcc -> ok; - true -> - rabbit_disk_queue:tx_commit(Q, TxPublishAcc, []) - end, - {RQueueAcc1, 0} = - rabbit_misc:unfold( - fun (0) -> false; - (N) -> - {_MsgId, IsDelivered, AckTag, _PersistRemaining} - = rabbit_disk_queue:phantom_deliver(Q), - {true, {AckTag, {next, IsDelivered}}, N - 1} - end, Count), - {RQueueAcc1 ++ RQueueAcc, []} - end, {[], []}, Msgs), - ok = if [] == TxPublish -> ok; - true -> rabbit_disk_queue:tx_commit(Q, TxPublish, []) - end, - ok = if [] == Requeue -> ok; - true -> - rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) - end, + ok = send_messages_to_disk(Q, MsgBuf, [], 0, []), %% tx_publish txn messages. 
Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -177,8 +138,64 @@ to_disk_only_mode(TxnMessages, State = _ -> rabbit_disk_queue:tx_publish(Msg) end end, TxnMessages), + garbage_collect(), {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. +send_messages_to_disk(Q, Queue, Requeue, PublishCount, Commit) -> + case queue:out(Queue) of + {empty, Queue} -> + ok = flush_messages_to_disk_queue(Q, Commit), + [] = flush_requeue_to_disk_queue(Q, Requeue, []), + ok; + {{value, {Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}}, + Queue1} -> + case OnDisk of + true -> + ok = flush_messages_to_disk_queue (Q, Commit), + {MsgId, IsDelivered, AckTag, _PersistRemaining} = + rabbit_disk_queue:phantom_deliver(Q), + send_messages_to_disk( + Q, Queue1, [{AckTag, {next, IsDelivered}} | Requeue], + 0, []); + false -> + Commit1 = + flush_requeue_to_disk_queue(Q, Requeue, Commit), + ok = rabbit_disk_queue:tx_publish(Msg), + case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of + true -> + ok = flush_messages_to_disk_queue(Q, Commit1), + send_messages_to_disk(Q, Queue1, [], 1, [MsgId]); + false -> + send_messages_to_disk + (Q, Queue1, [], PublishCount + 1, + [MsgId | Commit1]) + end + end; + {{value, {disk, Count}}, Queue2} -> + ok = flush_messages_to_disk_queue(Q, Commit), + {Requeue1, 0} = + rabbit_misc:unfold( + fun (0) -> false; + (N) -> + {_MsgId, IsDelivered, AckTag, _PersistRemaining} + = rabbit_disk_queue:phantom_deliver(Q), + {true, {AckTag, {next, IsDelivered}}, N - 1} + end, Count), + send_messages_to_disk(Q, Queue2, Requeue1 ++ Requeue, 0, []) + end. + +flush_messages_to_disk_queue(Q, Commit) -> + ok = if [] == Commit -> ok; + true -> rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []) + end. + +flush_requeue_to_disk_queue(Q, Requeue, Commit) -> + if [] == Requeue -> Commit; + true -> ok = rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []), + rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)), + [] + end. + to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> {ok, State}; to_mixed_mode(TxnMessages, State = @@ -204,7 +221,10 @@ to_mixed_mode(TxnMessages, State = _ -> [Msg #basic_message.guid | Acc] end end, [], TxnMessages), - ok = rabbit_disk_queue:tx_cancel(Cancel), + ok = if Cancel == [] -> ok; + true -> rabbit_disk_queue:tx_cancel(Cancel) + end, + garbage_collect(), {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf }}. purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, -- cgit v1.2.1 From 07fb74f9905bd84667d60656149f24b67774d113 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 7 Jul 2009 18:02:52 +0100 Subject: Sorted out rabbitmqctl so that it sends pinning commands to the queue_mode_manager rather than directly talking to the queues. This means the queues and the queue manager can't disagree on the mode a queue should be in. --- src/rabbit_amqqueue.erl | 16 +++++-- src/rabbit_control.erl | 19 ++++++-- src/rabbit_queue_mode_manager.erl | 98 ++++++++++++++++++++++++++++++--------- 3 files changed, 100 insertions(+), 33 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 92272f0c..15c5e907 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([set_mode/3, set_mode/2, report_memory/1]). 
+-export([set_mode_pin/3, set_mode/2, report_memory/1]). -import(mnesia). -import(gen_server2). @@ -104,7 +104,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(set_mode/3 :: (vhost(), amqqueue(), ('disk' | 'mixed')) -> 'ok'). +-spec(set_mode_pin/3 :: (vhost(), amqqueue(), bool()) -> any()). -spec(set_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). @@ -227,11 +227,17 @@ list(VHostPath) -> map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). -set_mode(VHostPath, Queue, ModeBin) +set_mode_pin(VHostPath, Queue, DiskBin) when is_binary(VHostPath) andalso is_binary(Queue) -> - Mode = list_to_atom(binary_to_list(ModeBin)), + Disk = list_to_atom(binary_to_list(DiskBin)), with(rabbit_misc:r(VHostPath, queue, Queue), - fun(Q) -> set_mode(Q #amqqueue.pid, Mode) end). + fun(Q) -> case Disk of + true -> rabbit_queue_mode_manager:pin_to_disk + (Q #amqqueue.pid); + false -> rabbit_queue_mode_manager:unpin_to_disk + (Q #amqqueue.pid) + end + end). set_mode(QPid, Mode) -> gen_server2:pcast(QPid, 10, {set_mode, Mode}). diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index ab5fe1bc..69859564 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -137,7 +137,8 @@ Available commands: list_bindings [-p ] list_connections [ ...] - set_queue_mode (disk|mixed) + pin_queue_to_disk + unpin_queue_from_disk Quiet output mode is selected with the \"-q\" flag. Informational messages are suppressed when quiet mode is in effect. @@ -168,6 +169,9 @@ peer_address, peer_port, state, channels, user, vhost, timeout, frame_max, recv_oct, recv_cnt, send_oct, send_cnt, send_pend]. The default is to display user, peer_address and peer_port. +pin_queue_to_disk will force a queue to be in disk mode. +unpin_queue_from_disk will permit a queue that has been pinned to disk mode +to be converted to mixed mode should there be enough memory available. "), halt(1). @@ -282,10 +286,15 @@ action(Command, Node, Args, Inform) -> {VHost, RemainingArgs} = parse_vhost_flag(Args), action(Command, Node, VHost, RemainingArgs, Inform). -action(set_queue_mode, Node, VHost, [Queue, Mode], Inform) -> - Inform("Setting queue mode to ~p for queue ~p in vhost ~p", - [Mode, Queue, VHost]), - call(Node, {rabbit_amqqueue, set_mode, [VHost, Queue, Mode]}); +action(pin_queue_to_disk, Node, VHost, [Queue], Inform) -> + Inform("Pinning queue ~p in vhost ~p to disk", + [Queue, VHost]), + call(Node, {rabbit_amqqueue, set_mode_pin, [VHost, Queue, "true"]}); + +action(unpin_queue_from_disk, Node, VHost, [Queue], Inform) -> + Inform("Unpinning queue ~p in vhost ~p from disk", + [Queue, VHost]), + call(Node, {rabbit_amqqueue, set_mode_pin, [VHost, Queue, "false"]}); action(set_permissions, Node, VHost, [Username, CPerm, WPerm, RPerm], Inform) -> Inform("Setting permissions for user ~p in vhost ~p", [Username, VHost]), diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index ba371538..359ef708 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,7 +38,8 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/4, report_memory/2, report_memory/5, info/0,
+-export([register/4, report_memory/2, report_memory/5, info/0, + pin_to_disk/1, unpin_to_disk/1]). -define(TOTAL_TOKENS, 1000). -define(ACTIVITY_THRESHOLD, 25). @@ -57,6 +58,8 @@ -spec(report_memory/5 :: (pid(), non_neg_integer(), non_neg_integer(), non_neg_integer(), bool()) -> 'ok'). +-spec(pin_to_disk/1 :: (pid()) -> 'ok'). +-spec(unpin_to_disk/1 :: (pid()) -> 'ok'). -endif. @@ -65,7 +68,8 @@ callbacks, tokens_per_byte, lowrate, - hibernate + hibernate, + disk_mode_pins }). %% Token-credit based memory management @@ -139,6 +143,12 @@ start_link() -> register(Pid, Module, Function, Args) -> gen_server2:call(?SERVER, {register, Pid, Module, Function, Args}). +pin_to_disk(Pid) -> + gen_server2:call(?SERVER, {pin_to_disk, Pid}). + +unpin_to_disk(Pid) -> + gen_server2:call(?SERVER, {unpin_to_disk, Pid}). + report_memory(Pid, Memory) -> report_memory(Pid, Memory, undefined, undefined, false). @@ -159,7 +169,8 @@ init([]) -> callbacks = dict:new(), tokens_per_byte = ?TOTAL_TOKENS / MemAvail, lowrate = priority_queue:new(), - hibernate = queue:new() + hibernate = queue:new(), + disk_mode_pins = sets:new() }}. handle_call({register, Pid, Module, Function, Args}, _From, @@ -183,22 +194,56 @@ handle_call({register, Pid, Module, Function, Args}, _From, end, {reply, {ok, Result}, State3}; +handle_call({pin_to_disk, Pid}, _From, + State = #state { mixed_queues = Mixed, + callbacks = Callbacks, + available_tokens = Avail, + disk_mode_pins = Pins }) -> + {Res, State1} = + case sets:is_element(Pid, Pins) of + true -> {already_pinned, State}; + false -> + case find_queue(Pid, Mixed) of + {mixed, {OAlloc, _OActivity}} -> + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, Args ++ [disk]), + {convert_to_disk_mode, + State #state { mixed_queues = dict:erase(Pid, Mixed), + available_tokens = Avail + OAlloc, + disk_mode_pins = + sets:add_element(Pid, Pins) + }}; + disk -> + {already_disk, + State #state { disk_mode_pins = + sets:add_element(Pid, Pins) }} + end + end, + {reply, Res, State1}; + +handle_call({unpin_to_disk, Pid}, _From, + State = #state { disk_mode_pins = Pins }) -> + {reply, ok, State #state { disk_mode_pins = sets:del_element(Pid, Pins) }}; + handle_call(info, _From, State) -> State1 = #state { available_tokens = Avail, mixed_queues = Mixed, lowrate = Lazy, - hibernate = Sleepy } = + hibernate = Sleepy, + disk_mode_pins = Pins } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying {reply, [{ available_tokens, Avail }, { mixed_queues, dict:to_list(Mixed) }, { lowrate_queues, priority_queue:to_list(Lazy) }, - { hibernated_queues, queue:to_list(Sleepy) }], State1}. + { hibernated_queues, queue:to_list(Sleepy) }, + { queues_pinned_to_disk, sets:to_list(Pins) }], State1}. 
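%% For illustration only (a sketch, not part of the patch): both pin
%% calls are synchronous gen_server2 calls, so given a queue pid
%% (normally obtained via rabbit_amqqueue rather than constructed by
%% hand) the conversation at this point in the series looks like:

pin_example(QPid) ->
    %% forces the queue to disk; at this commit Res is one of
    %% convert_to_disk_mode, already_disk or already_pinned
    Res = rabbit_queue_mode_manager:pin_to_disk(QPid),
    %% unpinning merely removes the constraint; the queue returns to
    %% mixed mode when the manager next finds enough free tokens
    ok = rabbit_queue_mode_manager:unpin_to_disk(QPid),
    Res.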
handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State = #state { mixed_queues = Mixed, available_tokens = Avail, callbacks = Callbacks, + disk_mode_pins = Pins, tokens_per_byte = TPB }) -> Req = rabbit_misc:ceil(TPB * Memory), LowRate = case {BytesGained, BytesLost} of @@ -234,24 +279,31 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State1, Activity} end; disk -> - State1 = #state { available_tokens = Avail1, - mixed_queues = Mixed1 } = - free_upto(Pid, Req, State), - case Req > Avail1 of - true -> %% not enough space, stay as disk - {State1, disk}; - false -> %% can go to mixed mode - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [mixed]), - Activity = if Hibernating -> hibernate; - LowRate -> lowrate; - true -> active - end, - {State1 #state { - mixed_queues = - dict:store(Pid, {Req, Activity}, Mixed1), - available_tokens = Avail1 - Req }, - disk} + case sets:is_element(Pid, Pins) of + true -> + {State, disk}; + false -> + State1 = #state { available_tokens = Avail1, + mixed_queues = Mixed1 } = + free_upto(Pid, Req, State), + case Req > Avail1 of + true -> %% not enough space, stay as disk + {State1, disk}; + false -> %% can go to mixed mode + {Module, Function, Args} = + dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, + Args ++ [mixed]), + Activity = if Hibernating -> hibernate; + LowRate -> lowrate; + true -> active + end, + {State1 #state { + mixed_queues = + dict:store(Pid, {Req, Activity}, Mixed1), + available_tokens = Avail1 - Req }, + disk} + end end end, StateN1 = -- cgit v1.2.1 From 0a63c5789995d4965fcbaf98b52f2a1cd596adf3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 8 Jul 2009 12:35:10 +0100 Subject: Found a bug in the memory reports in combination with hibernation: if a process comes out of hibernation and then does under 10 seconds' work before hibernating again, it'll only issue a memory report when it goes back into hibernation, and thus it'll always claim to the queue_mode_manager that it's hibernating. Thus now, when hibernating, or when receiving the report_memory message, we set state so that when the next normal message comes in, we always send a memory report after that message. This ensures that when a process wakes up and does some real work, the queue_mode_manager will be informed. Applied this, and the ability to hibernate, to the disk_queue too. Plus some minor refactoring and better state field names. All tests pass, and the disk_queue really does hibernate with the binary backoff as I wanted. --- src/rabbit_amqqueue_process.erl | 18 +++----- src/rabbit_disk_queue.erl | 94 ++++++++++++++++++++++++------------- src/rabbit_queue_mode_manager.erl | 18 ++++---- 3 files changed, 78 insertions(+), 52 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index aab336ca..a1b5a895 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -130,22 +130,19 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -reply(Reply, NewState = #q { memory_report_timer = undefined }) -> - {reply, Reply, start_memory_timer(NewState), binary}; reply(Reply, NewState) -> - {reply, Reply, NewState, binary}.
-noreply(NewState = #q { memory_report_timer = undefined }) -> - {noreply, start_memory_timer(NewState), binary}; noreply(NewState) -> - {noreply, NewState, binary}. + {noreply, start_memory_timer(NewState), binary}. start_memory_timer() -> {ok, TRef} = timer:apply_after(?MEMORY_REPORT_TIME_INTERVAL, rabbit_amqqueue, report_memory, [self()]), TRef. start_memory_timer(State = #q { memory_report_timer = undefined }) -> - State #q { memory_report_timer = start_memory_timer() }; + report_memory(false, + State #q { memory_report_timer = start_memory_timer() }); start_memory_timer(State) -> State. @@ -842,8 +839,8 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> handle_cast(report_memory, State) -> %% deliberately don't call noreply/2 as we don't want to restart the timer - {noreply, (report_memory(false, State)) - #q { memory_report_timer = undefined }, binary}. + %% by unsetting the timer, we force a report on the next normal message + {noreply, State #q { memory_report_timer = undefined }, binary}. handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -862,9 +859,8 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_ch_down(DownPid, State); handle_info(timeout, State) -> - %% TODO: Once we drop support for R11B-5, we can change this to - %% {noreply, State, hibernate}; State1 = stop_memory_timer(report_memory(true, State)), + %% don't call noreply/1 as that'll restart the memory_report_timer {noreply, State1, hibernate}; handle_info(Info, State) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 86a47c38..d8c6580f 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -71,6 +71,7 @@ -define(FILE_SIZE_LIMIT, (256*1024*1024)). -define(SYNC_INTERVAL, 5). %% milliseconds +-define(HIBERNATE_AFTER_MIN, 1000). -record(dqstate, {msg_location_dets, %% where are messages? @@ -88,7 +89,7 @@ read_file_handles, %% file handles for reading (LRU) read_file_handles_limit, %% how many file handles can we open? on_sync_froms, %% list of commiters to run on sync (reversed) - timer_ref, %% TRef for our interval timer + commit_timer_ref, %% TRef for our interval timer last_sync_offset, %% current_offset at the last time we sync'd message_cache, %% ets message cache memory_report_timer, %% TRef for the memory report timer @@ -391,9 +392,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), - {ok, TRef} = timer:apply_interval(?MEMORY_REPORT_TIME_INTERVAL, - rabbit_disk_queue, report_memory, []), - + TRef = start_memory_timer(), InitName = "0" ++ ?FILE_EXTENSION, State = @@ -413,7 +412,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> read_file_handles = {dict:new(), gb_trees:empty()}, read_file_handles_limit = ReadFileHandlesLimit, on_sync_froms = [], - timer_ref = undefined, + commit_timer_ref = undefined, last_sync_offset = 0, message_cache = ets:new(?CACHE_ETS_NAME, [set, private]), @@ -441,7 +440,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, case Mode of mixed -> State2; disk -> to_disk_only_mode(State2) - end}. + end, {binary, ?HIBERNATE_AFTER_MIN}}. 
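%% An illustrative note, not new code: {binary, ?HIBERNATE_AFTER_MIN}
%% is a gen_server2 extension which, per the commit message above,
%% hibernates the process with a binary backoff, taking
%% ?HIBERNATE_AFTER_MIN (1000ms) as the starting idle timeout. Under
%% that assumption the intended flow is:
%%
%%   idle for the current timeout
%%     -> handle_info(timeout, State)            (clause below)
%%     -> ok = report_memory(true, State)        (tell the manager we are sleepy)
%%     -> {noreply, stop_memory_timer(State), hibernate}
%%
%% with the next real request restarting the memory report timer via
%% noreply/1 -> start_memory_timer/1.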
handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), @@ -521,14 +520,18 @@ handle_cast({set_mode, Mode}, State) -> mixed -> fun to_ram_disk_mode/1 end)(State)); handle_cast(report_memory, State) -> - Bytes = memory_use(State), - rabbit_queue_mode_manager:report_memory(self(), 2.5 * Bytes), - noreply(State). + %% call noreply1/1, not noreply/1, as we don't want to restart the + %% memory_report_timer + %% by unsetting the timer, we force a report on the next normal message + noreply1(State #dqstate { memory_report_timer = undefined }). handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; -handle_info(timeout, State = #dqstate { timer_ref = TRef }) - when TRef /= undefined -> +handle_info(timeout, State = #dqstate { commit_timer_ref = undefined }) -> + ok = report_memory(true, State), + %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer + {noreply, stop_memory_timer(State), hibernate}; +handle_info(timeout, State) -> noreply(sync_current_file_handle(State)); handle_info(_Info, State) -> noreply(State). @@ -539,12 +542,10 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge}, - memory_report_timer = TRef + read_file_handles = {ReadHdls, _ReadHdlsAge} }) -> %% deliberately ignoring return codes here - timer:cancel(TRef), - State1 = stop_commit_timer(State), + State1 = stop_commit_timer(stop_memory_timer(State)), dets:close(MsgLocationDets), file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS)), @@ -568,6 +569,27 @@ code_change(_OldVsn, State, _Extra) -> %% ---- UTILITY FUNCTIONS ---- +stop_memory_timer(State = #dqstate { memory_report_timer = undefined }) -> + State; +stop_memory_timer(State = #dqstate { memory_report_timer = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #dqstate { memory_report_timer = undefined }. + +start_memory_timer() -> + {ok, TRef} = timer:apply_after(?MEMORY_REPORT_TIME_INTERVAL, + rabbit_disk_queue, report_memory, []), + TRef. + +start_memory_timer(State = #dqstate { memory_report_timer = undefined }) -> + report_memory(false, State), + State #dqstate { memory_report_timer = start_memory_timer() }; +start_memory_timer(State) -> + State. + +report_memory(Hibernating, State) -> + Bytes = memory_use(State), + rabbit_queue_mode_manager:report_memory(self(), 2.5 * Bytes, Hibernating). + memory_use(#dqstate { operation_mode = ram_disk, file_summary = FileSummary, sequences = Sequences, @@ -633,22 +655,30 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, mnesia_bytes_per_record = undefined, ets_bytes_per_record = undefined }. -noreply(NewState = #dqstate { on_sync_froms = [], timer_ref = undefined }) -> - {noreply, NewState, infinity}; -noreply(NewState = #dqstate { timer_ref = undefined }) -> - {noreply, start_commit_timer(NewState), 0}; -noreply(NewState = #dqstate { on_sync_froms = [] }) -> - {noreply, stop_commit_timer(NewState), infinity}; noreply(NewState) -> + noreply1(start_memory_timer(NewState)). 
+ +noreply1(NewState = #dqstate { on_sync_froms = [], + commit_timer_ref = undefined }) -> + {noreply, NewState, binary}; +noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> + {noreply, start_commit_timer(NewState), 0}; +noreply1(NewState = #dqstate { on_sync_froms = [] }) -> + {noreply, stop_commit_timer(NewState), binary}; +noreply1(NewState) -> {noreply, NewState, 0}. -reply(Reply, NewState = #dqstate { on_sync_froms = [], timer_ref = undefined }) -> - {reply, Reply, NewState, infinity}; -reply(Reply, NewState = #dqstate { timer_ref = undefined }) -> - {reply, Reply, start_commit_timer(NewState), 0}; -reply(Reply, NewState = #dqstate { on_sync_froms = [] }) -> - {reply, Reply, stop_commit_timer(NewState), infinity}; reply(Reply, NewState) -> + reply1(Reply, start_memory_timer(NewState)). + +reply1(Reply, NewState = #dqstate { on_sync_froms = [], + commit_timer_ref = undefined }) -> + {reply, Reply, NewState, binary}; +reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> + {reply, Reply, start_commit_timer(NewState), 0}; +reply1(Reply, NewState = #dqstate { on_sync_froms = [] }) -> + {reply, Reply, stop_commit_timer(NewState), binary}; +reply1(Reply, NewState) -> {reply, Reply, NewState, 0}. form_filename(Name) -> @@ -793,15 +823,15 @@ sequence_lookup(Sequences, Q) -> {ReadSeqId, WriteSeqId, Length} end. -start_commit_timer(State = #dqstate { timer_ref = undefined }) -> +start_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), - State #dqstate { timer_ref = TRef }. + State #dqstate { commit_timer_ref = TRef }. -stop_commit_timer(State = #dqstate { timer_ref = undefined }) -> +stop_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> State; -stop_commit_timer(State = #dqstate { timer_ref = TRef }) -> +stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), - State #dqstate { timer_ref = undefined }. + State #dqstate { commit_timer_ref = undefined }. sync_current_file_handle(State = #dqstate { current_dirty = false, on_sync_froms = [] }) -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 359ef708..99f6e408 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/4, report_memory/2, report_memory/5, info/0, +-export([register/4, report_memory/3, report_memory/5, info/0, pin_to_disk/1, unpin_to_disk/1]). -define(TOTAL_TOKENS, 1000). @@ -54,7 +54,7 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/4 :: (pid(), atom(), atom(), list()) -> {'ok', queue_mode()}). --spec(report_memory/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). -spec(report_memory/5 :: (pid(), non_neg_integer(), non_neg_integer(), non_neg_integer(), bool()) -> 'ok'). @@ -131,11 +131,11 @@ %% occurring. %% %% The queue process deliberately reports 4 times its estimated RAM -%% usage, and the disk_queue 2 times. In practice, this seems to work -%% well. Note that we are deliberately running out of tokens a little -%% early because of the fact that the mixed -> disk transition can -%% transiently eat a lot of memory and take some time (flushing a few -%% million messages to disk is never going to be instantaneous).
+%% usage, and the disk_queue 2.5 times. In practice, this seems to +%% work well. Note that we are deliberately running out of tokens a +%% little early because of the fact that the mixed -> disk transition +%% can transiently eat a lot of memory and take some time (flushing a +%% few million messages to disk is never going to be instantaneous). start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). @@ -149,8 +149,8 @@ pin_to_disk(Pid) -> unpin_to_disk(Pid) -> gen_server2:call(?SERVER, {unpin_to_disk, Pid}). -report_memory(Pid, Memory) -> - report_memory(Pid, Memory, undefined, undefined, false). +report_memory(Pid, Memory, Hibernating) -> + report_memory(Pid, Memory, undefined, undefined, Hibernating). report_memory(Pid, Memory, Gain, Loss, Hibernating) -> gen_server2:cast(?SERVER, -- cgit v1.2.1 From 98d8a988cbac662f2d0be27a4949be7c592e6a4f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 8 Jul 2009 14:28:01 +0100 Subject: Added requeue_next_n to disk_queue and made use of it in mixed_queue:to_disk_only_mode. This function moves the next N messages at the front of the queue to the back and is MUCH more efficient than calling phantom_deliver and then requeue_with_seqs. This means that a queue which has been sent to disk, then converted back to mixed mode, had some minor work done, and then been sent back to disk takes almost no time in transitions beyond the first transition. The test of this: 1) declare durable queue 2) send 100,000 persistent messages to it 3) send 100,000 non-persistent messages to it 4) send 100,000 persistent messages to it 5) now pin it to disk - it'll make two calls to requeue_next_n and should be rather quick as it's only the middle 100,000 messages that actually have to be written, the other 200,000 don't even get sent between the disk_queue and mixed_queue in either direction. A total of 100,003 calls are necessary for this transition: 2 requeue_next_n, 100,000 tx_publish, 1 tx_commit 6) now unpin it from disk and list the queues to wake it up. The transition to mixed mode is one call, zero reads, and instantaneous 7) now repin it to disk. The mixed queue knows everything is still on disk, so it makes one call to requeue_next_n with N = 300,000. The disk_queue sees this is the whole queue and so doesn't need to do any work at all and so is instant. All tests pass. --- src/rabbit_disk_queue.erl | 47 ++++++++++++++++++++++++++++++++++++++- src/rabbit_mixed_queue.erl | 37 +++++++++++------------------- src/rabbit_queue_mode_manager.erl | 6 ++--- 3 files changed, 62 insertions(+), 28 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d8c6580f..9c7d35eb 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -41,7 +41,8 @@ -export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, - delete_non_durable_queues/1, auto_ack_next_message/1 + delete_non_durable_queues/1, auto_ack_next_message/1, + requeue_next_n/2 ]). -export([length/1, filesync/0, cache_info/0]). @@ -264,6 +265,7 @@ -spec(requeue_with_seqs/2 :: (queue_name(), [{{msg_id(), seq_id()}, {seq_id_or_next(), bool()}}]) -> 'ok').
@@ -315,6 +317,9 @@ requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> requeue_with_seqs(Q, MsgSeqSeqIds) when is_list(MsgSeqSeqIds) -> gen_server2:cast(?SERVER, {requeue_with_seqs, Q, MsgSeqSeqIds}). +requeue_next_n(Q, N) when is_integer(N) -> + gen_server2:cast(?SERVER, {requeue_next_n, Q, N}). + purge(Q) -> gen_server2:call(?SERVER, {purge, Q}, infinity). @@ -509,6 +514,9 @@ handle_cast({requeue, Q, MsgSeqIds}, State) -> handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), noreply(State1); +handle_cast({requeue_next_n, Q, N}, State) -> + {ok, State1} = internal_requeue_next_n(Q, N, State), + noreply(State1); handle_cast({delete_queue, Q}, State) -> {ok, State1} = internal_delete_queue(Q, State), noreply(State1); @@ -747,13 +755,19 @@ find_next_seq_id(CurrentSeq, NextSeqId) when NextSeqId > CurrentSeq -> NextSeqId. +%% the queue is empty, and we've just written exactly where we +%% expected, so read it back determine_next_read_id(CurrentReadWrite, CurrentReadWrite, CurrentReadWrite) -> CurrentReadWrite; +%% we've just written in the next slot, so the next read pos is unaltered determine_next_read_id(CurrentRead, _CurrentWrite, next) -> CurrentRead; +%% queue is empty, but we've written somewhere else - a gap has formed +%% - so read back from where we wrote, after the gap determine_next_read_id(CurrentReadWrite, CurrentReadWrite, NextWrite) when NextWrite > CurrentReadWrite -> NextWrite; +%% queue is not empty, and we've created a gap, so the read pos is unaltered determine_next_read_id(CurrentRead, CurrentWrite, NextWrite) when NextWrite >= CurrentWrite -> CurrentRead. @@ -1200,6 +1214,37 @@ requeue_message({{{MsgId, SeqIdOrig}, {SeqIdTo, NewIsDelivered}}, decrement_cache(MsgId, State), {NextSeqIdTo1, Q, State}. +%% move the next N messages from the front of the queue to the back. +internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> + {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), + ReadSeqId1 = determine_next_read_id(ReadSeqId, WriteSeqId, next), + if N >= Length -> {ok, State}; + true -> + {atomic, {ReadSeqIdN, WriteSeqIdN}} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + requeue_next_messages(Q, N, ReadSeqId1, WriteSeqId) + end + ), + true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN, Length}), + {ok, State} + end. + +requeue_next_messages(_Q, 0, ReadSeq, WriteSeq) -> + {ReadSeq, WriteSeq}; +requeue_next_messages(Q, N, ReadSeq, WriteSeq) -> + WriteSeq1 = adjust_last_msg_seq_id(Q, WriteSeq, next, write), + NextWriteSeq = find_next_seq_id(WriteSeq1, next), + [Obj = #dq_msg_loc { next_seq_id = NextSeqIdOrig }] = + mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeq1}, + next_seq_id = NextWriteSeq + }, write), + ok = mnesia:delete(rabbit_disk_queue, {Q, ReadSeq}, write), + requeue_next_messages(Q, N - 1, NextSeqIdOrig, NextWriteSeq). + internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, 0, State}; diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index f4154727..4a2803a4 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -126,7 +126,7 @@ to_disk_only_mode(TxnMessages, State = %% message on disk. %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. 
- ok = send_messages_to_disk(Q, MsgBuf, [], 0, []), + ok = send_messages_to_disk(Q, MsgBuf, 0, 0, []), %% tx_publish txn messages. Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -141,47 +141,36 @@ to_disk_only_mode(TxnMessages, State = garbage_collect(), {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. -send_messages_to_disk(Q, Queue, Requeue, PublishCount, Commit) -> +send_messages_to_disk(Q, Queue, RequeueCount, PublishCount, Commit) -> case queue:out(Queue) of {empty, Queue} -> ok = flush_messages_to_disk_queue(Q, Commit), - [] = flush_requeue_to_disk_queue(Q, Requeue, []), + [] = flush_requeue_to_disk_queue(Q, RequeueCount, []), ok; - {{value, {Msg = #basic_message { guid = MsgId }, IsDelivered, OnDisk}}, + {{value, {Msg = #basic_message { guid = MsgId }, _IsDelivered, OnDisk}}, Queue1} -> case OnDisk of true -> - ok = flush_messages_to_disk_queue (Q, Commit), - {MsgId, IsDelivered, AckTag, _PersistRemaining} = - rabbit_disk_queue:phantom_deliver(Q), + ok = flush_messages_to_disk_queue(Q, Commit), send_messages_to_disk( - Q, Queue1, [{AckTag, {next, IsDelivered}} | Requeue], - 0, []); + Q, Queue1, 1 + RequeueCount, 0, []); false -> Commit1 = - flush_requeue_to_disk_queue(Q, Requeue, Commit), + flush_requeue_to_disk_queue(Q, RequeueCount, Commit), ok = rabbit_disk_queue:tx_publish(Msg), case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of true -> ok = flush_messages_to_disk_queue(Q, Commit1), - send_messages_to_disk(Q, Queue1, [], 1, [MsgId]); + send_messages_to_disk(Q, Queue1, 0, 1, [MsgId]); false -> send_messages_to_disk - (Q, Queue1, [], PublishCount + 1, + (Q, Queue1, 0, PublishCount + 1, [MsgId | Commit1]) end end; {{value, {disk, Count}}, Queue2} -> ok = flush_messages_to_disk_queue(Q, Commit), - {Requeue1, 0} = - rabbit_misc:unfold( - fun (0) -> false; - (N) -> - {_MsgId, IsDelivered, AckTag, _PersistRemaining} - = rabbit_disk_queue:phantom_deliver(Q), - {true, {AckTag, {next, IsDelivered}}, N - 1} - end, Count), - send_messages_to_disk(Q, Queue2, Requeue1 ++ Requeue, 0, []) + send_messages_to_disk(Q, Queue2, RequeueCount + Count, 0, []) end. flush_messages_to_disk_queue(Q, Commit) -> @@ -189,10 +178,10 @@ flush_messages_to_disk_queue(Q, Commit) -> true -> rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []) end. -flush_requeue_to_disk_queue(Q, Requeue, Commit) -> - if [] == Requeue -> Commit; +flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> + if 0 == RequeueCount -> Commit; true -> ok = rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []), - rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)), + rabbit_disk_queue:requeue_next_n(Q, RequeueCount), [] end. 
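%% A worked example of the batching above (hypothetical queue
%% contents; a sketch, not part of the patch): suppose the mixed
%% queue's buffer holds
%%   {disk, 2}, {MsgA, false, true}, {MsgB, false, false}
%% i.e. two messages summarised as already on disk, then MsgA (also
%% on disk), then MsgB (in RAM only). send_messages_to_disk/5 then
%% makes exactly three disk_queue calls:
%%
%%   rabbit_disk_queue:requeue_next_n(Q, 3), %% the {disk, 2} pair plus MsgA
%%   rabbit_disk_queue:tx_publish(MsgB),
%%   rabbit_disk_queue:tx_commit(Q, [MsgBId], []), %% MsgBId = MsgB's guid
%%
%% so messages already on disk are never read back or rewritten: they
%% are rotated to the back of the on-disk queue in a single cast, and
%% only the RAM-only portion is published, in batches of at most
%% ?TO_DISK_MAX_FLUSH_SIZE message ids per tx_commit.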
diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 99f6e408..f5cc32b4 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -201,20 +201,20 @@ handle_call({pin_to_disk, Pid}, _From, disk_mode_pins = Pins }) -> {Res, State1} = case sets:is_element(Pid, Pins) of - true -> {already_pinned, State}; + true -> {ok, State}; false -> case find_queue(Pid, Mixed) of {mixed, {OAlloc, _OActivity}} -> {Module, Function, Args} = dict:fetch(Pid, Callbacks), ok = erlang:apply(Module, Function, Args ++ [disk]), - {convert_to_disk_mode, + {ok, State #state { mixed_queues = dict:erase(Pid, Mixed), available_tokens = Avail + OAlloc, disk_mode_pins = sets:add_element(Pid, Pins) }}; disk -> - {already_disk, + {ok, State #state { disk_mode_pins = sets:add_element(Pid, Pins) }} end -- cgit v1.2.1 From 15e9dcc80ef49d39fa392316c681a6fa799ed691 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 8 Jul 2009 23:59:30 +0100 Subject: The mixed queue keeps, in its own queue, knowledge of whether the next message is on disk or not. It does not use any sequence numbers, nor does it try to correlate queue position with sequence numbers in the disk_queue. Therefore, there is absolutely no reason for the disk_queue to have all the complexity associated with being able to cope with non-contiguous sequence ids. Thus it has all been removed. This has made the disk_queue a good bit simpler and slightly faster in a few cases too. All tests pass. --- include/rabbit.hrl | 2 +- src/rabbit_disk_queue.erl | 251 +++++++++++++-------------------------------- src/rabbit_mixed_queue.erl | 18 ++-- src/rabbit_tests.erl | 2 +- 4 files changed, 83 insertions(+), 190 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index b8425baf..0ba31cb5 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -65,7 +65,7 @@ -record(basic_message, {exchange_name, routing_key, content, guid, is_persistent}). --record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id, next_seq_id}). +-record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id}). -record(delivery, {mandatory, immediate, txn, sender, message}). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 9c7d35eb..96889dbd 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,7 +40,7 @@ -export([publish/3, deliver/1, phantom_deliver/1, ack/2, tx_publish/1, tx_commit/3, tx_cancel/1, - requeue/2, requeue_with_seqs/2, purge/1, delete_queue/1, + requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, requeue_next_n/2 ]). -export([length/1, filesync/0, cache_info/0]). @@ -106,7 +106,7 @@ %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} %% Sequences: this is an ets table which contains: -%% {Q, ReadSeqId, WriteSeqId, QueueLength} +%% {Q, ReadSeqId, WriteSeqId} %% rabbit_disk_queue: this is an mnesia table which contains: %% #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, %% is_delivered = IsDelivered, %% msg_id = MsgId @@ -245,7 +245,6 @@ -ifdef(use_specs). -type(seq_id() :: non_neg_integer()). --type(seq_id_or_next() :: ( seq_id() | 'next' )). -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). @@ -261,10 +260,7 @@ -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). --spec(requeue/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). --spec(requeue_with_seqs/2 :: - (queue_name(), - [{{msg_id(), seq_id()}, {seq_id_or_next(), bool()}}]) -> 'ok').
+-spec(requeue/2 :: (queue_name(), [{{msg_id(), seq_id()}, bool()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). @@ -314,9 +310,6 @@ tx_cancel(MsgIds) when is_list(MsgIds) -> requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {requeue, Q, MsgSeqIds}). -requeue_with_seqs(Q, MsgSeqSeqIds) when is_list(MsgSeqSeqIds) -> - gen_server2:cast(?SERVER, {requeue_with_seqs, Q, MsgSeqSeqIds}). - requeue_next_n(Q, N) when is_integer(N) -> gen_server2:cast(?SERVER, {requeue_next_n, Q, N}). @@ -454,9 +447,8 @@ handle_call({phantom_deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, false, false, State), reply(Result, State1); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> - PubMsgSeqIds = zip_with_tail(PubMsgIds, {duplicate, next}), {Reply, State1} = - internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, State), + internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State), case Reply of true -> reply(ok, State1); false -> noreply(State1) @@ -483,8 +475,8 @@ handle_call(to_disk_only_mode, _From, State) -> handle_call(to_ram_disk_mode, _From, State) -> reply(ok, to_ram_disk_mode(State)); handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> - {_ReadSeqId, _WriteSeqId, Length} = sequence_lookup(Sequences, Q), - reply(Length, State); + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), + reply(WriteSeqId - ReadSeqId, State); handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), reply(ok, State1); @@ -492,8 +484,7 @@ handle_call(cache_info, _From, State = #dqstate { message_cache = Cache }) -> reply(ets:info(Cache), State). handle_cast({publish, Q, Message, IsDelivered}, State) -> - {ok, _MsgSeqId, State1} = - internal_publish(Q, Message, next, IsDelivered, State), + {ok, _MsgSeqId, State1} = internal_publish(Q, Message, IsDelivered, State), noreply(State1); handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), @@ -508,11 +499,7 @@ handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), noreply(State1); handle_cast({requeue, Q, MsgSeqIds}, State) -> - MsgSeqSeqIds = zip_with_tail(MsgSeqIds, {duplicate, {next, true}}), - {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), - noreply(State1); -handle_cast({requeue_with_seqs, Q, MsgSeqSeqIds}, State) -> - {ok, State1} = internal_requeue(Q, MsgSeqSeqIds, State), + {ok, State1} = internal_requeue(Q, MsgSeqIds, State), noreply(State1); handle_cast({requeue_next_n, Q, N}, State) -> {ok, State1} = internal_requeue_next_n(Q, N, State), @@ -695,13 +682,6 @@ form_filename(Name) -> base_directory() -> filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). -zip_with_tail(List1, List2) when length(List1) =:= length(List2) -> - lists:zip(List1, List2); -zip_with_tail(List = [_|Tail], {last, E}) -> - zip_with_tail(List, Tail ++ [E]); -zip_with_tail(List, {duplicate, E}) -> - zip_with_tail(List, lists:duplicate(erlang:length(List), E)). - dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, Key) -> @@ -749,29 +729,6 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, Obj) -> ets:match_object(MsgLocationEts, Obj). 
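%% With the Sequences rows cut down to {Q, ReadSeqId, WriteSeqId},
%% the queue length no longer needs tracking separately; the length
%% clause above simply computes it. As an illustrative restatement of
%% the invariant (not new code in the patch):
%%
%%   length(Q) = WriteSeqId - ReadSeqId
%%
%% e.g. ReadSeqId = 5 and WriteSeqId = 12 means seven undelivered
%% messages occupying the now necessarily contiguous sequence ids
%% 5 to 11.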
-find_next_seq_id(CurrentSeq, next) -> - CurrentSeq + 1; -find_next_seq_id(CurrentSeq, NextSeqId) - when NextSeqId > CurrentSeq -> - NextSeqId. - -%% the queue is empty, and we've just written exactly where we -%% expected, so read it back -determine_next_read_id(CurrentReadWrite, CurrentReadWrite, CurrentReadWrite) -> - CurrentReadWrite; -%% we've just written in the next slot, so the next read pos is unaltered -determine_next_read_id(CurrentRead, _CurrentWrite, next) -> - CurrentRead; -%% queue is empty, but we've written somewhere else - a gap has formed -%% - so read back from where we wrote, after the gap -determine_next_read_id(CurrentReadWrite, CurrentReadWrite, NextWrite) - when NextWrite > CurrentReadWrite -> - NextWrite; -%% queue is not empty, and we've created a gap, so the read pos is unaltered -determine_next_read_id(CurrentRead, CurrentWrite, NextWrite) - when NextWrite >= CurrentWrite -> - CurrentRead. - get_read_handle(File, Offset, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, read_file_handles_limit = ReadFileHandlesLimit, @@ -809,32 +766,12 @@ get_read_handle(File, Offset, State = {FileHdl, State1 #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge3} }}. -adjust_last_msg_seq_id(_Q, ExpectedSeqId, next, _Mode) -> - ExpectedSeqId; -adjust_last_msg_seq_id(_Q, 0, SuppliedSeqId, _Mode) -> - SuppliedSeqId; -adjust_last_msg_seq_id(_Q, ExpectedSeqId, ExpectedSeqId, _Mode) -> - ExpectedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, dirty) - when SuppliedSeqId > ExpectedSeqId -> - [Obj] = mnesia:dirty_read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}), - ok = mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }), - SuppliedSeqId; -adjust_last_msg_seq_id(Q, ExpectedSeqId, SuppliedSeqId, Lock) - when SuppliedSeqId > ExpectedSeqId -> - [Obj] = mnesia:read(rabbit_disk_queue, {Q, ExpectedSeqId - 1}, Lock), - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { next_seq_id = SuppliedSeqId }, - Lock), - SuppliedSeqId. - sequence_lookup(Sequences, Q) -> case ets:lookup(Sequences, Q) of [] -> - {0, 0, 0}; - [{Q, ReadSeqId, WriteSeqId, Length}] -> - {ReadSeqId, WriteSeqId, Length} + {0, 0}; + [{Q, ReadSeqId, WriteSeqId}] -> + {ReadSeqId, WriteSeqId} end. 
start_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> @@ -910,14 +847,14 @@ internal_deliver(Q, ReadMsg, FakeDeliver, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, empty, State}; - [{Q, SeqId, SeqId, 0}] -> {ok, empty, State}; - [{Q, ReadSeqId, WriteSeqId, Length}] when Length > 0 -> - Remaining = Length - 1, - {ok, Result, NextReadSeqId, State1} = + [{Q, SeqId, SeqId}] -> {ok, empty, State}; + [{Q, ReadSeqId, WriteSeqId}] when WriteSeqId >= ReadSeqId -> + Remaining = WriteSeqId - ReadSeqId - 1, + {ok, Result, State1} = internal_read_message( Q, ReadSeqId, FakeDeliver, ReadMsg, State), true = ets:insert(Sequences, - {Q, NextReadSeqId, WriteSeqId, Remaining}), + {Q, ReadSeqId+1, WriteSeqId}), {ok, case Result of {MsgId, Delivered, {MsgId, ReadSeqId}} -> @@ -930,8 +867,7 @@ internal_deliver(Q, ReadMsg, FakeDeliver, internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> [Obj = - #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId, - next_seq_id = NextReadSeqId}] = + #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), [{MsgId, RefCount, File, Offset, TotalSize}] = dets_ets_lookup(State, MsgId), @@ -959,13 +895,13 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> _ -> insert_into_cache(Message, BodySize, State1) end, {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, - NextReadSeqId, State1}; + State1}; {Message, BodySize, _RefCount} -> {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, - NextReadSeqId, State} + State} end; false -> - {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, NextReadSeqId, State} + {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} end. internal_auto_ack(Q, State) -> @@ -1064,25 +1000,15 @@ internal_tx_publish(MsgId, Message, {ok, State} end. -%% can call this with PubMsgSeqIds as zip(PubMsgIds, duplicate(N, next)) -internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, +internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State = #dqstate { sequences = Sequences, current_file_name = CurFile, current_dirty = IsDirty, on_sync_froms = SyncFroms, last_sync_offset = SyncOffset }) -> - {PubList, PubAcc, ReadSeqId, Length} = - case PubMsgSeqIds of - [] -> {[], undefined, undefined, undefined}; - [{_, FirstSeqIdTo}|_] -> - {InitReadSeqId, InitWriteSeqId, InitLength} = - sequence_lookup(Sequences, Q), - InitReadSeqId1 = determine_next_read_id( - InitReadSeqId, InitWriteSeqId, FirstSeqIdTo), - { zip_with_tail(PubMsgSeqIds, {last, {next, next}}), - InitWriteSeqId, InitReadSeqId1, InitLength} - end, + {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), + WriteSeqId = InitWriteSeqId + erlang:length(PubMsgIds), {atomic, {InCurFile, WriteSeqId, State1}} = mnesia:transaction( fun() -> @@ -1094,34 +1020,27 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, %% order which _could_not_ have happened. 
{InCurFile1, WriteSeqId1} = lists:foldl( - fun ({{MsgId, SeqId}, {_NextMsgId, NextSeqId}}, - {InCurFileAcc, ExpectedSeqId}) -> + fun (MsgId, {InCurFileAcc, SeqId}) -> [{MsgId, _RefCount, File, Offset, _TotalSize}] = dets_ets_lookup(State, MsgId), - SeqId1 = adjust_last_msg_seq_id( - Q, ExpectedSeqId, SeqId, write), - NextSeqId1 = - find_next_seq_id(SeqId1, NextSeqId), ok = mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = - {Q, SeqId1}, + {Q, SeqId}, msg_id = MsgId, - is_delivered = false, - next_seq_id = NextSeqId1 + is_delivered = false }, write), {InCurFileAcc orelse (File =:= CurFile andalso Offset >= SyncOffset), - NextSeqId1} - end, {false, PubAcc}, PubList), + SeqId + 1} + end, {false, InitWriteSeqId}, PubMsgIds), {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), {InCurFile1, WriteSeqId1, State2} end), - true = case PubList of + true = case PubMsgIds of [] -> true; - _ -> ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId, - Length + erlang:length(PubList)}) + _ -> ets:insert(Sequences, {Q, InitReadSeqId, WriteSeqId}) end, if IsDirty andalso InCurFile -> {false, State1 #dqstate { on_sync_froms = [From | SyncFroms] }}; @@ -1129,34 +1048,28 @@ internal_tx_commit(Q, PubMsgSeqIds, AckSeqIds, From, {true, State1} end. -%% SeqId can be 'next' -internal_publish(Q, Message = #basic_message { guid = MsgId }, SeqId, +internal_publish(Q, Message = #basic_message { guid = MsgId }, IsDelivered, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(MsgId, Message, State), - {ReadSeqId, WriteSeqId, Length} = - sequence_lookup(Sequences, Q), - ReadSeqId3 = determine_next_read_id(ReadSeqId, WriteSeqId, SeqId), - WriteSeqId3 = adjust_last_msg_seq_id(Q, WriteSeqId, SeqId, dirty), - WriteSeqId3Next = WriteSeqId3 + 1, + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), ok = mnesia:dirty_write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId3}, + #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId}, msg_id = MsgId, - next_seq_id = WriteSeqId3Next, is_delivered = IsDelivered}), - true = ets:insert(Sequences, {Q, ReadSeqId3, WriteSeqId3Next, Length + 1}), - {ok, {MsgId, WriteSeqId3}, State1}. + true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}), + {ok, {MsgId, WriteSeqId}, State1}. internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned - MsgSeqIds = zip_with_tail(MsgIds, {duplicate, undefined}), + MsgSeqIds = lists:zip(MsgIds, + lists:duplicate(erlang:length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). internal_requeue(_Q, [], State) -> {ok, State}; -internal_requeue(Q, MsgSeqIds = [{_, {FirstSeqIdTo, _}}|_], - State = #dqstate { sequences = Sequences }) -> +internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've %% already been delivered). We also know that the rows for these @@ -1179,76 +1092,59 @@ internal_requeue(Q, MsgSeqIds = [{_, {FirstSeqIdTo, _}}|_], %% MsgLocation and FileSummary stay put (which makes further sense %% as they have no concept of sequence id anyway). 
- {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), - ReadSeqId1 = determine_next_read_id(ReadSeqId, WriteSeqId, FirstSeqIdTo), - MsgSeqIdsZipped = zip_with_tail(MsgSeqIds, {last, {next, {next, true}}}), + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), {atomic, {WriteSeqId1, Q, State}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), lists:foldl(fun requeue_message/2, {WriteSeqId, Q, State}, - MsgSeqIdsZipped) + MsgSeqIds) end), - true = ets:insert(Sequences, {Q, ReadSeqId1, WriteSeqId1, - Length + erlang:length(MsgSeqIds)}), + true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId1}), {ok, State}. -requeue_message({{{MsgId, SeqIdOrig}, {SeqIdTo, NewIsDelivered}}, - {_NextMsgSeqId, {NextSeqIdTo, _NextNewIsDelivered}}}, - {ExpectedSeqIdTo, Q, State}) -> - SeqIdTo1 = adjust_last_msg_seq_id(Q, ExpectedSeqIdTo, SeqIdTo, write), - NextSeqIdTo1 = find_next_seq_id(SeqIdTo1, NextSeqIdTo), - [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId, - next_seq_id = NextSeqIdOrig }] = - mnesia:read(rabbit_disk_queue, {Q, SeqIdOrig}, write), - if SeqIdTo1 == SeqIdOrig andalso NextSeqIdTo1 == NextSeqIdOrig -> ok; - true -> - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc {queue_and_seq_id = {Q, SeqIdTo1}, - next_seq_id = NextSeqIdTo1, - is_delivered = NewIsDelivered - }, - write), - ok = mnesia:delete(rabbit_disk_queue, {Q, SeqIdOrig}, write) - end, +requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, State}) -> + [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeqId}, + is_delivered = IsDelivered + }, + write), + ok = mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), decrement_cache(MsgId, State), - {NextSeqIdTo1, Q, State}. + {WriteSeqId + 1, Q, State}. %% move the next N messages from the front of the queue to the back. internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> - {ReadSeqId, WriteSeqId, Length} = sequence_lookup(Sequences, Q), - ReadSeqId1 = determine_next_read_id(ReadSeqId, WriteSeqId, next), - if N >= Length -> {ok, State}; + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), + if N >= (WriteSeqId - ReadSeqId) -> {ok, State}; true -> {atomic, {ReadSeqIdN, WriteSeqIdN}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - requeue_next_messages(Q, N, ReadSeqId1, WriteSeqId) + requeue_next_messages(Q, N, ReadSeqId, WriteSeqId) end ), - true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN, Length}), + true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN}), {ok, State} end. requeue_next_messages(_Q, 0, ReadSeq, WriteSeq) -> {ReadSeq, WriteSeq}; requeue_next_messages(Q, N, ReadSeq, WriteSeq) -> - WriteSeq1 = adjust_last_msg_seq_id(Q, WriteSeq, next, write), - NextWriteSeq = find_next_seq_id(WriteSeq1, next), - [Obj = #dq_msg_loc { next_seq_id = NextSeqIdOrig }] = - mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), + [Obj] = mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeq1}, - next_seq_id = NextWriteSeq - }, write), + Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeq}}, + write), ok = mnesia:delete(rabbit_disk_queue, {Q, ReadSeq}, write), - requeue_next_messages(Q, N - 1, NextSeqIdOrig, NextWriteSeq). + requeue_next_messages(Q, N - 1, ReadSeq + 1, WriteSeq + 1). 
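With the explicit length element gone from the Sequences rows, every operation above derives the queue length from the two sequence ids alone. A minimal sketch of the invariant (not part of the patch; Sequences is the private ets table used throughout this module):

    %% Illustrative only: once sequence ids are contiguous, queue length
    %% is always WriteSeqId - ReadSeqId (0 when empty), and delivering
    %% the message at ReadSeqId leaves WriteSeqId - ReadSeqId - 1 behind.
    queue_length(Sequences, Q) ->
        case ets:lookup(Sequences, Q) of
            []                           -> 0;
            [{Q, ReadSeqId, WriteSeqId}] -> WriteSeqId - ReadSeqId
        end.

For instance, a row {Q, 3, 7} holds four undelivered messages; delivering seq id 3 rewrites the row to {Q, 4, 7}.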
internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case ets:lookup(Sequences, Q) of [] -> {ok, 0, State}; - [{Q, ReadSeqId, WriteSeqId, _Length}] -> + [{Q, ReadSeqId, WriteSeqId}] -> {atomic, {ok, State1}} = mnesia:transaction( fun() -> @@ -1257,15 +1153,14 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> rabbit_misc:unfold( fun (SeqId) when SeqId == WriteSeqId -> false; (SeqId) -> - [#dq_msg_loc { msg_id = MsgId, - next_seq_id = NextSeqId } - ] = mnesia:read(rabbit_disk_queue, + [#dq_msg_loc { msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), - {true, {MsgId, SeqId}, NextSeqId} + {true, {MsgId, SeqId}, SeqId + 1} end, ReadSeqId), remove_messages(Q, MsgSeqIds, txn, State) end), - true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId, 0}), + true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), {ok, WriteSeqId - ReadSeqId, State1} end. @@ -1282,8 +1177,7 @@ internal_delete_queue(Q, State) -> rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, '_'}, msg_id = '_', - is_delivered = '_', - next_seq_id = '_' + is_delivered = '_' }, write), MsgSeqIds = @@ -1298,7 +1192,7 @@ internal_delete_queue(Q, State) -> internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> ets:foldl( - fun ({Q, _Read, _Write, _Length}, {ok, State1}) -> + fun ({Q, _Read, _Write}, {ok, State1}) -> case sets:is_element(Q, DurableQueues) of true -> {ok, State1}; false -> internal_delete_queue(Q, State1) @@ -1667,10 +1561,9 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> fun ({Q, ReadSeqId, WriteSeqId, _Length}) -> Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), ReadSeqId1 = ReadSeqId + Gap, - Length = WriteSeqId - ReadSeqId1, true = ets:insert(Sequences, - {Q, ReadSeqId1, WriteSeqId, Length}) + {Q, ReadSeqId1, WriteSeqId}) end, ets:match_object(Sequences, '_')) end). @@ -1685,9 +1578,7 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> 0 -> ok; _ -> mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc { - queue_and_seq_id = {Q, SeqId + Gap }, - next_seq_id = SeqId + Gap + 1 - }, + queue_and_seq_id = {Q, SeqId + Gap }}, write), mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) end, @@ -1720,8 +1611,7 @@ load_messages(Left, [File|Files], (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' + is_delivered = '_' }, msg_id)) of 0 -> {VMAcc, VTSAcc}; @@ -1761,8 +1651,7 @@ verify_messages_in_mnesia(MsgIds) -> (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', - is_delivered = '_', - next_seq_id = '_' + is_delivered = '_' }, msg_id)) end, MsgIds). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 4a2803a4..61487c9d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -180,9 +180,13 @@ flush_messages_to_disk_queue(Q, Commit) -> flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> if 0 == RequeueCount -> Commit; - true -> ok = rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []), - rabbit_disk_queue:requeue_next_n(Q, RequeueCount), - [] + true -> + ok = if [] == Commit -> ok; + true -> rabbit_disk_queue:tx_commit + (Q, lists:reverse(Commit), []) + end, + rabbit_disk_queue:requeue_next_n(Q, RequeueCount), + [] end. 
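To make the shuffle_up/remove_gaps_in_sequences pass shown earlier in this patch concrete: shuffle_up walks from WriteSeqId - 1 down to ReadSeqId, slides each surviving row up past the holes already seen, and returns the total gap by which ReadSeqId must advance. The same arithmetic as a hedged, self-contained sketch (hypothetical helper, not in the patch; assumes an ascending, non-empty list of surviving ids):

    %% ReadSeqId is the smallest surviving seq id, so the gap is simply
    %% the number of holes in [ReadSeqId, WriteSeqId - 1].
    recovered_read_gap(SurvivingSeqIds, WriteSeqId) ->
        (WriteSeqId - hd(SurvivingSeqIds)) - length(SurvivingSeqIds).

For example, rows surviving at 3, 5 and 6 with WriteSeqId = 8 give a gap of (8 - 3) - 3 = 2: the rows are shuffled up to 5, 6 and 7, and the new ReadSeqId becomes 3 + 2 = 5, so the recovered length is 8 - 5 = 3.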
to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> @@ -226,7 +230,7 @@ purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, deliver_all_messages(Q, IsDurable, [], [], 0, 0), ok = if Requeue == [] -> ok; true -> - rabbit_disk_queue:requeue_with_seqs(Q, lists:reverse(Requeue)) + rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)) end, ok = if Acks == [] -> ok; true -> rabbit_disk_queue:ack(Q, Acks) @@ -241,7 +245,7 @@ deliver_all_messages(Q, IsDurable, Acks, Requeue, Length, QSize) -> OnDisk = IsPersistent andalso IsDurable, {Acks1, Requeue1, Length1, QSize1} = if OnDisk -> { Acks, - [{AckTag, {next, IsDelivered}} | Requeue], + [{AckTag, IsDelivered} | Requeue], Length + 1, QSize + size_of_message(Msg) }; true -> { [AckTag | Acks], Requeue, Length, QSize } end, @@ -484,7 +488,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, = lists:foldl( fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) when IsDurable andalso IsPersistent -> - [AckTag | RQ]; + [{AckTag, true} | RQ]; ({Msg, _AckTag}, RQ) -> ok = case RQ == [] of true -> ok; @@ -508,7 +512,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, {Acc, MsgBuf2}) -> OnDisk = IsDurable andalso IsPersistent, Acc1 = - if OnDisk -> [AckTag | Acc]; + if OnDisk -> [{AckTag, true} | Acc]; true -> Acc end, {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2)} diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f1082850..8ab82677 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -924,7 +924,7 @@ rdq_test_redeliver() -> %% now requeue every other message (starting at the _first_) %% and ack the other ones lists:foldl(fun (SeqId2, true) -> - rabbit_disk_queue:requeue(q, [SeqId2]), + rabbit_disk_queue:requeue(q, [{SeqId2, true}]), false; (SeqId2, false) -> rabbit_disk_queue:ack(q, [SeqId2]), -- cgit v1.2.1 From edf3a33b337c7cc7155a21242533846fab4da616 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 11:00:48 +0100 Subject: Fixes from removing the non-contiguous sequences support from the disk queue that I failed to spot last night, but apparently came to me during my dreams. I have no idea how the tests managed to pass last night... --- src/rabbit_disk_queue.erl | 51 +++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 96889dbd..763e544d 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -845,10 +845,9 @@ insert_into_cache(Message = #basic_message { guid = MsgId }, internal_deliver(Q, ReadMsg, FakeDeliver, State = #dqstate { sequences = Sequences }) -> - case ets:lookup(Sequences, Q) of - [] -> {ok, empty, State}; - [{Q, SeqId, SeqId}] -> {ok, empty, State}; - [{Q, ReadSeqId, WriteSeqId}] when WriteSeqId >= ReadSeqId -> + case sequence_lookup(Sequences, Q) of + {SeqId, SeqId} -> {ok, empty, State}; + {ReadSeqId, WriteSeqId} when WriteSeqId >= ReadSeqId -> Remaining = WriteSeqId - ReadSeqId - 1, {ok, Result, State1} = internal_read_message( @@ -1142,9 +1141,9 @@ requeue_next_messages(Q, N, ReadSeq, WriteSeq) -> requeue_next_messages(Q, N - 1, ReadSeq + 1, WriteSeq + 1). 
internal_purge(Q, State = #dqstate { sequences = Sequences }) -> - case ets:lookup(Sequences, Q) of - [] -> {ok, 0, State}; - [{Q, ReadSeqId, WriteSeqId}] -> + case sequence_lookup(Sequences, Q) of + {SeqId, SeqId} -> {ok, 0, State}; + {ReadSeqId, WriteSeqId} -> {atomic, {ok, State1}} = mnesia:transaction( fun() -> @@ -1518,26 +1517,17 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> mnesia:foldl( fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> NextWrite = SeqId + 1, - true = - case ets:lookup(Sequences, Q) of - [] -> ets:insert_new(Sequences, - {Q, SeqId, NextWrite, -1}); - [Orig = {Q, Read, Write, Length}] -> - Repl = {Q, lists:min([Read, SeqId]), - lists:max([Write, NextWrite]), - %% Length is wrong here, - %% but it doesn't matter - %% because we'll pull out - %% the gaps in - %% remove_gaps_in_sequences - %% in then do a straight - %% subtraction to get the - %% right length - Length}, - if Orig =:= Repl -> true; - true -> ets:insert(Sequences, Repl) - end - end + case ets:lookup(Sequences, Q) of + [] -> ets:insert_new(Sequences, + {Q, SeqId, NextWrite}); + [Orig = {Q, Read, Write}] -> + Repl = {Q, lists:min([Read, SeqId]), + lists:max([Write, NextWrite])}, + case Orig == Repl of + true -> true; + false -> ets:insert(Sequences, Repl) + end + end end, true, rabbit_disk_queue) end), remove_gaps_in_sequences(State), @@ -1558,12 +1548,11 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), lists:foreach( - fun ({Q, ReadSeqId, WriteSeqId, _Length}) -> + fun ({Q, ReadSeqId, WriteSeqId}) -> Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), ReadSeqId1 = ReadSeqId + Gap, - true = - ets:insert(Sequences, - {Q, ReadSeqId1, WriteSeqId}) + true = ets:insert(Sequences, + {Q, ReadSeqId1, WriteSeqId}) end, ets:match_object(Sequences, '_')) end). -- cgit v1.2.1 From 2bd8a17889b3e432ce6eabe4c280669536c3e2c9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 11:37:10 +0100 Subject: minor documentation fix. --- src/rabbit_disk_queue.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 763e544d..27be0eeb 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -110,8 +110,7 @@ %% rabbit_disk_queue: this is an mnesia table which contains: %% #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, %% is_delivered = IsDelivered, -%% msg_id = MsgId, -%% next_seq_id = SeqId +%% msg_id = MsgId %% } %% -- cgit v1.2.1 From 47899af696cb9f24648cde3e006d8a93f6b316c7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 12:27:59 +0100 Subject: length is never used in disk_queue, so removed --- src/rabbit_disk_queue.erl | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 27be0eeb..3656694e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -45,7 +45,7 @@ requeue_next_n/2 ]). --export([length/1, filesync/0, cache_info/0]). +-export([filesync/0, cache_info/0]). -export([stop/0, stop_and_obliterate/0, report_memory/0, set_mode/1, to_disk_only_mode/0, to_ram_disk_mode/0]). @@ -267,7 +267,6 @@ -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). --spec(length/1 :: (queue_name()) -> non_neg_integer()). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). 
-spec(report_memory/0 :: () -> 'ok'). @@ -334,9 +333,6 @@ to_disk_only_mode() -> to_ram_disk_mode() -> gen_server2:pcall(?SERVER, 9, to_ram_disk_mode, infinity). -length(Q) -> - gen_server2:call(?SERVER, {length, Q}, infinity). - filesync() -> gen_server2:pcast(?SERVER, 10, filesync). @@ -473,9 +469,6 @@ handle_call(to_disk_only_mode, _From, State) -> reply(ok, to_disk_only_mode(State)); handle_call(to_ram_disk_mode, _From, State) -> reply(ok, to_ram_disk_mode(State)); -handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - reply(WriteSeqId - ReadSeqId, State); handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), reply(ok, State1); @@ -1006,7 +999,7 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, last_sync_offset = SyncOffset }) -> {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - WriteSeqId = InitWriteSeqId + erlang:length(PubMsgIds), + WriteSeqId = InitWriteSeqId + length(PubMsgIds), {atomic, {InCurFile, WriteSeqId, State1}} = mnesia:transaction( fun() -> @@ -1061,8 +1054,7 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned - MsgSeqIds = lists:zip(MsgIds, - lists:duplicate(erlang:length(MsgIds), undefined)), + MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). internal_requeue(_Q, [], State) -> @@ -1496,8 +1488,7 @@ load_from_disk(State) -> fun (#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }, true) -> - case erlang:length(dets_ets_lookup( - State1, MsgId)) of + case length(dets_ets_lookup(State1, MsgId)) of 0 -> ok == mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); 1 -> true @@ -1595,7 +1586,7 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case erlang:length(mnesia:dirty_index_match_object + case length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, queue_and_seq_id = '_', @@ -1635,13 +1626,13 @@ recover_crashed_compactions(Files, TmpFiles) -> verify_messages_in_mnesia(MsgIds) -> lists:foreach( fun (MsgId) -> - true = 0 < erlang:length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, - msg_id)) + true = 0 < length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_' + }, + msg_id)) end, MsgIds). recover_crashed_compactions1(Files, TmpFile) -> -- cgit v1.2.1 From e315441605dfcbf48cc861c049c7296c69f5a34e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 17:06:30 +0100 Subject: Initial work to permit low priority background tasks to be catered for. 
--- src/gen_server2.erl | 213 ++++++++++++++++++++++++++++++++++++---------- src/priority_queue.erl | 22 +++-- src/rabbit_disk_queue.erl | 14 +-- src/rabbit_misc.erl | 19 ++++- src/rabbit_tests.erl | 46 +++++----- 5 files changed, 236 insertions(+), 78 deletions(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index e46f2645..2784090a 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -37,6 +37,28 @@ %% Explicit timeouts (i.e. not 'binary') from the handle_* functions %% are still supported, and do not have any effect on the current %% timeout value. +%% +%% 6) init/1 can also return (either a further arg in addition to +%% timeout above, or as a key-value list with the timeout as {timeout, +%% Timeout}) a minimum priority (key: min_priority). This can also be +%% returned from handle_* functions (i.e. {noreply, NewState} or +%% {noreply, NewState, Timeout} or {noreply, NewState, Timeout, +%% MinPri} or {noreply, NewState, [{min_priority, MinPri}]} or +%% {noreply, NewState, [{min_priority, MinPri}, {timeout, +%% Timeout}]}). What this does is to only allow messages greater than +%% the indicated priority through to the module. To allow any message +%% through (as is the default), use 'any'. One effect of this is that +%% when hibernating, the process can be woken up to receive a message +%% which it then realises it is not interested in. When this happens, +%% handle_info(roused_and_disinterested, State) will be called as soon +%% as there are no further messages to process (i.e. upon waking, the +%% message queue is drained, and a timeout of 0 is used). A suggested +%% use of this is to cater for low priority background casts, which +%% can be sent with negative priorities, and to use a priority of 0 or +%% higher for everything else. Then, if you return from handle_* with +%% a timeout of 0 and find handle_info(timeout, State) being called, +%% you can then return with a min_priority of 'any' and pick up the +%% low priority messages. %% All modifications are (C) 2009 LShift Ltd. @@ -133,7 +155,8 @@ cast/2, pcast/3, reply/2, abcast/2, abcast/3, multi_call/2, multi_call/3, multi_call/4, - enter_loop/3, enter_loop/4, enter_loop/5, wake_hib/7]). + enter_loop/3, enter_loop/4, enter_loop/5, enter_loop/6, + wake_hib/8]). -export([behaviour_info/1]). @@ -322,22 +345,32 @@ multi_call(Nodes, Name, Req, Timeout) %% process, including registering a name for it. %%----------------------------------------------------------------- enter_loop(Mod, Options, State) -> - enter_loop(Mod, Options, State, self(), infinity). + enter_loop(Mod, Options, State, self(), []). enter_loop(Mod, Options, State, ServerName = {_, _}) -> - enter_loop(Mod, Options, State, ServerName, infinity); + enter_loop(Mod, Options, State, ServerName, []); + +enter_loop(Mod, Options, State, Opts) when is_list(Opts) -> + enter_loop(Mod, Options, State, self(), Opts); enter_loop(Mod, Options, State, Timeout) -> - enter_loop(Mod, Options, State, self(), Timeout). + enter_loop(Mod, Options, State, self(), [{timeout, Timeout}]). -enter_loop(Mod, Options, State, ServerName, Timeout) -> +enter_loop(Mod, Options, State, ServerName, Opts) when is_list(Opts) -> Name = get_proc_name(ServerName), Parent = get_parent(), Debug = debug_options(Name, Options), Queue = priority_queue:new(), + [{timeout, Timeout}, {min_priority, MinPri}] = extract_timeout_minpri(Opts), {Timeout1, TimeoutState} = build_timeout_state(Timeout), - loop(Parent, Name, State, Mod, Timeout1, TimeoutState, Queue, Debug). 
+ loop(Parent, Name, State, Mod, Timeout1, TimeoutState, MinPri, Queue, Debug); + +enter_loop(Mod, Options, State, ServerName, Timeout) -> + enter_loop(Mod, Options, State, ServerName, [{timeout, Timeout}]). +enter_loop(Mod, Options, State, ServerName, Timeout, MinPri) -> + enter_loop(Mod, Options, State, ServerName, + [{timeout, Timeout}, {min_priority, MinPri}]). %%%======================================================================== %%% Gen-callback functions %%%======================================================================== @@ -357,13 +390,19 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> Queue = priority_queue:new(), case catch Mod:init(Args) of {ok, State} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, State, Mod, infinity, undefined, Queue, Debug); + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, State, Mod, infinity, undefined, + any, Queue, Debug); {ok, State, Timeout} -> proc_lib:init_ack(Starter, {ok, self()}), {Timeout1, TimeoutState} = build_timeout_state(Timeout), - loop(Parent, Name, State, Mod, Timeout1, TimeoutState, Queue, - Debug); + loop(Parent, Name, State, Mod, Timeout1, TimeoutState, + any, Queue, Debug); + {ok, State, Timeout, MinPri} -> + proc_lib:init_ack(Starter, {ok, self()}), + {Timeout1, TimeoutState} = build_timeout_state(Timeout), + loop(Parent, Name, State, Mod, Timeout1, TimeoutState, + MinPri, Queue, Debug); {stop, Reason} -> %% For consistency, we must make sure that the %% registered name (if any) is unregistered before @@ -407,57 +446,70 @@ build_timeout_state(Timeout) -> _ -> {Timeout, undefined} end. +extract_timeout_minpri(Opts) -> + rabbit_misc:keygets([{timeout, infinity}, {min_priority, any}], Opts). + %%%======================================================================== %%% Internal functions %%%======================================================================== %%% --------------------------------------------------- %%% The MAIN loop. %%% --------------------------------------------------- -loop(Parent, Name, State, Mod, hibernate, undefined, Queue, Debug) -> - proc_lib:hibernate(?MODULE,wake_hib, - [Parent, Name, State, Mod, undefined, Queue, Debug]); -loop(Parent, Name, State, Mod, hibernate, {Current, Min, undefined}, Queue, - Debug) -> +loop(Parent, Name, State, Mod, hibernate, undefined, MinPri, Queue, Debug) -> + proc_lib:hibernate(?MODULE, wake_hib, [Parent, Name, State, Mod, undefined, + MinPri, Queue, Debug]); +loop(Parent, Name, State, Mod, hibernate, {Current, Min, undefined}, + MinPri, Queue, Debug) -> proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, - {Current, Min, now()}, Queue, Debug]); -loop(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug) -> + {Current, Min, now()}, + MinPri, Queue, Debug]); +loop(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Debug) -> receive Input -> loop(Parent, Name, State, Mod, - Time, TimeoutState, in(Input, Queue), Debug) + Time, TimeoutState, MinPri, in(Input, Queue), Debug) after 0 -> process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, - Queue, Debug, false) + MinPri, Queue, Debug, false) end. 
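The comments in this commit describe the intended calling pattern; a fragment of a hypothetical callback module may make it more concrete (run_job/2 and handle_normal/2 are invented names, and this is a sketch of the scheme the documentation above suggests, not code from the patch):

    %% Background work arrives via gen_server2:pcast(Server, -1, ...),
    %% so it sorts below all normal traffic, which uses priority >= 0.
    handle_cast({background, Job}, State) ->
        {noreply, run_job(Job, State)};
    handle_cast(Msg, State) ->
        %% time out immediately, but only admit priority >= 0 messages
        {noreply, handle_normal(Msg, State), 0, 0}.

    handle_info(timeout, State) ->
        %% nothing at priority >= 0 is left: drop the floor so any
        %% deferred background casts are finally let through
        {noreply, State, infinity, any}.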
-process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, +process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Debug, Hib) -> - case priority_queue:out(Queue) of + Res = case MinPri of + any -> priority_queue:out(Queue); + _ -> priority_queue:out(MinPri, Queue) + end, + case Res of {{value, Msg}, Queue1} -> process_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue1, Debug, Hib, Msg); {empty, Queue1} -> - Time1 = case {Time, TimeoutState} of - {binary, {Current, _Min, undefined}} -> Current; + Time1 = case {Hib, Time, TimeoutState} of + {true, _, _} -> 0; + {_, binary, {Current, _Min, undefined}} -> Current; _ -> Time end, receive Input -> loop(Parent, Name, State, Mod, - Time, TimeoutState, in(Input, Queue1), Debug) + Time, TimeoutState, MinPri, in(Input, Queue1), Debug) after Time1 -> process_msg(Parent, Name, State, Mod, - Time, TimeoutState, Queue1, Debug, Hib, timeout) + Time, TimeoutState, Queue1, Debug, Hib, + case Hib of + true -> roused_and_disinterested; + false -> timeout + end) end end. -wake_hib(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> +wake_hib(Parent, Name, State, Mod, TimeoutState, MinPri, Queue, Debug) -> Msg = receive Input -> Input end, TimeoutState1 = adjust_hibernate_after(TimeoutState), process_next_msg(Parent, Name, State, Mod, hibernate, TimeoutState1, - in(Msg, Queue), Debug, true). + MinPri, in(Msg, Queue), Debug, true). adjust_hibernate_after(undefined) -> undefined; @@ -707,14 +759,34 @@ handle_msg({'$gen_call', From, Msg}, case catch Mod:handle_call(Msg, From, State) of {reply, Reply, NState} -> reply(From, Reply), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); - {reply, Reply, NState, Time1} -> + loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, + []); + {reply, Reply, NState, Opts} when is_list(Opts) -> + reply(From, Reply), + [{timeout, Time}, {min_priority, MinPri}] = + extract_timeout_minpri(Opts), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + []); + {reply, Reply, NState, Time} -> reply(From, Reply), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); + loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, []); + {reply, Reply, NState, Time, MinPri} -> + reply(From, Reply), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + []); {noreply, NState} -> - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); - {noreply, NState, Time1} -> - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); + loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, + []); + {noreply, NState, Opts} when is_list(Opts) -> + [{timeout, Time}, {min_priority, MinPri}] = + extract_timeout_minpri(Opts), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + []); + {noreply, NState, Time} -> + loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, []); + {noreply, NState, Time, MinPri} -> + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + []); {stop, Reason, Reply, NState} -> {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, NState, [])), @@ -734,20 +806,44 @@ handle_msg({'$gen_call', From, Msg}, case catch Mod:handle_call(Msg, From, State) of {reply, Reply, NState} -> Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, + loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, + Debug1); + {reply, Reply, NState, Opts} when is_list(Opts) -> + Debug1 = 
reply(Name, From, Reply, NState, Debug), + [{timeout, Time}, {min_priority, MinPri}] = + extract_timeout_minpri(Opts), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + Debug1); + {reply, Reply, NState, Time} -> + Debug1 = reply(Name, From, Reply, NState, Debug), + loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, Debug1); - {reply, Reply, NState, Time1} -> + {reply, Reply, NState, Time, MinPri} -> Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + Debug1); {noreply, NState} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, + loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, + Debug1); + {noreply, NState, Opts} when is_list(Opts) -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + [{timeout, Time}, {min_priority, MinPri}] = + extract_timeout_minpri(Opts), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, Debug1); - {noreply, NState, Time1} -> + {noreply, NState, Time} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); + loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, + Debug1); + {noreply, NState, Time, MinPri} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + Debug1); {stop, Reason, Reply, NState} -> {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, NState, Debug)), @@ -767,9 +863,18 @@ handle_common_reply(Reply, Parent, Name, Msg, Mod, State, TimeoutState, Queue) -> case Reply of {noreply, NState} -> - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); - {noreply, NState, Time1} -> - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); + loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, + []); + {noreply, NState, Opts} when is_list(Opts) -> + [{timeout, Time}, {min_priority, MinPri}] = + extract_timeout_minpri(Opts), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + []); + {noreply, NState, Time} -> + loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, []); + {noreply, NState, Time, MinPri} -> + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + []); {stop, Reason, NState} -> terminate(Reason, Name, Msg, Mod, NState, []); {'EXIT', What} -> @@ -784,12 +889,25 @@ handle_common_reply(Reply, Parent, Name, Msg, Mod, State, TimeoutState, Queue, {noreply, NState} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, + loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, Debug1); - {noreply, NState, Time1} -> + {noreply, NState, Opts} when is_list(Opts) -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); + [{timeout, Time}, {min_priority, MinPri}] = + extract_timeout_minpri(Opts), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + Debug1); + {noreply, NState, Time} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, 
Time, TimeoutState, any, Queue, + Debug1); + {noreply, NState, Time, MinPri} -> + Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, + {noreply, NState}), + loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + Debug1); {stop, Reason, NState} -> terminate(Reason, Name, Msg, Mod, NState, Debug); {'EXIT', What} -> @@ -807,8 +925,9 @@ reply(Name, {To, Tag}, Reply, State, Debug) -> %%----------------------------------------------------------------- %% Callback functions for system messages handling. %%----------------------------------------------------------------- -system_continue(Parent, Debug, [Name, State, Mod, Time, TimeoutState, Queue]) -> - loop(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug). +system_continue(Parent, Debug, [Name, State, Mod, Time, TimeoutState, MinPri, + Queue]) -> + loop(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Debug). -ifdef(use_specs). -spec system_terminate(_, _, _, [_]) -> no_return(). diff --git a/src/priority_queue.erl b/src/priority_queue.erl index 96838099..9421f281 100644 --- a/src/priority_queue.erl +++ b/src/priority_queue.erl @@ -56,7 +56,7 @@ -module(priority_queue). -export([new/0, is_queue/1, is_empty/1, len/1, to_list/1, in/2, in/3, - out/1, pout/1, join/2]). + out/1, out/2, pout/1, join/2]). %%---------------------------------------------------------------------------- @@ -73,8 +73,9 @@ -spec(to_list/1 :: (pqueue()) -> [{priority(), any()}]). -spec(in/2 :: (any(), pqueue()) -> pqueue()). -spec(in/3 :: (any(), priority(), pqueue()) -> pqueue()). --spec(out/1 :: (pqueue()) -> {empty | {value, any()}, pqueue()}). --spec(pout/1 :: (pqueue()) -> {empty | {value, any(), priority()}, pqueue()}). +-spec(out/1 :: (pqueue()) -> {(empty | {value, any()}), pqueue()}). +-spec(out/2 :: (priority(), pqueue()) -> {(empty | {value, any()}), pqueue()}). +-spec(pout/1 :: (pqueue()) -> {(empty | {value, any(), priority()}), pqueue()}). -spec(join/2 :: (pqueue(), pqueue()) -> pqueue()). -endif. @@ -150,8 +151,19 @@ out({pqueue, [{P, Q} | Queues]}) -> end, {R, NewQ}. -pout({queue, [], []}) -> - {empty, {queue, [], []}}; +out(_Priority, {queue, [], []} = Q) -> + {empty, Q}; +out(Priority, {queue, _, _} = Q) when Priority =< 0 -> + out(Q); +out(_Priority, {queue, _, _} = Q) -> + {empty, Q}; +out(Priority, {pqueue, [{P, _Q} | _Queues]} = Q) when Priority =< (-P) -> + out(Q); +out(_Priority, {pqueue, [_|_]} = Q) -> + {empty, Q}. + +pout({queue, [], []} = Q) -> + {empty, Q}; pout({queue, _, _} = Q) -> {{value, V}, Q1} = out(Q), {{value, V, 0}, Q1}; diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3656694e..a537e456 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -817,7 +817,7 @@ fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> true = try case ets:update_counter(Cache, MsgId, {4, -1}) of - 0 -> ets:delete(Cache, MsgId); + N when N =< 0 -> ets:delete(Cache, MsgId); _N -> true end catch error:badarg -> @@ -1114,22 +1114,24 @@ internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - requeue_next_messages(Q, N, ReadSeqId, WriteSeqId) + requeue_next_messages(Q, State, N, ReadSeqId, WriteSeqId) end ), true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN}), {ok, State} end. 
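The new priority_queue:out/2 shown above dequeues only when the head's priority is at least the given floor; a plain (unprioritised) queue counts as priority 0. An illustrative session (assumptions: nothing beyond the calls shown):

    Q0 = priority_queue:in(foo, 2, priority_queue:in(bar, 0, priority_queue:new())),
    {{value, foo}, Q1} = priority_queue:out(1, Q0),  %% head foo has priority 2 >= 1
    {empty, Q1} = priority_queue:out(1, Q1),         %% bar's priority 0 < 1
    {{value, bar}, _Q2} = priority_queue:out(Q1).    %% out/1 applies no floor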
-requeue_next_messages(_Q, 0, ReadSeq, WriteSeq) -> +requeue_next_messages(_Q, _State, 0, ReadSeq, WriteSeq) -> {ReadSeq, WriteSeq}; -requeue_next_messages(Q, N, ReadSeq, WriteSeq) -> - [Obj] = mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), +requeue_next_messages(Q, State, N, ReadSeq, WriteSeq) -> + [Obj = #dq_msg_loc { msg_id = MsgId }] = + mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), ok = mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeq}}, write), ok = mnesia:delete(rabbit_disk_queue, {Q, ReadSeq}, write), - requeue_next_messages(Q, N - 1, ReadSeq + 1, WriteSeq + 1). + decrement_cache(MsgId, State), + requeue_next_messages(Q, State, N - 1, ReadSeq + 1, WriteSeq + 1). internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index e66eb6b0..176ddddb 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -52,7 +52,7 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). --export([unfold/2, ceil/1]). +-export([unfold/2, ceil/1, keygets/2]). -import(mnesia). -import(lists). @@ -116,6 +116,8 @@ -spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> number()). +-spec(keygets/2 :: ([({K, V} | {K, non_neg_integer(), V})], [any()]) -> + [({K, V} | any())]). -endif. @@ -448,3 +450,18 @@ ceil(N) when N - trunc(N) > 0 -> 1 + trunc(N); ceil(N) -> N. + +keygets(Keys, KeyList) -> + lists:reverse( + lists:foldl( + fun({Key, Pos, Default}, Acc) -> + case lists:keysearch(Key, Pos, KeyList) of + false -> [{Key, Default} | Acc]; + {value, T} -> [T | Acc] + end; + ({Key, Default}, Acc) -> + case lists:keysearch(Key, 1, KeyList) of + false -> [{Key, Default} | Acc]; + {value, T} -> [T | Acc] + end + end, [], Keys)). 
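rabbit_misc:keygets/2 returns one entry per requested key, in request order, substituting the supplied default whenever the key is absent from the key-value list; this is what lets extract_timeout_minpri/1 pattern-match its result directly. An illustrative call:

    [{timeout, 100}, {min_priority, any}] =
        rabbit_misc:keygets([{timeout, infinity}, {min_priority, any}],
                            [{timeout, 100}]).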
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 8ab82677..6d76d23f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -65,7 +65,7 @@ test_priority_queue() -> %% empty Q Q = priority_queue:new(), - {true, true, 0, [], [], []} = test_priority_queue(Q), + {true, true, 0, [], [], [], []} = test_priority_queue(Q), %% 1-4 element no-priority Q true = lists:all(fun (X) -> X =:= passed end, @@ -74,57 +74,59 @@ test_priority_queue() -> %% 1-element priority Q Q1 = priority_queue:in(foo, 1, priority_queue:new()), - {true, false, 1, [{1, foo}], [foo], [{foo, 1}]} = test_priority_queue(Q1), + {true, false, 1, [{1, foo}], [foo], [], [{foo, 1}]} = + test_priority_queue(Q1), %% 2-element same-priority Q Q2 = priority_queue:in(bar, 1, Q1), - {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [{foo, 1}, {bar, 1}]} = - test_priority_queue(Q2), + {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [], [{foo, 1}, {bar, 1}]} + = test_priority_queue(Q2), %% 2-element different-priority Q Q3 = priority_queue:in(bar, 2, Q1), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [{bar, 2}, {foo, 1}]} = + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [bar], [{bar, 2}, {foo, 1}]} = test_priority_queue(Q3), %% 1-element negative priority Q Q4 = priority_queue:in(foo, -1, priority_queue:new()), - {true, false, 1, [{-1, foo}], [foo], [{foo, -1}]} = test_priority_queue(Q4), + {true, false, 1, [{-1, foo}], [foo], [], [{foo, -1}]} = + test_priority_queue(Q4), %% merge 2 * 1-element no-priority Qs Q5 = priority_queue:join(priority_queue:in(foo, Q), priority_queue:in(bar, Q)), - {true, false, 2, [{0, foo}, {0, bar}], [foo, bar], [{foo, 0}, {bar, 0}]} = - test_priority_queue(Q5), + {true, false, 2, [{0, foo}, {0, bar}], [foo, bar], [], [{foo, 0}, {bar, 0}]} + = test_priority_queue(Q5), %% merge 1-element no-priority Q with 1-element priority Q Q6 = priority_queue:join(priority_queue:in(foo, Q), priority_queue:in(bar, 1, Q)), - {true, false, 2, [{1, bar}, {0, foo}], [bar, foo], [{bar, 1}, {foo, 0}]} = - test_priority_queue(Q6), + {true, false, 2, [{1, bar}, {0, foo}], [bar, foo], [], [{bar, 1}, {foo, 0}]} + = test_priority_queue(Q6), %% merge 1-element priority Q with 1-element no-priority Q Q7 = priority_queue:join(priority_queue:in(foo, 1, Q), priority_queue:in(bar, Q)), - {true, false, 2, [{1, foo}, {0, bar}], [foo, bar], [{foo, 1}, {bar, 0}]} = - test_priority_queue(Q7), + {true, false, 2, [{1, foo}, {0, bar}], [foo, bar], [], [{foo, 1}, {bar, 0}]} + = test_priority_queue(Q7), %% merge 2 * 1-element same-priority Qs Q8 = priority_queue:join(priority_queue:in(foo, 1, Q), priority_queue:in(bar, 1, Q)), - {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [{foo, 1}, {bar, 1}]} = - test_priority_queue(Q8), + {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [], [{foo, 1}, {bar, 1}]} + = test_priority_queue(Q8), %% merge 2 * 1-element different-priority Qs Q9 = priority_queue:join(priority_queue:in(foo, 1, Q), priority_queue:in(bar, 2, Q)), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [{bar, 2}, {foo, 1}]} = - test_priority_queue(Q9), + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [bar], + [{bar, 2}, {foo, 1}]} = test_priority_queue(Q9), %% merge 2 * 1-element different-priority Qs (other way around) Q10 = priority_queue:join(priority_queue:in(bar, 2, Q), priority_queue:in(foo, 1, Q)), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [{bar, 2}, {foo, 1}]} = - test_priority_queue(Q10), + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [bar], + [{bar, 2}, {foo, 1}]} = 
test_priority_queue(Q10), passed. @@ -136,6 +138,11 @@ priority_queue_out_all(Q) -> {empty, _} -> []; {{value, V}, Q1} -> [V | priority_queue_out_all(Q1)] end. +priority_queue_out_2_all(Q) -> + case priority_queue:out(2, Q) of + {empty, _} -> []; + {{value, V}, Q1} -> [V | priority_queue_out_2_all(Q1)] + end. priority_queue_pout_all(Q) -> case priority_queue:pout(Q) of @@ -149,6 +156,7 @@ test_priority_queue(Q) -> priority_queue:len(Q), priority_queue:to_list(Q), priority_queue_out_all(Q), + priority_queue_out_2_all(Q), priority_queue_pout_all(Q)}. test_simple_n_element_queue(N) -> @@ -156,7 +164,7 @@ test_simple_n_element_queue(N) -> Q = priority_queue_in_all(priority_queue:new(), Items), ToListRes = [{0, X} || X <- Items], POutAllRes = [{X, 0} || X <- Items], - {true, false, N, ToListRes, Items, POutAllRes} = test_priority_queue(Q), + {true, false, N, ToListRes, Items, [], POutAllRes} = test_priority_queue(Q), passed. test_parsing() -> -- cgit v1.2.1 From 3bef2ad2f74e288306061c837d2222d2005dac7c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 17:10:10 +0100 Subject: additional documentation --- src/gen_server2.erl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 2784090a..c19f1601 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -334,7 +334,15 @@ multi_call(Nodes, Name, Req, Timeout) %%----------------------------------------------------------------- -%% enter_loop(Mod, Options, State, , ) ->_ +%% enter_loop(Mod, Options, State) -> _ +%% enter_loop(Mod, Options, State, ServerName) -> _ +%% enter_loop(Mod, Options, State, [{Key, Value}]) -> _ +%% enter_loop(Mod, Options, State, Timeout) -> _ +%% enter_loop(Mod, Options, State, ServerName, [{Key, Value}]) -> _ +%% enter_loop(Mod, Options, State, ServerName, Timeout) -> _ +%% enter_loop(Mod, Options, State, ServerName, Timeout, MinPri) -> _ +%% +%% {Key, Value} = {min_priority, MinPri} | {timeout, Timeout} %% %% Description: Makes an existing process into a gen_server. %% The calling process will enter the gen_server receive -- cgit v1.2.1 From 563dc889dc4fafa2198be63d03793741cb8427d7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 17:25:40 +0100 Subject: ...and with some testing and debugging, it might even work as described in the documentation! --- src/gen_server2.erl | 29 +++++++++++++---------------- src/rabbit_disk_queue.erl | 23 +++++++++++------------ 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index c19f1601..253c2eb1 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -477,11 +477,11 @@ loop(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Debug) -> Time, TimeoutState, MinPri, in(Input, Queue), Debug) after 0 -> process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, - MinPri, Queue, Debug, false) + MinPri, Queue, Debug) end. 
process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, - Debug, Hib) -> + Debug) -> Res = case MinPri of any -> priority_queue:out(Queue); _ -> priority_queue:out(MinPri, Queue) @@ -489,11 +489,11 @@ process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, case Res of {{value, Msg}, Queue1} -> process_msg(Parent, Name, State, Mod, - Time, TimeoutState, Queue1, Debug, Hib, Msg); + Time, TimeoutState, Queue1, Debug, Msg); {empty, Queue1} -> - Time1 = case {Hib, Time, TimeoutState} of - {true, _, _} -> 0; - {_, binary, {Current, _Min, undefined}} -> Current; + Time1 = case {Time, TimeoutState} of + {hibernate, _} -> 0; + {binary, {Current, _Min, undefined}} -> Current; _ -> Time end, receive @@ -502,11 +502,11 @@ process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Time, TimeoutState, MinPri, in(Input, Queue1), Debug) after Time1 -> process_msg(Parent, Name, State, Mod, - Time, TimeoutState, Queue1, Debug, Hib, - case Hib of - true -> roused_and_disinterested; - false -> timeout - end) + Time, TimeoutState, Queue1, Debug, + case Time == hibernate of + true -> roused_and_disinterested; + false -> timeout + end) end end. @@ -517,7 +517,7 @@ wake_hib(Parent, Name, State, Mod, TimeoutState, MinPri, Queue, Debug) -> end, TimeoutState1 = adjust_hibernate_after(TimeoutState), process_next_msg(Parent, Name, State, Mod, hibernate, TimeoutState1, - MinPri, in(Msg, Queue), Debug, true). + MinPri, in(Msg, Queue), Debug). adjust_hibernate_after(undefined) -> undefined; @@ -548,15 +548,12 @@ in(Input, Queue) -> priority_queue:in(Input, Queue). process_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, - Debug, _Hib, Msg) -> + Debug, Msg) -> case Msg of {system, From, Req} -> sys:handle_system_msg (Req, From, Parent, ?MODULE, Debug, [Name, State, Mod, Time, TimeoutState, Queue]); - %% gen_server puts Hib on the end as the 7th arg, but that - %% version of the function seems not to be documented so - %% leaving out for now. {'EXIT', Parent, Reason} -> terminate(Reason, Name, Msg, Mod, State, Debug); _Msg when Debug =:= [] -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a537e456..eaeef9e3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -433,7 +433,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, case Mode of mixed -> State2; disk -> to_disk_only_mode(State2) - end, {binary, ?HIBERNATE_AFTER_MIN}}. + end, {binary, ?HIBERNATE_AFTER_MIN}, 0}. handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), @@ -517,10 +517,9 @@ handle_info({'EXIT', _Pid, Reason}, State) -> handle_info(timeout, State = #dqstate { commit_timer_ref = undefined }) -> ok = report_memory(true, State), %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer - {noreply, stop_memory_timer(State), hibernate}; + {noreply, stop_memory_timer(State), hibernate, 0}; handle_info(timeout, State) -> - noreply(sync_current_file_handle(State)); -handle_info(_Info, State) -> + noreply(sync_current_file_handle(State)). noreply(State). 
terminate(_Reason, State) -> @@ -647,26 +646,26 @@ noreply(NewState) -> noreply1(NewState = #dqstate { on_sync_froms = [], commit_timer_ref = undefined }) -> - {noreply, NewState, binary}; + {noreply, NewState, binary, 0}; noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> - {noreply, start_commit_timer(NewState), 0}; + {noreply, start_commit_timer(NewState), 0, 0}; noreply1(NewState = #dqstate { on_sync_froms = [] }) -> - {noreply, stop_commit_timer(NewState), binary}; + {noreply, stop_commit_timer(NewState), binary, 0}; noreply1(NewState) -> - {noreply, NewState, 0}. + {noreply, NewState, 0, 0}. reply(Reply, NewState) -> reply1(Reply, start_memory_timer(NewState)). reply1(Reply, NewState = #dqstate { on_sync_froms = [], commit_timer_ref = undefined }) -> - {reply, Reply, NewState, binary}; + {reply, Reply, NewState, binary, 0}; reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> - {reply, Reply, start_commit_timer(NewState), 0}; + {reply, Reply, start_commit_timer(NewState), 0, 0}; reply1(Reply, NewState = #dqstate { on_sync_froms = [] }) -> - {reply, Reply, stop_commit_timer(NewState), binary}; + {reply, Reply, stop_commit_timer(NewState), binary, 0}; reply1(Reply, NewState) -> - {reply, Reply, NewState, 0}. + {reply, Reply, NewState, 0, 0}. form_filename(Name) -> filename:join(base_directory(), Name). -- cgit v1.2.1 From 9839c520c8b097ff96912141e3869c1db946f171 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Jul 2009 17:26:50 +0100 Subject: *cough* --- src/rabbit_disk_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index eaeef9e3..813ab7c4 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -519,7 +519,8 @@ handle_info(timeout, State = #dqstate { commit_timer_ref = undefined }) -> %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer {noreply, stop_memory_timer(State), hibernate, 0}; handle_info(timeout, State) -> - noreply(sync_current_file_handle(State)). + noreply(sync_current_file_handle(State)); +handle_info(_Info, State) -> noreply(State). terminate(_Reason, State) -> -- cgit v1.2.1 From 0bfe731400873c59abb1ccdb121d526e848dfbb8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Jul 2009 10:00:12 +0100 Subject: Adjusted documentation --- src/gen_server2.erl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 253c2eb1..87b56ba3 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -52,13 +52,17 @@ %% which it then realises it is not interested in. When this happens, %% handle_info(roused_and_disinterested, State) will be called as soon %% as there are no further messages to process (i.e. upon waking, the -%% message queue is drained, and a timeout of 0 is used). A suggested -%% use of this is to cater for low priority background casts, which -%% can be sent with negative priorities, and to use a priority of 0 or -%% higher for everything else. Then, if you return from handle_* with -%% a timeout of 0 and find handle_info(timeout, State) being called, -%% you can then return with a min_priority of 'any' and pick up the -%% low priority messages. +%% message queue is drained, and a timeout of 0 is used). +%% +%% This feature means that you can delay processing lower priority +%% messages. 
For example, when a min_priority of 0 is combined with
+%% the binary backoff timeout, you can delay processing any
+%% negative-priority messages until the first timeout fires which
+%% indicates that, given a steady state, the process has been idle for
+%% sufficiently long that it's reasonable to expect it to be
+%% uninterrupted by higher-priority messages for some little while;
+%% thus preventing low-priority, but lengthy jobs from getting in the
+%% way of higher priority jobs that need quick responses.
 
 %% All modifications are (C) 2009 LShift Ltd.
-- 
cgit v1.2.1 


From bd2f0801eb9754cd3b2de65ed5d82badfdcbd83f Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 10 Jul 2009 13:24:25 +0100
Subject: prefetch part 1. When a mixed_queue sees that the next item in its
 queue is on disk, it issues a low priority prefetch instruction to the disk
 queue, which populates the disk_queue's cache. Note that this shouldn't
 impact on memory as by virtue of the mixed_queue being in mixed mode, the
 contents of the queue are already accounted for in memory even though they
 were on disk. The effect of this is that when the deliver comes, it doesn't
 need to go to disk to read the message as the messages are already in cache.
 Testing: A 100,000 * 1Kb msg queue takes 15 seconds to drain (basic.get,
 noack) when the messages are in memory, in the mixed queue. On disk, without
 prefetch, takes 32 seconds. On disk, with prefetch, cache hot, takes 25
 seconds. The next step is to get the disk queue to signal back to the queue
 that the prefetch is done and for the queue to grab the messages from the
 disk_queue in advance, thus meaning that on delivery, all that is needed is
 the async acks being sent to the disk_queue (assuming the messages are not
 actually persistent).
---
 src/gen_server2.erl        |   5 ++-
 src/rabbit_disk_queue.erl  | 105 +++++++++++++++++++++++++++++----------------
 src/rabbit_mixed_queue.erl |  15 ++++++-
 3 files changed, 85 insertions(+), 40 deletions(-)

diff --git a/src/gen_server2.erl b/src/gen_server2.erl
index 87b56ba3..cf54811f 100644
--- a/src/gen_server2.erl
+++ b/src/gen_server2.erl
@@ -508,8 +508,9 @@ process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue,
             process_msg(Parent, Name, State, Mod,
                         Time, TimeoutState, Queue1, Debug,
                         case Time == hibernate of
-                            true -> roused_and_disinterested;
-                            false -> timeout
+                            true -> {roused_and_disinterested, MinPri};
+                            false when MinPri =:= any -> timeout;
+                            false -> {timeout, MinPri}
                         end)
         end
     end.
diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 813ab7c4..3a520ecd 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -42,7 +42,7 @@
          tx_publish/1, tx_commit/3, tx_cancel/1,
          requeue/2, purge/1, delete_queue/1,
          delete_non_durable_queues/1, auto_ack_next_message/1,
-         requeue_next_n/2
+         requeue_next_n/2, prefetch/2
         ]).
 
 -export([filesync/0, cache_info/0]).
@@ -345,6 +345,9 @@ report_memory() ->
 set_mode(Mode) ->
     gen_server2:cast(?SERVER, {set_mode, Mode}).
 
+prefetch(Q, Count) ->
+    gen_server2:pcast(?SERVER, -1, {prefetch, Q, Count}).
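The priority of -1 in prefetch/2 above is what makes the hint safe: the disk queue's other requests are all issued at priority 0 or higher, so a prefetch is only acted upon when the server would otherwise be idle. Illustrative use (queue name invented):

    %% Ask the disk queue to pull the next 10 messages of my_q into its
    %% message cache. Purely advisory: delivery behaves identically, just
    %% faster, when the prefetch has already happened.
    ok = rabbit_disk_queue:prefetch(my_q, 10).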
+ %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -507,21 +510,28 @@ handle_cast({set_mode, Mode}, State) -> mixed -> fun to_ram_disk_mode/1 end)(State)); handle_cast(report_memory, State) -> - %% call noreply1/1, not noreply/1, as we don't want to restart the + %% call noreply1/2, not noreply/1/2, as we don't want to restart the %% memory_report_timer %% by unsetting the timer, we force a report on the next normal message - noreply1(State #dqstate { memory_report_timer = undefined }). + noreply1(State #dqstate { memory_report_timer = undefined }, 0); +handle_cast({prefetch, Q, Count}, State) -> + {ok, State1} = internal_prefetch(Q, Count, State), + noreply(State1, any). %% set minpri to any handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; -handle_info(timeout, State = #dqstate { commit_timer_ref = undefined }) -> - ok = report_memory(true, State), - %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer - {noreply, stop_memory_timer(State), hibernate, 0}; -handle_info(timeout, State) -> +handle_info({timeout, 0}, State = #dqstate { commit_timer_ref = undefined }) -> + %% this is the binary timeout coming back, with minpri = 0 + %% don't use noreply/1/2 or noreply1/2 as they'll restart the memory timer + %% set timeout to 0, and go pick up any low priority messages + {noreply, stop_memory_timer(State), 0, any}; +handle_info({timeout, 0}, State) -> + %% must have commit_timer set, so timeout was 0, and we're not hibernating noreply(sync_current_file_handle(State)); -handle_info(_Info, State) -> - noreply(State). +handle_info(timeout, State) -> + %% no minpri supplied, so it must have been 'any', so go hibernate + ok = report_memory(true, State), + {noreply, State, hibernate, any}. terminate(_Reason, State) -> shutdown(State). @@ -643,30 +653,36 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, ets_bytes_per_record = undefined }. noreply(NewState) -> - noreply1(start_memory_timer(NewState)). + noreply(NewState, 0). + +noreply(NewState, MinPri) -> + noreply1(start_memory_timer(NewState), MinPri). noreply1(NewState = #dqstate { on_sync_froms = [], - commit_timer_ref = undefined }) -> - {noreply, NewState, binary, 0}; -noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> - {noreply, start_commit_timer(NewState), 0, 0}; -noreply1(NewState = #dqstate { on_sync_froms = [] }) -> - {noreply, stop_commit_timer(NewState), binary, 0}; -noreply1(NewState) -> - {noreply, NewState, 0, 0}. + commit_timer_ref = undefined }, MinPri) -> + {noreply, NewState, binary, MinPri}; +noreply1(NewState = #dqstate { commit_timer_ref = undefined }, MinPri) -> + {noreply, start_commit_timer(NewState), 0, MinPri}; +noreply1(NewState = #dqstate { on_sync_froms = [] }, MinPri) -> + {noreply, stop_commit_timer(NewState), binary, MinPri}; +noreply1(NewState, MinPri) -> + {noreply, NewState, 0, MinPri}. reply(Reply, NewState) -> - reply1(Reply, start_memory_timer(NewState)). + reply(Reply, NewState, 0). + +reply(Reply, NewState, MinPri) -> + reply1(Reply, start_memory_timer(NewState), MinPri). 
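A condensed summary of the four cases reply1/noreply1 now encode, unchanged in policy from before but with the priority floor threaded through (illustrative restatement of the clauses above, not new behaviour):

    %% on_sync_froms | commit timer | tail of the return tuple
    %% []            | unset        | ..., binary, MinPri  (idle: binary backoff)
    %% pending       | unset        | ..., 0, MinPri       (start commit timer)
    %% []            | set          | ..., binary, MinPri  (stop commit timer)
    %% pending       | set          | ..., 0, MinPri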
reply1(Reply, NewState = #dqstate { on_sync_froms = [], - commit_timer_ref = undefined }) -> - {reply, Reply, NewState, binary, 0}; -reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> - {reply, Reply, start_commit_timer(NewState), 0, 0}; -reply1(Reply, NewState = #dqstate { on_sync_froms = [] }) -> - {reply, Reply, stop_commit_timer(NewState), binary, 0}; -reply1(Reply, NewState) -> - {reply, Reply, NewState, 0, 0}. + commit_timer_ref = undefined }, MinPri) -> + {reply, Reply, NewState, binary, MinPri}; +reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }, MinPri) -> + {reply, Reply, start_commit_timer(NewState), 0, MinPri}; +reply1(Reply, NewState = #dqstate { on_sync_froms = [] }, MinPri) -> + {reply, Reply, stop_commit_timer(NewState), binary, MinPri}; +reply1(Reply, NewState, MinPri) -> + {reply, Reply, NewState, 0, MinPri}. form_filename(Name) -> filename:join(base_directory(), Name). @@ -829,8 +845,12 @@ decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> ok. insert_into_cache(Message = #basic_message { guid = MsgId }, - MsgSize, #dqstate { message_cache = Cache }) -> - true = ets:insert_new(Cache, {MsgId, Message, MsgSize, 1}), + MsgSize, Forced, #dqstate { message_cache = Cache }) -> + Count = case Forced of + true -> 0; + false -> 1 + end, + true = ets:insert_new(Cache, {MsgId, Message, MsgSize, Count}), ok. %% ---- INTERNAL RAW FUNCTIONS ---- @@ -843,7 +863,7 @@ internal_deliver(Q, ReadMsg, FakeDeliver, Remaining = WriteSeqId - ReadSeqId - 1, {ok, Result, State1} = internal_read_message( - Q, ReadSeqId, FakeDeliver, ReadMsg, State), + Q, ReadSeqId, ReadMsg, FakeDeliver, false, State), true = ets:insert(Sequences, {Q, ReadSeqId+1, WriteSeqId}), {ok, @@ -856,7 +876,20 @@ internal_deliver(Q, ReadMsg, FakeDeliver, end, State1} end. -internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> +internal_prefetch(Q, Count, State = #dqstate { sequences = Sequences }) -> + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), + Length = WriteSeqId - ReadSeqId, + Count1 = lists:min([Length, Count]), + StateN = + lists:foldl( + fun(N, State1) -> + {ok, _MsgStuff, State2} = + internal_read_message(Q, N, true, true, true, State1), + State2 + end, State, lists:seq(ReadSeqId, ReadSeqId + Count1 - 1)), + {ok, StateN}. + +internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) -> [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), @@ -876,14 +909,14 @@ internal_read_message(Q, ReadSeqId, FakeDeliver, ReadMsg, State) -> {ok, {MsgBody, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), Message = bin_to_msg(MsgBody), - ok = case RefCount of - 1 -> + ok = if RefCount > 1 orelse ForceInCache -> + insert_into_cache(Message, BodySize, + ForceInCache, State1); + true -> ok %% it's not in the cache and we only %% have 1 queue with the message. So %% don't bother putting it in the %% cache. 
- ok; - _ -> insert_into_cache(Message, BodySize, State1) end, {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, State1}; diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 61487c9d..2ef534ff 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -200,7 +200,8 @@ to_mixed_mode(TxnMessages, State = %% don't actually do anything to the disk MsgBuf = case Length of 0 -> queue:new(); - _ -> queue:from_list([{disk, Length}]) + _ -> ok = rabbit_disk_queue:prefetch(Q, Length), + queue:from_list([{disk, Length}]) end, %% remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty @@ -341,6 +342,7 @@ deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, end; false -> noack end, + ok = maybe_prefetch(Q, MsgBuf1), {Msg1, IsDelivered1, AckTag1, MsgBuf1}; {disk, Rem1} -> {Msg1 = #basic_message { is_persistent = IsPersistent }, @@ -353,7 +355,8 @@ deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, noack end, MsgBuf3 = case Rem1 of - 1 -> MsgBuf1; + 1 -> ok = maybe_prefetch(Q, MsgBuf1), + MsgBuf1; _ -> queue:in_r({disk, Rem1 - 1}, MsgBuf1) end, {Msg1, IsDelivered1, AckTag2, MsgBuf3} @@ -362,6 +365,14 @@ deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag, Rem}, State #mqstate { msg_buf = MsgBuf2, length = Rem }}. +maybe_prefetch(Q, MsgBuf) -> + case queue:peek(MsgBuf) of + empty -> ok; + {value, {disk, Count}} -> rabbit_disk_queue:prefetch(Q, Count); + {value, _} -> ok + end. + + remove_noacks(MsgsWithAcks) -> {AckTags, ASize} = lists:foldl( -- cgit v1.2.1 From a1afff89c51d8ccb719a0e88f27f32047be40144 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Jul 2009 17:28:56 +0100 Subject: Just adding a bit more testing really just to bump the code coverage up. --- src/rabbit_disk_queue.erl | 1 + src/rabbit_tests.erl | 46 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3a520ecd..4d00bc3a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -271,6 +271,7 @@ -spec(cache_info/0 :: () -> [{atom(), term()}]). -spec(report_memory/0 :: () -> 'ok'). -spec(set_mode/1 :: ('disk' | 'mixed') -> 'ok'). +-spec(prefetch/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -endif. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 6d76d23f..221279f7 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -742,12 +742,13 @@ delete_log_handlers(Handlers) -> test_disk_queue() -> rdq_stop(), rdq_virgin(), - passed = rdq_stress_gc(1000), + passed = rdq_stress_gc(5000), passed = rdq_test_startup_with_queue_gaps(), passed = rdq_test_redeliver(), passed = rdq_test_purge(), passed = rdq_test_mixed_queue_modes(), passed = rdq_test_mode_conversion_mid_txn(), + passed = rdq_test_disk_queue_modes(), rdq_virgin(), passed. @@ -1151,6 +1152,49 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc rabbit_mixed_queue:ack(AckTags, MS8) end, 0 = rabbit_mixed_queue:length(MS9), + Msg = rdq_message(0, <<0:256>>), + {ok, AckTag, MS10} = rabbit_mixed_queue:publish_delivered(Msg, MS9), + {ok,MS11} = rabbit_mixed_queue:ack([{Msg, AckTag}], MS10), + 0 = rabbit_mixed_queue:length(MS11), + passed. 
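A note on the prefetch hook wired in above: the mixed queue only ever peeks at the head of its message buffer, and only issues the asynchronous prefetch when that head is a {disk, Count} run-length token, i.e. when the next deliveries would otherwise need synchronous disk reads. A minimal, self-contained sketch of that pattern follows; the Prefetch fun standing in for rabbit_disk_queue:prefetch/2 is an assumption for illustration only.

    -module(prefetch_sketch).
    -export([maybe_prefetch/3]).

    %% Peek-then-prefetch: fire the hint only when the head of the buffer
    %% is a {disk, Count} token, meaning the next deliveries need disk reads.
    maybe_prefetch(Q, MsgBuf, Prefetch) ->
        case queue:peek(MsgBuf) of
            empty                  -> ok;
            {value, {disk, Count}} -> Prefetch(Q, Count);
            {value, _InRamMsg}     -> ok
        end.

For example, maybe_prefetch(my_q, queue:from_list([{disk, 10}]), fun (Q, N) -> io:format("prefetch ~p from ~p~n", [N, Q]) end) fires the hint, while a buffer whose head is an in-RAM message does nothing.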
+
+rdq_test_disk_queue_modes() ->
+    rdq_virgin(),
+    rdq_start(),
+    Msg = <<0:(8*256)>>,
+    Total = 1000,
+    Half1 = lists:seq(1,round(Total/2)),
+    Half2 = lists:seq(1 + round(Total/2), Total),
+    [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- Half1],
+    ok = rabbit_disk_queue:tx_commit(q, Half1, []),
+    io:format("Publish done~n", []),
+    ok = rabbit_disk_queue:to_disk_only_mode(),
+    io:format("To Disk Only done~n", []),
+    [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- Half2],
+    ok = rabbit_disk_queue:tx_commit(q, Half2, []),
+    Seqs = [begin
+                Remaining = Total - N,
+                {Message, _TSize, false, SeqId, Remaining} =
+                    rabbit_disk_queue:deliver(q),
+                ok = rdq_match_message(Message, N, Msg, 256),
+                SeqId
+            end || N <- Half1],
+    io:format("Deliver first half done~n", []),
+    ok = rabbit_disk_queue:to_ram_disk_mode(),
+    io:format("To RAM Disk done~n", []),
+    Seqs2 = [begin
+                 Remaining = Total - N,
+                 {Message, _TSize, false, SeqId, Remaining} =
+                     rabbit_disk_queue:deliver(q),
+                 ok = rdq_match_message(Message, N, Msg, 256),
+                 SeqId
+             end || N <- Half2],
+    io:format("Deliver second half done~n", []),
+    ok = rabbit_disk_queue:tx_commit(q, [], Seqs),
+    ok = rabbit_disk_queue:to_disk_only_mode(),
+    ok = rabbit_disk_queue:tx_commit(q, [], Seqs2),
+    empty = rabbit_disk_queue:deliver(q),
+    rdq_stop(),
     passed.

 rdq_time_commands(Funcs) ->
-- 
cgit v1.2.1


From d64d8b81013ff78ca24381d38f5cdfa2e0a76765 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 15 Jul 2009 18:03:21 +0100
Subject: Substantial changes to mixed_queue.

Previously, persistent and non-persistent messages went into the same queue
on disk. The advantage of this is that you don't need to track which queue
you're currently reading from and for how many messages. However, the
downside is that on queue recovery you need to iterate through the entire
queue and delete all non-persistent messages. This takes a huge amount of
time.

So now this is changed. Each amqqueue is now backed by two on-disk queues:
one for persistent messages and one for non-persistent messages. Thus queue
recovery is now trivial - just delete the non-persistent queue. However, we
now _always_ use the erlang queue in mixed_queue to track (in disk mode) how
many messages of each queue we need to read (i.e. run-length encoding). This,
in the worst case (alternating persistent and non-persistent messages),
incurs a per-message cost. It's possible we need some sort of disk-based
queue (AGH!). Not sure. Provided the queue only contains one sort of message,
it degenerates to a simple single counter.

All tests pass. However, there is a bug: on recovery, the size of the queue
(RAM cost) is not known. As such, the reporting of the queue to the
queue_mode manager on queue recovery is incorrect (it starts off at 0, and
can go negative). I've not decided how to fix this yet, because I do not want
to have to iterate through all the messages to get the queue size out!
---
 src/rabbit_disk_queue.erl  |  49 +++--
 src/rabbit_mixed_queue.erl | 439 ++++++++++++++++++++++++---------------------
 2 files changed, 264 insertions(+), 224 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 4d00bc3a..8b148777 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -42,7 +42,7 @@
          tx_publish/1, tx_commit/3, tx_cancel/1,
          requeue/2, purge/1, delete_queue/1,
          delete_non_durable_queues/1, auto_ack_next_message/1,
-         requeue_next_n/2, prefetch/2
+         requeue_next_n/2, prefetch/2, length/1
         ]).
 
 -export([filesync/0, cache_info/0]).
@@ -255,6 +255,7 @@ ( 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). +-spec(auto_ack_next_message/1 :: (queue_name()) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> 'ok'). @@ -262,11 +263,13 @@ -spec(requeue/2 :: (queue_name(), [{{msg_id(), seq_id()}, bool()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). +-spec(delete_queue/1 :: (queue_name()) -> 'ok'). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). +-spec(length/1 :: (queue_name()) -> non_neg_integer()). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). --spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). +-spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). -spec(report_memory/0 :: () -> 'ok'). @@ -322,6 +325,9 @@ delete_non_durable_queues(DurableQueues) -> gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). +length(Q) -> + gen_server2:call(?SERVER, {length, Q}, infinity). + stop() -> gen_server2:call(?SERVER, stop, infinity). @@ -455,6 +461,9 @@ handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), reply(Count, State1); +handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), + reply(WriteSeqId - ReadSeqId, State); handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State) -> @@ -1033,7 +1042,7 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, last_sync_offset = SyncOffset }) -> {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - WriteSeqId = InitWriteSeqId + length(PubMsgIds), + WriteSeqId = InitWriteSeqId + erlang:length(PubMsgIds), {atomic, {InCurFile, WriteSeqId, State1}} = mnesia:transaction( fun() -> @@ -1088,7 +1097,8 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned - MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), + MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), + undefined)), remove_messages(undefined, MsgSeqIds, false, State). 
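The new length/1 call above is pure arithmetic over the Sequences ets table: a queue's backlog is WriteSeqId - ReadSeqId. A runnable sketch of that lookup; the {0, 0} default for a queue with no entry is an assumption here, matching how an empty queue would behave.

    -module(seq_len_sketch).
    -export([demo/0]).

    demo() ->
        Sequences = ets:new(sequences, [set]),
        true = ets:insert(Sequences, {my_q, 4, 9}),   %% read at 4, write at 9
        5 = len(Sequences, my_q),                     %% five messages pending
        0 = len(Sequences, unknown_q),
        ok.

    %% Length of Q is the gap between its write and read sequence ids.
    len(Sequences, Q) ->
        {ReadSeqId, WriteSeqId} =
            case ets:lookup(Sequences, Q) of
                []           -> {0, 0};  %% assumed default for unknown queues
                [{Q, R, W}]  -> {R, W}
            end,
        WriteSeqId - ReadSeqId.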
internal_requeue(_Q, [], State) -> @@ -1524,7 +1534,8 @@ load_from_disk(State) -> fun (#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }, true) -> - case length(dets_ets_lookup(State1, MsgId)) of + case erlang:length + (dets_ets_lookup(State1, MsgId)) of 0 -> ok == mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write); 1 -> true @@ -1622,13 +1633,13 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, - msg_id)) of + case erlang:length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_' + }, + msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = @@ -1662,13 +1673,13 @@ recover_crashed_compactions(Files, TmpFiles) -> verify_messages_in_mnesia(MsgIds) -> lists:foreach( fun (MsgId) -> - true = 0 < length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, - msg_id)) + true = 0 < erlang:length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_' + }, + msg_id)) end, MsgIds). recover_crashed_compactions1(Files, TmpFile) -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 2ef534ff..a9013f3d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -100,10 +100,12 @@ -endif. init(Queue, IsDurable, disk) -> - purge_non_persistent_messages( - #mqstate { mode = disk, msg_buf = queue:new(), queue = Queue, - is_durable = IsDurable, length = 0, memory_size = 0, - memory_gain = 0, memory_loss = 0 }); + Len = rabbit_disk_queue:length(Queue), + ok = rabbit_disk_queue:delete_queue(transient_queue(Queue)), + MsgBuf = inc_queue_length(Queue, queue:new(), Len), + {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, + is_durable = IsDurable, length = Len, + memory_size = 0, memory_gain = 0, memory_loss = 0 }}; init(Queue, IsDurable, mixed) -> {ok, State} = init(Queue, IsDurable, disk), to_mixed_mode([], State). @@ -126,7 +128,10 @@ to_disk_only_mode(TxnMessages, State = %% message on disk. %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. - ok = send_messages_to_disk(Q, MsgBuf, 0, 0, []), + TransQ = transient_queue(Q), + {ok, MsgBuf1} = + send_messages_to_disk(IsDurable, Q, TransQ, MsgBuf, 0, 0, [], + queue:new()), %% tx_publish txn messages. Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -139,38 +144,49 @@ to_disk_only_mode(TxnMessages, State = end end, TxnMessages), garbage_collect(), - {ok, State #mqstate { mode = disk, msg_buf = queue:new() }}. + {ok, State #mqstate { mode = disk, msg_buf = MsgBuf1 }}. 
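The scattered rewrites of length(...) to erlang:length(...) in this patch are forced by the new length/1 export: once the module defines its own length/1, an unqualified call inside the module resolves to the local function and shadows the auto-imported BIF, so list lengths must be taken with erlang:length/1. A small illustration; the no_auto_import attribute is needed on modern Erlang/OTP, which otherwise rejects the clash outright.

    -module(length_clash_sketch).
    -compile({no_auto_import, [length/1]}).
    -export([length/1, demo/0]).

    %% Local length/1, standing in for the disk queue's gen_server call.
    length(QName) ->
        {queue_length_of, QName}.

    demo() ->
        {queue_length_of, my_q} = length(my_q),  %% resolves to the local fun
        3 = erlang:length([a, b, c]),            %% the BIF must be qualified
        ok.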
-send_messages_to_disk(Q, Queue, RequeueCount, PublishCount, Commit) -> +send_messages_to_disk(IsDurable, Q, TransQ, Queue, PublishCount, RequeueCount, + Commit, MsgBuf) -> case queue:out(Queue) of {empty, Queue} -> - ok = flush_messages_to_disk_queue(Q, Commit), - [] = flush_requeue_to_disk_queue(Q, RequeueCount, []), - ok; - {{value, {Msg = #basic_message { guid = MsgId }, _IsDelivered, OnDisk}}, - Queue1} -> - case OnDisk of - true -> - ok = flush_messages_to_disk_queue(Q, Commit), + ok = flush_messages_to_disk_queue(TransQ, Commit), + [] = flush_requeue_to_disk_queue(TransQ, RequeueCount, []), + {ok, MsgBuf}; + {{value, {Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + _IsDelivered}}, Queue1} -> + case IsDurable andalso IsPersistent of + true -> %% it's already in the persistent Q send_messages_to_disk( - Q, Queue1, 1 + RequeueCount, 0, []); + IsDurable, Q, TransQ, Queue1, PublishCount, RequeueCount, + Commit, inc_queue_length(Q, MsgBuf, 1)); false -> - Commit1 = - flush_requeue_to_disk_queue(Q, RequeueCount, Commit), + Commit1 = flush_requeue_to_disk_queue + (TransQ, RequeueCount, Commit), ok = rabbit_disk_queue:tx_publish(Msg), case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of true -> - ok = flush_messages_to_disk_queue(Q, Commit1), - send_messages_to_disk(Q, Queue1, 0, 1, [MsgId]); + ok = flush_messages_to_disk_queue(TransQ, Commit1), + send_messages_to_disk( + IsDurable, Q, TransQ, Queue1, 1, 0, [MsgId], + inc_queue_length(TransQ, MsgBuf, 1)); false -> - send_messages_to_disk - (Q, Queue1, 0, PublishCount + 1, - [MsgId | Commit1]) + send_messages_to_disk( + IsDurable, Q, TransQ, Queue1, PublishCount + 1, 0, + [MsgId | Commit1], + inc_queue_length(TransQ, MsgBuf, 1)) end end; - {{value, {disk, Count}}, Queue2} -> - ok = flush_messages_to_disk_queue(Q, Commit), - send_messages_to_disk(Q, Queue2, RequeueCount + Count, 0, []) + {{value, {Q, Count}}, Queue1} -> + send_messages_to_disk(IsDurable, Q, TransQ, Queue1, PublishCount, + RequeueCount, Commit, + inc_queue_length(Q, MsgBuf, Count)); + {{value, {TransQ, Count}}, Queue1} -> + ok = flush_messages_to_disk_queue(TransQ, Commit), + send_messages_to_disk(IsDurable, Q, TransQ, Queue1, 0, + RequeueCount + Count, [], + inc_queue_length(TransQ, MsgBuf, Count)) end. flush_messages_to_disk_queue(Q, Commit) -> @@ -192,17 +208,13 @@ flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> {ok, State}; to_mixed_mode(TxnMessages, State = - #mqstate { mode = disk, queue = Q, length = Length, - is_durable = IsDurable }) -> + #mqstate { mode = disk, queue = Q, + is_durable = IsDurable, msg_buf = MsgBuf }) -> rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), %% load up a new queue with a token that says how many messages - %% are on disk + %% are on disk (this is already built for us by the disk mode) %% don't actually do anything to the disk - MsgBuf = case Length of - 0 -> queue:new(); - _ -> ok = rabbit_disk_queue:prefetch(Q, Length), - queue:from_list([{disk, Length}]) - end, + ok = maybe_prefetch(MsgBuf), %% remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_cancel/2 which is why @@ -219,57 +231,58 @@ to_mixed_mode(TxnMessages, State = true -> rabbit_disk_queue:tx_cancel(Cancel) end, garbage_collect(), - {ok, State #mqstate { mode = mixed, msg_buf = MsgBuf }}. 
- -purge_non_persistent_messages(State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable, - memory_size = 0 }) -> - %% iterate through the content on disk, ack anything which isn't - %% persistent, accumulate everything else that is persistent and - %% requeue it - {Acks, Requeue, Length, QSize} = - deliver_all_messages(Q, IsDurable, [], [], 0, 0), - ok = if Requeue == [] -> ok; - true -> - rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)) - end, - ok = if Acks == [] -> ok; - true -> rabbit_disk_queue:ack(Q, Acks) - end, - {ok, State #mqstate { length = Length, memory_size = QSize }}. - -deliver_all_messages(Q, IsDurable, Acks, Requeue, Length, QSize) -> - case rabbit_disk_queue:deliver(Q) of - empty -> {Acks, Requeue, Length, QSize}; - {Msg = #basic_message { is_persistent = IsPersistent }, - _Size, IsDelivered, AckTag, _Remaining} -> - OnDisk = IsPersistent andalso IsDurable, - {Acks1, Requeue1, Length1, QSize1} = - if OnDisk -> { Acks, - [{AckTag, IsDelivered} | Requeue], - Length + 1, QSize + size_of_message(Msg) }; - true -> { [AckTag | Acks], Requeue, Length, QSize } - end, - deliver_all_messages(Q, IsDurable, Acks1, Requeue1, Length1, QSize1) + {ok, State #mqstate { mode = mixed }}. + +transient_queue(Queue) -> + {Queue, transient}. + +inc_queue_length(_Queue, MsgBuf, 0) -> + MsgBuf; +inc_queue_length(Queue, MsgBuf, Count) -> + case queue:out_r(MsgBuf) of + {empty, MsgBuf} -> + queue:in({Queue, Count}, MsgBuf); + {{value, {Queue, Len}}, MsgBuf1} -> + queue:in({Queue, Len + Count}, MsgBuf1); + {{value, _}, _MsgBuf1} -> + queue:in({Queue, Count}, MsgBuf) end. -publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, - memory_size = QSize, memory_gain = Gain }) -> - ok = rabbit_disk_queue:publish(Q, Msg, false), +dec_queue_length(MsgBuf) -> + {{value, {Queue, Len}}, MsgBuf1} = queue:out(MsgBuf), + MsgBuf2 = case Len of + 1 -> ok = maybe_prefetch(MsgBuf1), + MsgBuf1; + _ -> queue:in_r({Queue, Len-1}, MsgBuf1) + end, + {Queue, MsgBuf2}. + +publish(Msg = #basic_message { is_persistent = IsPersistent }, + State = #mqstate { mode = disk, queue = Q, length = Length, + is_durable = IsDurable, msg_buf = MsgBuf, + memory_size = QSize, memory_gain = Gain }) -> + Persist = IsDurable andalso IsPersistent, + PubQ = case Persist of + true -> Q; + false -> transient_queue(Q) + end, + MsgBuf1 = inc_queue_length(PubQ, MsgBuf, 1), + ok = rabbit_disk_queue:publish(PubQ, Msg, false), MsgSize = size_of_message(Msg), - {ok, State #mqstate { length = Length + 1, memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }}; + {ok, State #mqstate { memory_gain = Gain + MsgSize, + memory_size = QSize + MsgSize, + msg_buf = MsgBuf1, length = Length + 1 }}; publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, msg_buf = MsgBuf, length = Length, memory_size = QSize, memory_gain = Gain }) -> - OnDisk = IsDurable andalso IsPersistent, - ok = if OnDisk -> - rabbit_disk_queue:publish(Q, Msg, false); - true -> ok + Persist = IsDurable andalso IsPersistent, + ok = case Persist of + true -> rabbit_disk_queue:publish(Q, Msg, false); + false -> ok end, MsgSize = size_of_message(Msg), - {ok, State #mqstate { msg_buf = queue:in({Msg, false, OnDisk}, MsgBuf), + {ok, State #mqstate { msg_buf = queue:in({Msg, false}, MsgBuf), length = Length + 1, memory_size = QSize + MsgSize, memory_gain = Gain + MsgSize }}. 
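inc_queue_length/3 and dec_queue_length/1 above implement the run-length encoding described in the commit message: the buffer is a queue of {QueueName, Count} tokens, the rear token is extended when consecutive publishes hit the same on-disk queue, and the front token is shrunk on delivery. The same logic in a self-contained sketch (the prefetch hook is omitted here):

    -module(rle_buf_sketch).
    -export([demo/0]).

    %% Extend the rear token when it names the same queue, else append.
    inc(_Queue, MsgBuf, 0) ->
        MsgBuf;
    inc(Queue, MsgBuf, Count) ->
        case queue:out_r(MsgBuf) of
            {empty, MsgBuf}              -> queue:in({Queue, Count}, MsgBuf);
            {{value, {Queue, Len}}, MB1} -> queue:in({Queue, Len + Count}, MB1);
            {{value, _}, _MB1}           -> queue:in({Queue, Count}, MsgBuf)
        end.

    %% Shrink the front token by one; drop it when it reaches zero.
    dec(MsgBuf) ->
        {{value, {Queue, Len}}, MsgBuf1} = queue:out(MsgBuf),
        case Len of
            1 -> {Queue, MsgBuf1};
            _ -> {Queue, queue:in_r({Queue, Len - 1}, MsgBuf1)}
        end.

    demo() ->
        B0 = inc(q_trans, inc(q_pers, inc(q_pers, queue:new(), 1), 1), 3),
        [{q_pers, 2}, {q_trans, 3}] = queue:to_list(B0),
        {q_pers, B1} = dec(B0),
        [{q_pers, 1}, {q_trans, 3}] = queue:to_list(B1),
        ok.

As the commit message notes, a queue holding only one sort of message degenerates to a single {Queue, N} counter, while strictly alternating persistence produces one token per message.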
@@ -279,23 +292,29 @@ publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, State = #mqstate { mode = Mode, is_durable = IsDurable, - queue = Q, length = 0, memory_size = QSize, - memory_gain = Gain }) + queue = Q, length = 0, + memory_size = QSize, memory_gain = Gain }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> - rabbit_disk_queue:publish(Q, Msg, false), + Persist = IsDurable andalso IsPersistent, + PubQ = case Persist of + true -> Q; + false -> transient_queue(Q) + end, + rabbit_disk_queue:publish(PubQ, Msg, false), MsgSize = size_of_message(Msg), State1 = State #mqstate { memory_size = QSize + MsgSize, memory_gain = Gain + MsgSize }, - if IsDurable andalso IsPersistent -> + case Persist of + true -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag - {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), + {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(PubQ), {ok, AckTag, State1}; - true -> + false -> %% in this case, we don't actually care about the ack, so %% auto ack it (asynchronously). - ok = rabbit_disk_queue:auto_ack_next_message(Q), + ok = rabbit_disk_queue:auto_ack_next_message(PubQ), {ok, noack, State1} end; publish_delivered(Msg, State = @@ -307,103 +326,77 @@ publish_delivered(Msg, State = deliver(State = #mqstate { length = 0 }) -> {empty, State}; -deliver(State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - length = Length }) -> - {Msg = #basic_message { is_persistent = IsPersistent }, - _Size, IsDelivered, AckTag, Remaining} - = rabbit_disk_queue:deliver(Q), - AckTag1 = if IsPersistent andalso IsDurable -> AckTag; - true -> ok = rabbit_disk_queue:ack(Q, [AckTag]), - noack - end, - {{Msg, IsDelivered, AckTag1, Remaining}, - State #mqstate { length = Length - 1 }}; -deliver(State = #mqstate { mode = mixed, msg_buf = MsgBuf, queue = Q, +deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, is_durable = IsDurable, length = Length }) -> - {{value, Value}, MsgBuf1} - = queue:out(MsgBuf), + {{value, Value}, MsgBuf1} = queue:out(MsgBuf), {Msg, IsDelivered, AckTag, MsgBuf2} = case Value of {Msg1 = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - IsDelivered1, OnDisk} -> + IsDelivered1} -> AckTag1 = - case OnDisk of + case IsDurable andalso IsPersistent of true -> - case IsPersistent andalso IsDurable of - true -> - {MsgId, IsDelivered1, AckTag2, _PersistRem} - = rabbit_disk_queue:phantom_deliver(Q), - AckTag2; - false -> - ok = rabbit_disk_queue:auto_ack_next_message - (Q), - noack - end; - false -> noack + {MsgId, IsDelivered1, AckTag2, _PersistRem} + = rabbit_disk_queue:phantom_deliver(Q), + AckTag2; + false -> + noack end, - ok = maybe_prefetch(Q, MsgBuf1), + ok = maybe_prefetch(MsgBuf1), {Msg1, IsDelivered1, AckTag1, MsgBuf1}; - {disk, Rem1} -> + _ -> + {ReadQ, MsgBuf3} = dec_queue_length(MsgBuf), {Msg1 = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered1, AckTag1, _PersistRem} - = rabbit_disk_queue:deliver(Q), + = rabbit_disk_queue:deliver(ReadQ), AckTag2 = - case IsPersistent andalso IsDurable of - true -> AckTag1; - false -> rabbit_disk_queue:ack(Q, [AckTag1]), - noack + case IsDurable andalso IsPersistent of + true -> + AckTag1; + false -> + ok = rabbit_disk_queue:ack(ReadQ, [AckTag1]), + noack end, - MsgBuf3 = case Rem1 of - 1 -> ok = maybe_prefetch(Q, MsgBuf1), - MsgBuf1; - _ -> queue:in_r({disk, Rem1 - 1}, MsgBuf1) - end, {Msg1, IsDelivered1, 
AckTag2, MsgBuf3} end, Rem = Length - 1, {{Msg, IsDelivered, AckTag, Rem}, State #mqstate { msg_buf = MsgBuf2, length = Rem }}. -maybe_prefetch(Q, MsgBuf) -> +maybe_prefetch(MsgBuf) -> case queue:peek(MsgBuf) of - empty -> ok; - {value, {disk, Count}} -> rabbit_disk_queue:prefetch(Q, Count); - {value, _} -> ok + empty -> + ok; + {value, {#basic_message {}, _IsDelivered}} -> + ok; + {value, {Q, Count}} -> + rabbit_disk_queue:prefetch(Q, Count) end. - remove_noacks(MsgsWithAcks) -> - {AckTags, ASize} = - lists:foldl( - fun ({Msg, noack}, {AccAckTags, AccSize}) -> - {AccAckTags, size_of_message(Msg) + AccSize}; - ({Msg, AckTag}, {AccAckTags, AccSize}) -> - {[AckTag | AccAckTags], size_of_message(Msg) + AccSize} - end, {[], 0}, MsgsWithAcks), - {AckTags, ASize}. + lists:foldl( + fun ({Msg, noack}, {AccAckTags, AccSize}) -> + {AccAckTags, size_of_message(Msg) + AccSize}; + ({Msg, AckTag}, {AccAckTags, AccSize}) -> + {[AckTag | AccAckTags], size_of_message(Msg) + AccSize} + end, {[], 0}, MsgsWithAcks). ack(MsgsWithAcks, State = #mqstate { queue = Q, memory_size = QSize, memory_loss = Loss }) -> - ASize = case remove_noacks(MsgsWithAcks) of - {[], ASize1} -> ASize1; - {AckTags, ASize1} -> rabbit_disk_queue:ack(Q, AckTags), - ASize1 + {AckTags, ASize} = remove_noacks(MsgsWithAcks), + ok = case AckTags of + [] -> ok; + _ -> rabbit_disk_queue:ack(Q, AckTags) end, State1 = State #mqstate { memory_size = QSize - ASize, memory_loss = Loss + ASize }, {ok, State1}. -tx_publish(Msg, State = #mqstate { mode = disk, memory_size = QSize, - memory_gain = Gain }) -> - ok = rabbit_disk_queue:tx_publish(Msg), - MsgSize = size_of_message(Msg), - {ok, State #mqstate { memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }}; -tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, State = - #mqstate { mode = mixed, is_durable = IsDurable, - memory_size = QSize, memory_gain = Gain }) - when IsDurable andalso IsPersistent -> +tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, + State = #mqstate { mode = Mode, memory_size = QSize, + is_durable = IsDurable, memory_gain = Gain }) + when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> ok = rabbit_disk_queue:tx_publish(Msg), MsgSize = size_of_message(Msg), {ok, State #mqstate { memory_size = QSize + MsgSize, @@ -418,16 +411,64 @@ tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize, only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). +%% The last 2 params are accumulators. We work through the publishes, +%% sorting out our msgbuf as we go. Finally, when no more work to do, +%% we commit first transient, and the persistent msgs. This is safe +%% because in case of failure, transient messages will be lost on +%% restart anyway. 
+commit_to_queues(_IsDurable, _Q, _TransQ, MsgBuf, [], [], [], []) -> + MsgBuf; +commit_to_queues(_IsDurable, Q, _TransQ, MsgBuf, AckTags, [], + PersistMsgIds, []) -> + MsgIds = lists:flatten(lists:reverse(PersistMsgIds)), + ok = rabbit_disk_queue:tx_commit(Q, MsgIds, AckTags), + MsgBuf; +commit_to_queues(IsDurable, Q, TransQ, MsgBuf, AckTags, [], + PersistMsgIds, TransMsgIds) -> + MsgIds = lists:flatten(lists:reverse(TransMsgIds)), + ok = rabbit_disk_queue:tx_commit(TransQ, MsgIds, []), + commit_to_queues(IsDurable, Q, TransQ, MsgBuf, AckTags, [], + PersistMsgIds, []); +commit_to_queues(false, Q, TransQ, MsgBuf, AckTags, Publishes, [], []) -> + MsgIds = only_msg_ids(Publishes), + MsgBuf1 = inc_queue_length(TransQ, MsgBuf, erlang:length(MsgIds)), + commit_to_queues(false, Q, TransQ, MsgBuf1, AckTags, [], [], [MsgIds]); +commit_to_queues(true, Q, TransQ, MsgBuf, AckTags, Publishes = + [#basic_message { is_persistent = true } | _], + PersistAcc, TransAcc) -> + {Persist, Publishes1} = lists:splitwith(fun is_persistent/1, Publishes), + MsgIds = only_msg_ids(Persist), + MsgBuf1 = inc_queue_length(Q, MsgBuf, erlang:length(MsgIds)), + commit_to_queues(true, Q, TransQ, MsgBuf1, AckTags, Publishes1, + [MsgIds | PersistAcc], TransAcc); +commit_to_queues(true, Q, TransQ, MsgBuf, AckTags, Publishes, + PersistAcc, TransAcc) -> + %% not persistent + {Trans, Publishes1} = lists:splitwith(fun is_not_persistent/1, Publishes), + MsgIds = only_msg_ids(Trans), + MsgBuf1 = inc_queue_length(TransQ, MsgBuf, erlang:length(MsgIds)), + commit_to_queues(true, Q, TransQ, MsgBuf1, AckTags, Publishes1, + PersistAcc, [MsgIds | TransAcc]). + +is_persistent(#basic_message { is_persistent = IsPersistent }) -> + IsPersistent. + +is_not_persistent(#basic_message { is_persistent = IsPersistent }) -> + not IsPersistent. 
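commit_to_queues above relies on lists:splitwith/2 to peel maximal runs of equal persistence off the front of the publish list, so message order survives the split across the two on-disk queues. A sketch of just that run-splitting step, over hypothetical {MsgId, IsPersistent} pairs:

    -module(split_runs_sketch).
    -export([demo/0]).

    %% Split a publish list into maximal ordered runs of equal persistence,
    %% as commit_to_queues does before each tx_commit.
    runs([]) ->
        [];
    runs(Msgs = [{_Id, IsPersistent} | _]) ->
        {Run, Rest} =
            lists:splitwith(fun ({_I, P}) -> P =:= IsPersistent end, Msgs),
        Kind = case IsPersistent of true  -> persistent;
                                    false -> transient
               end,
        [{Kind, [I || {I, _P} <- Run]} | runs(Rest)].

    demo() ->
        [{persistent, [a, b]}, {transient, [c]}, {persistent, [d]}] =
            runs([{a, true}, {b, true}, {c, false}, {d, true}]),
        ok.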
+ tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = disk, queue = Q, length = Length, - memory_size = QSize, memory_loss = Loss }) -> + memory_size = QSize, memory_loss = Loss, + is_durable = IsDurable, msg_buf = MsgBuf }) -> {RealAcks, ASize} = remove_noacks(MsgsWithAcks), - ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; - true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), - RealAcks) - end, + MsgBuf1 = case ([] == Publishes) andalso ([] == RealAcks) of + true -> MsgBuf; + false -> commit_to_queues + (IsDurable, Q, transient_queue(Q), MsgBuf, + RealAcks, Publishes, [], []) + end, {ok, State #mqstate { length = Length + erlang:length(Publishes), - memory_size = QSize - ASize, + msg_buf = MsgBuf1, memory_size = QSize - ASize, memory_loss = Loss + ASize }}; tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, @@ -436,19 +477,17 @@ tx_commit(Publishes, MsgsWithAcks, {PersistentPubs, MsgBuf1} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, {Acc, MsgBuf2}) -> - OnDisk = IsPersistent andalso IsDurable, Acc1 = - if OnDisk -> - [Msg #basic_message.guid | Acc]; - true -> Acc + case IsPersistent andalso IsDurable of + true -> [Msg #basic_message.guid | Acc]; + false -> Acc end, - {Acc1, queue:in({Msg, false, OnDisk}, MsgBuf2)} + {Acc1, queue:in({Msg, false}, MsgBuf2)} end, {[], MsgBuf}, Publishes), - %% foldl reverses, so re-reverse PersistentPubs to match - %% requirements of rabbit_disk_queue (ascending SeqIds) {RealAcks, ASize} = remove_noacks(MsgsWithAcks), - ok = if ([] == PersistentPubs) andalso ([] == RealAcks) -> ok; - true -> + ok = case ([] == PersistentPubs) andalso ([] == RealAcks) of + true -> ok; + false -> rabbit_disk_queue:tx_commit( Q, lists:reverse(PersistentPubs), RealAcks) end, @@ -490,28 +529,25 @@ tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable, %% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable, - length = Length - }) -> + length = Length, + msg_buf = MsgBuf }) -> %% here, we may have messages with no ack tags, because of the %% fact they are not persistent, but nevertheless we want to %% requeue them. This means publishing them delivered. 
- Requeue + TransQ = transient_queue(Q), + {MsgBuf1, PersistRQ} = lists:foldl( - fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) + fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, + {MB, PRQ}) when IsDurable andalso IsPersistent -> - [{AckTag, true} | RQ]; - ({Msg, _AckTag}, RQ) -> - ok = case RQ == [] of - true -> ok; - false -> rabbit_disk_queue:requeue( - Q, lists:reverse(RQ)) - end, - ok = rabbit_disk_queue:publish(Q, Msg, true), - [] - end, [], MessagesWithAckTags), - ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), - {ok, - State #mqstate { length = Length + erlang:length(MessagesWithAckTags) }}; + {inc_queue_length(Q, MB, 1), [{AckTag, true} | PRQ]}; + ({Msg, noack}, {MB, PRQ}) -> + ok = rabbit_disk_queue:publish(TransQ, Msg, true), + {inc_queue_length(TransQ, MB, 1), PRQ} + end, {MsgBuf, []}, MessagesWithAckTags), + ok = rabbit_disk_queue:requeue(Q, lists:reverse(PersistRQ)), + {ok, State #mqstate { length = Length + erlang:length(MessagesWithAckTags), + msg_buf = MsgBuf1 }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, @@ -521,40 +557,33 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, lists:foldl( fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, {Acc, MsgBuf2}) -> - OnDisk = IsDurable andalso IsPersistent, Acc1 = - if OnDisk -> [{AckTag, true} | Acc]; - true -> Acc + case IsDurable andalso IsPersistent of + true -> [{AckTag, true} | Acc]; + false -> Acc end, - {Acc1, queue:in({Msg, true, OnDisk}, MsgBuf2)} + {Acc1, queue:in({Msg, true}, MsgBuf2)} end, {[], MsgBuf}, MessagesWithAckTags), - ok = if [] == PersistentPubs -> ok; - true -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) + ok = case PersistentPubs of + [] -> ok; + _ -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) end, {ok, State #mqstate {msg_buf = MsgBuf1, length = Length + erlang:length(MessagesWithAckTags)}}. -purge(State = #mqstate { queue = Q, mode = disk, length = Count, - memory_loss = Loss, memory_size = QSize }) -> - Count = rabbit_disk_queue:purge(Q), - {Count, State #mqstate { length = 0, memory_size = 0, - memory_loss = Loss + QSize }}; -purge(State = #mqstate { queue = Q, mode = mixed, length = Length, +purge(State = #mqstate { queue = Q, length = Count, memory_loss = Loss, memory_size = QSize }) -> - rabbit_disk_queue:purge(Q), - {Length, - State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, - memory_loss = Loss + QSize }}. + Len1 = rabbit_disk_queue:purge(Q), + Len2 = rabbit_disk_queue:purge(transient_queue(Q)), + true = Count >= Len1 + Len2, + {Count, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), + memory_loss = Loss + QSize }}. -delete_queue(State = #mqstate { queue = Q, mode = disk, memory_size = QSize, - memory_loss = Loss }) -> - rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { length = 0, memory_size = 0, - memory_loss = Loss + QSize }}; -delete_queue(State = #mqstate { queue = Q, mode = mixed, memory_size = QSize, +delete_queue(State = #mqstate { queue = Q, memory_size = QSize, memory_loss = Loss }) -> - rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, + ok = rabbit_disk_queue:delete_queue(Q), + ok = rabbit_disk_queue:delete_queue(transient_queue(Q)), + {ok, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), memory_loss = Loss + QSize }}. 
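All of the memory_size/memory_gain/memory_loss bookkeeping in these clauses hangs off size_of_message/1, whose clause head (matching on payload_fragments_rev) appears later in this patch. A plausible sketch of the underlying measurement, assuming the cost of a message is taken as the total byte size of its payload fragments; any per-record or header overhead is ignored here.

    -module(msg_size_sketch).
    -export([demo/0]).

    %% Payload fragments are held most-recent-first (hence the _rev), but
    %% a sum is order-independent anyway.
    payload_size(PayloadFragmentsRev) ->
        lists:foldl(fun (Frag, Acc) -> byte_size(Frag) + Acc end,
                    0, PayloadFragmentsRev).

    demo() ->
        11 = payload_size([<<"world">>, <<"hello ">>]),
        0  = payload_size([]),
        ok.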
 length(#mqstate { length = Length }) ->
-- 
cgit v1.2.1


From 563b889cabb7a5877fe2b0f18628a56b035976fe Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 16 Jul 2009 18:08:41 +0100
Subject: Well it's better. The memory size is now recovered at start up by
 doing a foldl on the entire queue.

This seems excessive, but it does work. It only takes 75 seconds on my
machine to get through 1e6 1024-byte messages, and 160 seconds to get through
2e6 1024-byte messages. So that doesn't worry me any more. Also, it's done in
constant memory... ish[0].

Also fixed the queue_mode_manager. Registration no longer produces a mode.
Instead, it assumes you're starting up in disk-only mode, and the first
memory_report will then result in the correct mode being set. This is safe
and prevents a potentially deadly prefetch being sent when a queue starts up
in mixed mode only to be sent to disk_only mode. However, the disk_queue has
to start up in mixed mode because if it doesn't, it has no way to estimate
its memory use for disk mode. As such, it registers and then sends a report
of 0 memory use. This guarantees that it can be put in mixed mode, and thus
it can then respond as necessary to the queue_mode_manager.

I've not done anything further at this stage with the use of the erlang
queue in the mixed_queue module when in disk mode (the potential per-message
cost). Really you don't want to send individual entries here to the
disk_queue, you want to batch them up... which makes this rather more
complex.

[0] Sort of wrong. It can use the cache, and for queues that are not too big
and share messages, this is clearly a good thing. But if there are lots of
shared messages then it all goes wrong, because the cache will get
overpopulated and exhaust memory. Furthermore, the foldl is entirely in the
disk_queue process. This means that during the foldl it won't be reporting
memory and it won't be able to respond to requests to change its mode. All of
which points pretty strongly to the requirement that the prefetch needs to be
somewhat more sophisticated.
---
 src/rabbit_amqqueue_process.erl   | 37 ++++++++++++++++++------------------
 src/rabbit_disk_queue.erl         | 40 +++++++++++++++++++++++++++++--------
 src/rabbit_mixed_queue.erl        | 33 ++++++++++++++++++--------------
 src/rabbit_queue_mode_manager.erl | 34 +++++++++------------------------
 src/rabbit_tests.erl              | 23 +++++++++++++++-------
 5 files changed, 94 insertions(+), 73 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index a1b5a895..0597215f 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -100,19 +100,22 @@ start_link(Q) ->
 
 init(Q = #amqqueue { name = QName, durable = Durable }) ->
     ?LOGDEBUG("Queue starting - ~p~n", [Q]),
-    {ok, Mode} = rabbit_queue_mode_manager:register
-                 (self(), rabbit_amqqueue, set_mode, [self()]),
-    {ok, MS} = rabbit_mixed_queue:init(QName, Durable, Mode),
-    {ok, #q{q = Q,
-            owner = none,
-            exclusive_consumer = none,
-            has_had_consumers = false,
-            mixed_state = MS,
-            next_msg_id = 1,
-            active_consumers = queue:new(),
-            blocked_consumers = queue:new(),
-            memory_report_timer = start_memory_timer()
-           }, {binary, ?HIBERNATE_AFTER_MIN}}.
+ ok = rabbit_queue_mode_manager:register + (self(), rabbit_amqqueue, set_mode, [self()]), + {ok, MS} = rabbit_mixed_queue:init(QName, Durable), + State = #q{q = Q, + owner = none, + exclusive_consumer = none, + has_had_consumers = false, + mixed_state = MS, + next_msg_id = 1, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + memory_report_timer = start_memory_timer() + }, + %% first thing we must do is report_memory which will clear out + %% the 'undefined' values in gain and loss in mixed_queue state + {ok, report_memory(false, State), {binary, ?HIBERNATE_AFTER_MIN}}. terminate(_Reason, State) -> %% FIXME: How do we cancel active subscriptions? @@ -553,14 +556,10 @@ i(memory, _) -> i(Item, _) -> throw({bad_argument, Item}). -report_memory(Hibernating, State = #q { mixed_state = MS }) -> +report_memory(Hib, State = #q { mixed_state = MS }) -> {MSize, Gain, Loss} = rabbit_mixed_queue:estimate_queue_memory(MS), - NewMem = case MSize of - 0 -> 1; %% avoid / 0 - N -> N - end, - rabbit_queue_mode_manager:report_memory(self(), NewMem, Gain, Loss, Hibernating), + rabbit_queue_mode_manager:report_memory(self(), MSize, Gain, Loss, Hib), State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS) }. %--------------------------------------------------------------------------- diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 8b148777..868eab4a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -42,7 +42,7 @@ tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, - requeue_next_n/2, prefetch/2, length/1 + requeue_next_n/2, prefetch/2, length/1, foldl/3 ]). -export([filesync/0, cache_info/0]). @@ -266,6 +266,9 @@ -spec(delete_queue/1 :: (queue_name()) -> 'ok'). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(length/1 :: (queue_name()) -> non_neg_integer()). +-spec(foldl/3 :: (fun (({message(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}, A) -> + A), A, queue_name()) -> A). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). @@ -328,6 +331,9 @@ delete_non_durable_queues(DurableQueues) -> length(Q) -> gen_server2:call(?SERVER, {length, Q}, infinity). +foldl(Fun, Init, Acc) -> + gen_server2:call(?SERVER, {foldl, Fun, Init, Acc}, infinity). + stop() -> gen_server2:call(?SERVER, stop, infinity). @@ -367,8 +373,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. process_flag(trap_exit, true), - {ok, Mode} = rabbit_queue_mode_manager:register - (self(), rabbit_disk_queue, set_mode, []), + ok = rabbit_queue_mode_manager:register + (self(), rabbit_disk_queue, set_mode, []), Node = node(), ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node, @@ -440,10 +446,13 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = preallocate(FileHdl, FileSizeLimit, Offset) end, State2 = State1 #dqstate { current_file_handle = FileHdl }, - {ok, case Mode of - mixed -> State2; - disk -> to_disk_only_mode(State2) - end, {binary, ?HIBERNATE_AFTER_MIN}, 0}. + %% by reporting a memory use of 0, we guarantee the manager will + %% grant us to ram_disk mode. We have to start in ram_disk mode + %% because we can't find values for mnesia_bytes_per_record or + %% ets_bytes_per_record otherwise. 
+ ok = rabbit_queue_mode_manager:report_memory(self(), 0, false), + ok = report_memory(false, State2), + {ok, State2, {binary, ?HIBERNATE_AFTER_MIN}, 0}. handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, false, State), @@ -464,6 +473,9 @@ handle_call({purge, Q}, _From, State) -> handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), reply(WriteSeqId - ReadSeqId, State); +handle_call({foldl, Fun, Init, Q}, _From, State) -> + {ok, Result, State1} = internal_foldl(Q, Fun, Init, State), + reply(Result, State1); handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State) -> @@ -588,7 +600,7 @@ start_memory_timer() -> TRef. start_memory_timer(State = #dqstate { memory_report_timer = undefined }) -> - report_memory(false, State), + ok = report_memory(false, State), State #dqstate { memory_report_timer = start_memory_timer() }; start_memory_timer(State) -> State. @@ -899,6 +911,18 @@ internal_prefetch(Q, Count, State = #dqstate { sequences = Sequences }) -> end, State, lists:seq(ReadSeqId, ReadSeqId + Count1 - 1)), {ok, StateN}. +internal_foldl(Q, Fun, Init, State = #dqstate { sequences = Sequences }) -> + {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), + internal_foldl(Q, WriteSeqId, Fun, State, Init, ReadSeqId). + +internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> + {ok, Acc, State}; +internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> + {ok, MsgStuff, State1} + = internal_read_message(Q, ReadSeqId, true, true, false, State), + Acc1 = Fun(MsgStuff, Acc), + internal_foldl(Q, WriteSeqId, Fun, State1, Acc1, ReadSeqId + 1). + internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) -> [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] = diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index a9013f3d..d864d9b2 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -33,7 +33,7 @@ -include("rabbit.hrl"). --export([init/3]). +-export([init/2]). -export([publish/2, publish_delivered/2, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, @@ -70,7 +70,7 @@ -type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). -type(okmqs() :: {'ok', mqstate()}). --spec(init/3 :: (queue_name(), bool(), mode()) -> okmqs()). +-spec(init/2 :: (queue_name(), bool()) -> okmqs()). -spec(publish/2 :: (message(), mqstate()) -> okmqs()). -spec(publish_delivered/2 :: (message(), mqstate()) -> {'ok', acktag(), mqstate()}). @@ -99,16 +99,18 @@ -endif. -init(Queue, IsDurable, disk) -> +init(Queue, IsDurable) -> Len = rabbit_disk_queue:length(Queue), ok = rabbit_disk_queue:delete_queue(transient_queue(Queue)), MsgBuf = inc_queue_length(Queue, queue:new(), Len), + Size = rabbit_disk_queue:foldl( + fun ({Msg, _Size, _IsDelivered, _AckTag}, Acc) -> + Acc + size_of_message(Msg) + end, 0, Queue), {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, is_durable = IsDurable, length = Len, - memory_size = 0, memory_gain = 0, memory_loss = 0 }}; -init(Queue, IsDurable, mixed) -> - {ok, State} = init(Queue, IsDurable, disk), - to_mixed_mode([], State). + memory_size = Size, memory_gain = undefined, + memory_loss = undefined }}. 
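internal_foldl above is a plain tail recursion over the half-open sequence range [ReadSeqId, WriteSeqId), threading the server state alongside the user accumulator; this is what lets init/2 in rabbit_mixed_queue recover memory_size by summing message sizes at startup. The recursion shape in isolation, with a stub in place of the disk read:

    -module(foldl_sketch).
    -export([demo/0]).

    %% Fold Fun over sequence ids ReadSeqId..WriteSeqId-1, threading an
    %% opaque State just as internal_foldl threads #dqstate{}.
    fold_seq(SeqId, SeqId, _Fun, State, Acc) ->
        {ok, Acc, State};
    fold_seq(ReadSeqId, WriteSeqId, Fun, State, Acc) ->
        Item = {msg, ReadSeqId},  %% stub for internal_read_message/6
        fold_seq(ReadSeqId + 1, WriteSeqId, Fun, State, Fun(Item, Acc)).

    demo() ->
        %% four ids (3,4,5,6), so a counting fold yields 4
        {ok, 4, state} = fold_seq(3, 7, fun (_Msg, N) -> N + 1 end, state, 0),
        ok.

At the commit message's measured rate (1e6 messages in 75 seconds) this works out to roughly 75 microseconds per message, all spent inside the disk_queue process.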
size_of_message( #basic_message { content = #content { payload_fragments_rev = Payload }}) -> @@ -214,7 +216,7 @@ to_mixed_mode(TxnMessages, State = %% load up a new queue with a token that says how many messages %% are on disk (this is already built for us by the disk mode) %% don't actually do anything to the disk - ok = maybe_prefetch(MsgBuf), + ok = maybe_prefetch(mixed, MsgBuf), %% remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_cancel/2 which is why @@ -248,10 +250,10 @@ inc_queue_length(Queue, MsgBuf, Count) -> queue:in({Queue, Count}, MsgBuf) end. -dec_queue_length(MsgBuf) -> +dec_queue_length(Mode, MsgBuf) -> {{value, {Queue, Len}}, MsgBuf1} = queue:out(MsgBuf), MsgBuf2 = case Len of - 1 -> ok = maybe_prefetch(MsgBuf1), + 1 -> ok = maybe_prefetch(Mode, MsgBuf1), MsgBuf1; _ -> queue:in_r({Queue, Len-1}, MsgBuf1) end, @@ -327,7 +329,8 @@ publish_delivered(Msg, State = deliver(State = #mqstate { length = 0 }) -> {empty, State}; deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, - is_durable = IsDurable, length = Length }) -> + is_durable = IsDurable, length = Length, + mode = Mode }) -> {{value, Value}, MsgBuf1} = queue:out(MsgBuf), {Msg, IsDelivered, AckTag, MsgBuf2} = case Value of @@ -343,10 +346,10 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, false -> noack end, - ok = maybe_prefetch(MsgBuf1), + ok = maybe_prefetch(Mode, MsgBuf1), {Msg1, IsDelivered1, AckTag1, MsgBuf1}; _ -> - {ReadQ, MsgBuf3} = dec_queue_length(MsgBuf), + {ReadQ, MsgBuf3} = dec_queue_length(Mode, MsgBuf), {Msg1 = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered1, AckTag1, _PersistRem} = rabbit_disk_queue:deliver(ReadQ), @@ -364,7 +367,9 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag, Rem}, State #mqstate { msg_buf = MsgBuf2, length = Rem }}. -maybe_prefetch(MsgBuf) -> +maybe_prefetch(disk, MsgBuf) -> + ok; +maybe_prefetch(mixed, MsgBuf) -> case queue:peek(MsgBuf) of empty -> ok; diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index a5e9610a..d4bc21d4 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -43,7 +43,6 @@ -define(TOTAL_TOKENS, 10000000). -define(ACTIVITY_THRESHOLD, 25). --define(INITIAL_TOKEN_ALLOCATION, 100). -define(SERVER, ?MODULE). @@ -53,7 +52,7 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(register/4 :: (pid(), atom(), atom(), list()) -> {'ok', queue_mode()}). +-spec(register/4 :: (pid(), atom(), atom(), list()) -> 'ok'). -spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). -spec(report_memory/5 :: (pid(), non_neg_integer(), non_neg_integer(), non_neg_integer(), bool()) -> @@ -141,7 +140,7 @@ start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). register(Pid, Module, Function, Args) -> - gen_server2:call(?SERVER, {register, Pid, Module, Function, Args}). + gen_server2:cast(?SERVER, {register, Pid, Module, Function, Args}). pin_to_disk(Pid) -> gen_server2:call(?SERVER, {pin_to_disk, Pid}). @@ -173,27 +172,6 @@ init([]) -> disk_mode_pins = sets:new() }}. 
-handle_call({register, Pid, Module, Function, Args}, _From, - State = #state { callbacks = Callbacks }) -> - _MRef = erlang:monitor(process, Pid), - State1 = State #state { callbacks = dict:store - (Pid, {Module, Function, Args}, Callbacks) }, - State2 = #state { available_tokens = Avail, - mixed_queues = Mixed } = - free_upto(Pid, ?INITIAL_TOKEN_ALLOCATION, State1), - {Result, State3} = - case ?INITIAL_TOKEN_ALLOCATION > Avail of - true -> - {disk, State2}; - false -> - {mixed, State2 #state { - available_tokens = - Avail - ?INITIAL_TOKEN_ALLOCATION, - mixed_queues = dict:store - (Pid, {?INITIAL_TOKEN_ALLOCATION, active}, Mixed) }} - end, - {reply, {ok, Result}, State3}; - handle_call({pin_to_disk, Pid}, _From, State = #state { mixed_queues = Mixed, callbacks = Callbacks, @@ -317,7 +295,13 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, hibernate -> StateN #state { hibernate = queue:in(Pid, Sleepy) } end, - {noreply, StateN1}. + {noreply, StateN1}; + +handle_cast({register, Pid, Module, Function, Args}, + State = #state { callbacks = Callbacks }) -> + _MRef = erlang:monitor(process, Pid), + {noreply, State #state { callbacks = dict:store + (Pid, {Module, Function, Args}, Callbacks) }}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state { available_tokens = Avail, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 221279f7..58a9d0cd 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -990,11 +990,20 @@ rdq_test_purge() -> rdq_stop(), passed. +rdq_new_mixed_queue(Q, Durable, Disk) -> + {ok, MS} = rabbit_mixed_queue:init(Q, Durable), + MS1 = rabbit_mixed_queue:reset_counters(MS), + case Disk of + true -> {ok, MS2} = rabbit_mixed_queue:to_disk_only_mode([], MS1), + MS2; + false -> MS1 + end. 
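Registration is now a cast that merely stores an {Module, Function, Args} callback; nothing is decided until the first memory report arrives. A sketch of the handshake, assuming the manager notifies a queue by applying the stored MFA with the chosen mode appended, which is consistent with register(self(), rabbit_amqqueue, set_mode, [self()]) pairing with a set_mode(Pid, Mode)-style callback; the apply convention is an assumption, not shown in these patches.

    -module(mode_callback_sketch).
    -export([demo/0, set_mode/2]).

    %% Manager side: invoke the stored callback with the mode appended.
    notify({Module, Function, Args}, Mode) ->
        apply(Module, Function, Args ++ [Mode]).

    %% Queue side: a stand-in for rabbit_amqqueue:set_mode/2.
    set_mode(Pid, Mode) ->
        Pid ! {set_mode, Mode},
        ok.

    demo() ->
        Callback = {?MODULE, set_mode, [self()]},  %% as stored at registration
        ok = notify(Callback, disk),
        receive {set_mode, disk} -> ok
        after 1000 -> timeout
        end.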
+
 rdq_test_mixed_queue_modes() ->
     rdq_virgin(),
     rdq_start(),
     Payload = <<0:(8*256)>>,
-    {ok, MS} = rabbit_mixed_queue:init(q, true, mixed),
+    MS = rdq_new_mixed_queue(q, true, false),
     MS2 = lists:foldl(
             fun (_N, MS1) ->
                     Msg = rabbit_basic:message(x, <<>>, [], Payload),
@@ -1041,7 +1050,7 @@ rdq_test_mixed_queue_modes() ->
     io:format("Converted to disk only mode~n"),
     rdq_stop(),
     rdq_start(),
-    {ok, MS12} = rabbit_mixed_queue:init(q, true, mixed),
+    MS12 = rdq_new_mixed_queue(q, true, false),
     10 = rabbit_mixed_queue:length(MS12),
     io:format("Recovered queue~n"),
     {MS14, AckTags} =
@@ -1061,7 +1070,7 @@ rdq_test_mixed_queue_modes() ->
     io:format("Converted to disk only mode~n"),
     rdq_stop(),
     rdq_start(),
-    {ok, MS17} = rabbit_mixed_queue:init(q, true, mixed),
+    MS17 = rdq_new_mixed_queue(q, true, false),
     0 = rabbit_mixed_queue:length(MS17),
     {0,0,0} = rabbit_mixed_queue:estimate_queue_memory(MS17),
     io:format("Recovered queue~n"),
@@ -1081,23 +1090,23 @@ rdq_test_mode_conversion_mid_txn() ->
 
     rdq_virgin(),
     rdq_start(),
-    {ok, MS0} = rabbit_mixed_queue:init(q, true, mixed),
+    MS0 = rdq_new_mixed_queue(q, true, false),
     passed = rdq_tx_publish_mixed_alter_commit_get(
                MS0, MsgsA, MsgsB, fun rabbit_mixed_queue:to_disk_only_mode/2,
                commit),
 
     rdq_stop_virgin_start(),
-    {ok, MS1} = rabbit_mixed_queue:init(q, true, mixed),
+    MS1 = rdq_new_mixed_queue(q, true, false),
     passed = rdq_tx_publish_mixed_alter_commit_get(
                MS1, MsgsA, MsgsB, fun rabbit_mixed_queue:to_disk_only_mode/2,
                cancel),
 
     rdq_stop_virgin_start(),
-    {ok, MS2} = rabbit_mixed_queue:init(q, true, disk),
+    MS2 = rdq_new_mixed_queue(q, true, true),
     passed = rdq_tx_publish_mixed_alter_commit_get(
                MS2, MsgsA, MsgsB, fun rabbit_mixed_queue:to_mixed_mode/2,
                commit),
 
     rdq_stop_virgin_start(),
-    {ok, MS3} = rabbit_mixed_queue:init(q, true, disk),
+    MS3 = rdq_new_mixed_queue(q, true, true),
     passed = rdq_tx_publish_mixed_alter_commit_get(
               MS3, MsgsA, MsgsB, fun rabbit_mixed_queue:to_mixed_mode/2,
               cancel),
-- 
cgit v1.2.1


From 040a7b7c421a9bb3b5f4ab50eb06dda0b12d42da Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 17 Jul 2009 13:03:17 +0100
Subject: The use of the in-memory run-length queue in the disk_only queue is
 considered a show stopper, and rightly so.

I personally don't like the idea of adding additional tokens to the disk
queue to indicate a queue switch, because it can substantially increase the
number of OS calls and writes and reads from disk, and, e.g., getting the
queue length and memory size right is made a fair bit more complex. So
abandon the two-queues idea.

Instead, store the persistent flag in the stop byte on disk. Then on startup,
the persistent flag turns up in the MsgLocations ets table. This is all done
and all tests pass.

The next stage is, on startup, to go through each queue and just wipe out
non-persistent messages. This should be pretty fast. Then call the shuffle_up
function as is currently being done. This will eliminate the gaps in
sequences. This really should be enough. Then the mixed_queue can go back to
just talking about a single queue.
---
 src/rabbit_disk_queue.erl  | 152 +++++++++++++++++++++++++++------------------
 src/rabbit_misc.erl        |   8 ++-
 src/rabbit_mixed_queue.erl |   5 +-
 3 files changed, 98 insertions(+), 67 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 868eab4a..4eef884f 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -53,7 +53,8 @@
 -include("rabbit.hrl").
 
 -define(WRITE_OK_SIZE_BITS, 8).
--define(WRITE_OK, 255).
+-define(WRITE_OK_PERSISTENT, 254). -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). @@ -101,8 +102,8 @@ %% The components: %% -%% MsgLocation: this is a dets table which contains: -%% {MsgId, RefCount, File, Offset, TotalSize} +%% MsgLocation: this is a (d)ets table which contains: +%% {MsgId, RefCount, File, Offset, TotalSize, IsPersistent} %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} %% Sequences: this is an ets table which contains: @@ -393,7 +394,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ?FILE_EXTENSION_DETS)}, {min_no_slots, 1024*1024}, %% man says this should be <= 32M. But it works... - {max_no_slots, 1024*1024*1024}, + {max_no_slots, 30*1024*1024}, {type, set} ]), @@ -509,8 +510,8 @@ handle_cast({ack, Q, MsgSeqIds}, State) -> handle_cast({auto_ack_next_message, Q}, State) -> {ok, State1} = internal_auto_ack(Q, State), noreply(State1); -handle_cast({tx_publish, Message = #basic_message { guid = MsgId }}, State) -> - {ok, State1} = internal_tx_publish(MsgId, Message, State), +handle_cast({tx_publish, Message}, State) -> + {ok, State1} = internal_tx_publish(Message, State), noreply(State1); handle_cast({tx_cancel, MsgIds}, State) -> {ok, State1} = internal_tx_cancel(MsgIds, State), @@ -636,8 +637,8 @@ memory_use(#dqstate { operation_mode = disk_only, (WordSize * (ets:info(FileSummary, memory) + ets:info(Cache, memory) + ets:info(Sequences, memory))) + - round(MnesiaSizeEstimate) + - round(MsgLocationSizeEstimate). + rabbit_misc:ceil(MnesiaSizeEstimate) + + rabbit_misc:ceil(MsgLocationSizeEstimate). to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> State; @@ -872,7 +873,8 @@ insert_into_cache(Message = #basic_message { guid = MsgId }, true -> 0; false -> 1 end, - true = ets:insert_new(Cache, {MsgId, Message, MsgSize, Count}), + true = + ets:insert_new(Cache, {MsgId, Message, MsgSize, Count}), ok. 
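The two new defines above are the heart of this commit: the single "write OK" trailer byte after each message now also carries the persistence flag, costing no extra bytes on disk. A sketch of encoding and decoding that status byte; the real framing, of course, lives in append_message and read_message_at_offset.

    -module(stop_byte_sketch).
    -export([encode/1, decode/1]).

    -define(WRITE_OK_TRANSIENT, 255).
    -define(WRITE_OK_PERSISTENT, 254).

    %% IsPersistent -> trailer byte
    encode(true)  -> <<?WRITE_OK_PERSISTENT:8>>;
    encode(false) -> <<?WRITE_OK_TRANSIENT:8>>.

    %% trailer byte -> IsPersistent; anything else marks a torn write
    decode(<<?WRITE_OK_PERSISTENT:8>>) -> {ok, true};
    decode(<<?WRITE_OK_TRANSIENT:8>>)  -> {ok, false};
    decode(<<_Other:8>>)               -> corrupt.

Round-tripping behaves as expected: {ok, true} = stop_byte_sketch:decode(stop_byte_sketch:encode(true)).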
%% ---- INTERNAL RAW FUNCTIONS ---- @@ -890,8 +892,9 @@ internal_deliver(Q, ReadMsg, FakeDeliver, {Q, ReadSeqId+1, WriteSeqId}), {ok, case Result of - {MsgId, Delivered, {MsgId, ReadSeqId}} -> - {MsgId, Delivered, {MsgId, ReadSeqId}, Remaining}; + {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}} -> + {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}, + Remaining}; {Message, BodySize, Delivered, {MsgId, ReadSeqId}} -> {Message, BodySize, Delivered, {MsgId, ReadSeqId}, Remaining} @@ -927,7 +930,7 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), - [{MsgId, RefCount, File, Offset, TotalSize}] = + [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = dets_ets_lookup(State, MsgId), ok = if FakeDeliver orelse Delivered -> ok; @@ -940,12 +943,13 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - case fetch_and_increment_cache(MsgId, State) of not_found -> {FileHdl, State1} = get_read_handle(File, Offset, State), - {ok, {MsgBody, BodySize}} = + {ok, {MsgBody, IsPersistent, BodySize}} = read_message_at_offset(FileHdl, Offset, TotalSize), - Message = bin_to_msg(MsgBody), + #basic_message { is_persistent=IsPersistent, guid=MsgId } = + Message = bin_to_msg(MsgBody), ok = if RefCount > 1 orelse ForceInCache -> - insert_into_cache(Message, BodySize, - ForceInCache, State1); + insert_into_cache + (Message, BodySize, ForceInCache, State1); true -> ok %% it's not in the cache and we only %% have 1 queue with the message. So @@ -959,13 +963,14 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - State} end; false -> - {ok, {MsgId, Delivered, {MsgId, ReadSeqId}}, State} + {ok, {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}}, State} end. internal_auto_ack(Q, State) -> case internal_deliver(Q, false, true, State) of {ok, empty, State1} -> {ok, State1}; - {ok, {_MsgId, _Delivered, MsgSeqId, _Remaining}, State1} -> + {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Remaining}, + State1} -> remove_messages(Q, [MsgSeqId], true, State1) end. @@ -985,7 +990,7 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, Files = lists:foldl( fun ({MsgId, SeqId}, Files1) -> - [{MsgId, RefCount, File, Offset, TotalSize}] = + [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = dets_ets_lookup(State, MsgId), Files2 = case RefCount of @@ -1007,8 +1012,8 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), ok = dets_ets_insert( - State, {MsgId, RefCount - 1, - File, Offset, TotalSize}), + State, {MsgId, RefCount - 1, File, Offset, + TotalSize, IsPersistent}), Files1 end, ok = case MnesiaDelete of @@ -1023,7 +1028,8 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, State1 = compact(Files, State), {ok, State1}. 
-internal_tx_publish(MsgId, Message, +internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, + guid = MsgId }, State = #dqstate { current_file_handle = CurHdl, current_file_name = CurName, current_offset = CurOffset, @@ -1032,10 +1038,11 @@ internal_tx_publish(MsgId, Message, case dets_ets_lookup(State, MsgId) of [] -> %% New message, lots to do - {ok, TotalSize} = - append_message(CurHdl, MsgId, msg_to_bin(Message)), - true = dets_ets_insert_new(State, {MsgId, 1, CurName, - CurOffset, TotalSize}), + {ok, TotalSize} = append_message(CurHdl, MsgId, msg_to_bin(Message), + IsPersistent), + true = dets_ets_insert_new + (State, {MsgId, 1, CurName, + CurOffset, TotalSize, IsPersistent}), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + @@ -1051,10 +1058,10 @@ internal_tx_publish(MsgId, Message, maybe_roll_to_new_file( NextOffset, State #dqstate {current_offset = NextOffset, current_dirty = true}); - [{MsgId, RefCount, File, Offset, TotalSize}] -> + [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] -> %% We already know about it, just update counter ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, - Offset, TotalSize}), + Offset, TotalSize, IsPersistent}), {ok, State} end. @@ -1080,7 +1087,8 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, lists:foldl( fun (MsgId, {InCurFileAcc, SeqId}) -> [{MsgId, _RefCount, File, Offset, - _TotalSize}] = dets_ets_lookup(State, MsgId), + _TotalSize, _IsPersistent}] = + dets_ets_lookup(State, MsgId), ok = mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = @@ -1109,7 +1117,7 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, internal_publish(Q, Message = #basic_message { guid = MsgId }, IsDelivered, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = - internal_tx_publish(MsgId, Message, State), + internal_tx_publish(Message, State), {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId}, @@ -1363,7 +1371,7 @@ sort_msg_locations_by_offset(Asc, List) -> true -> fun erlang:'<'/2; false -> fun erlang:'>'/2 end, - lists:sort(fun ({_, _, _, OffA, _}, {_, _, _, OffB, _}) -> + lists:sort(fun ({_, _, _, OffA, _, _}, {_, _, _, OffB, _, _}) -> Comp(OffA, OffB) end, List). 
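The internal_tx_publish change above also restates the refcount discipline plainly: the first publish of a message id appends the body to the current file and installs a row with RefCount = 1; every later publish of the same id only bumps the counter, so a body shared by many queues is written once. A minimal sketch of that discipline (Table and AppendFun are illustrative stand-ins for the module's dets/ets indirection and its append path):

    publish_or_bump(Table, MsgId, AppendFun) ->
        case ets:lookup(Table, MsgId) of
            [] ->
                %% first sight: write the body, record it once
                {File, Offset, TotalSize, IsPersistent} = AppendFun(),
                true = ets:insert_new(Table, {MsgId, 1, File, Offset,
                                              TotalSize, IsPersistent}),
                new;
            [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] ->
                %% seen before: the body is already on disk, just count it
                true = ets:insert(Table, {MsgId, RefCount + 1, File,
                                          Offset, TotalSize, IsPersistent}),
                duplicate
        end.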
@@ -1402,7 +1410,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, read_ahead, delayed_write]), Worklist = lists:dropwhile( - fun ({_, _, _, Offset, _}) + fun ({_, _, _, Offset, _, _}) when Offset /= DestinationContiguousTop -> %% it cannot be that Offset == %% DestinationContiguousTop because if it @@ -1416,7 +1424,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, end, sort_msg_locations_by_offset( true, dets_ets_match_object(State, {'_', '_', Destination, - '_', '_'}))), + '_', '_', '_'}))), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State), @@ -1438,7 +1446,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, sort_msg_locations_by_offset( true, dets_ets_match_object(State, {'_', '_', Source, - '_', '_'})), + '_', '_', '_'})), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State), %% tidy up @@ -1452,14 +1460,15 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, State) -> {FinalOffset, BlockStart1, BlockEnd1} = lists:foldl( - fun ({MsgId, RefCount, _Source, Offset, TotalSize}, + fun ({MsgId, RefCount, _Source, Offset, TotalSize, IsPersistent}, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. %% Offset, BlockStart and BlockEnd are in the SourceFile Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert(State, {MsgId, RefCount, Destination, - CurOffset, TotalSize}), + ok = dets_ets_insert + (State, {MsgId, RefCount, Destination, + CurOffset, TotalSize, IsPersistent}), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> %% base case, called only for the first list elem @@ -1643,11 +1652,13 @@ load_messages(undefined, [], State; load_messages(Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), - Offset = case dets_ets_match_object(State, {'_', '_', Left, '_', '_'}) of - [] -> 0; - L -> [{_MsgId, _RefCount, Left, MaxOffset, TotalSize}|_] = - sort_msg_locations_by_offset(false, L), - MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT + Offset = + case dets_ets_match_object(State, {'_', '_', Left, '_', '_', '_'}) of + [] -> 0; + L -> + [ {_MsgId, _RefCount, Left, MaxOffset, TotalSize, _IsPersistent} + | _ ] = sort_msg_locations_by_offset(false, L), + MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State #dqstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; @@ -1656,7 +1667,7 @@ load_messages(Left, [File|Files], %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( - fun ({MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + fun ({MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case erlang:length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, @@ -1666,9 +1677,9 @@ load_messages(Left, [File|Files], msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> - true = - dets_ets_insert_new(State, {MsgId, RefCount, File, - Offset, TotalSize}), + true = dets_ets_insert_new + (State, {MsgId, RefCount, File, + Offset, TotalSize, IsPersistent}), {[{MsgId, TotalSize, Offset}|VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } @@ -1706,20 +1717,22 @@ verify_messages_in_mnesia(MsgIds) -> msg_id)) end, MsgIds). +grab_msg_id({MsgId, _IsPersistent, _TotalSize, _FileOffset}) -> + MsgId. 
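The relocation step inside copy_messages above is the heart of compaction: each worklist entry is re-pointed at the destination file at the current write offset, and the offset advances by the packed size. In isolation (a sketch; the table handle and the packing constant are illustrative stand-ins):

    %% Sketch: move one msg-location row to its post-compaction home and
    %% return the write offset for the next row.
    relocate({MsgId, RefCount, _SrcFile, _SrcOffset, TotalSize,
              IsPersistent}, Table, Destination, CurOffset, PackingAdj) ->
        true = ets:insert(Table, {MsgId, RefCount, Destination,
                                  CurOffset, TotalSize, IsPersistent}),
        CurOffset + TotalSize + PackingAdj.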
+ recover_crashed_compactions1(Files, TmpFile) -> - GrabMsgId = fun ({MsgId, _TotalSize, _FileOffset}) -> MsgId end, NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFile, Files), %% [{MsgId, TotalSize, FileOffset}] {ok, UncorruptedMessagesTmp} = scan_file_for_valid_messages(form_filename(TmpFile)), - MsgIdsTmp = lists:map(GrabMsgId, UncorruptedMessagesTmp), + MsgIdsTmp = lists:map(fun grab_msg_id/1, UncorruptedMessagesTmp), %% all of these messages should appear in the mnesia table, %% otherwise they wouldn't have been copied out verify_messages_in_mnesia(MsgIdsTmp), {ok, UncorruptedMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), - MsgIds = lists:map(GrabMsgId, UncorruptedMessages), + MsgIds = lists:map(fun grab_msg_id/1, UncorruptedMessages), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -1788,7 +1801,7 @@ recover_crashed_compactions1(Files, TmpFile) -> {ok, MainMessages} = scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), - MsgIdsMain = lists:map(GrabMsgId, MainMessages), + MsgIdsMain = lists:map(fun grab_msg_id/1, MainMessages), %% check that everything in MsgIds is in MsgIdsMain true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIds), @@ -1833,16 +1846,20 @@ get_disk_queue_files() -> %% ---- RAW READING AND WRITING OF FILES ---- -append_message(FileHdl, MsgId, MsgBody) when is_binary(MsgBody) -> +append_message(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> BodySize = size(MsgBody), MsgIdBin = term_to_binary(MsgId), MsgIdBinSize = size(MsgIdBin), TotalSize = BodySize + MsgIdBinSize, + StopByte = case IsPersistent of + true -> ?WRITE_OK_PERSISTENT; + false -> ?WRITE_OK_TRANSIENT + end, case file:write(FileHdl, <>) of + StopByte:?WRITE_OK_SIZE_BITS>>) of ok -> {ok, TotalSize}; KO -> KO end. @@ -1856,9 +1873,14 @@ read_message_at_offset(FileHdl, Offset, TotalSize) -> MsgIdBinSize:?INTEGER_SIZE_BITS, Rest:TotalSizeWriteOkBytes/binary>>} -> BodySize = TotalSize - MsgIdBinSize, - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, - ?WRITE_OK:?WRITE_OK_SIZE_BITS>> = Rest, - {ok, {MsgBody, BodySize}}; + case Rest of + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + ?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>> -> + {ok, {MsgBody, false, BodySize}}; + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + ?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>> -> + {ok, {MsgBody, true, BodySize}} + end; KO -> KO end; KO -> KO @@ -1876,15 +1898,15 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> {ok, eof} -> {ok, Acc}; {ok, {corrupted, NextOffset}} -> scan_file_for_valid_messages(FileHdl, NextOffset, Acc); - {ok, {ok, MsgId, TotalSize, NextOffset}} -> - scan_file_for_valid_messages(FileHdl, NextOffset, - [{MsgId, TotalSize, Offset}|Acc]); + {ok, {ok, MsgId, IsPersistent, TotalSize, NextOffset}} -> + scan_file_for_valid_messages( + FileHdl, NextOffset, + [{MsgId, IsPersistent, TotalSize, Offset} | Acc]); _KO -> %% bad message, but we may still have recovered some valid messages {ok, Acc} end. 
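Putting append_message and read_message_at_offset side by side, the on-disk record format is

    <<TotalSize:64, MsgIdBinSize:64, MsgIdBin/binary,
      MsgBody/binary, StopByte:8>>

with the stop byte distinguishing transient from persistent writes, and ?FILE_PACKING_ADJUSTMENT covering the 17 bytes of framing around TotalSize. A sketch of building one frame (assuming ?WRITE_OK_TRANSIENT keeps the old value 255 and ?WRITE_OK_PERSISTENT is 254, per the defines above):

    frame(MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) ->
        MsgIdBin = term_to_binary(MsgId),
        MsgIdBinSize = size(MsgIdBin),
        TotalSize = size(MsgBody) + MsgIdBinSize,
        StopByte = case IsPersistent of
                       true  -> 254;  %% ?WRITE_OK_PERSISTENT (assumed)
                       false -> 255   %% ?WRITE_OK_TRANSIENT (assumed)
                   end,
        <<TotalSize:64, MsgIdBinSize:64,
          MsgIdBin/binary, MsgBody/binary, StopByte:8>>.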
- read_next_file_entry(FileHdl, Offset) -> TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, case file:read(FileHdl, TwoIntegers) of @@ -1915,9 +1937,15 @@ read_next_file_entry(FileHdl, Offset) -> ?FILE_PACKING_ADJUSTMENT, case file:read(FileHdl, 1) of {ok, - <>} -> - {ok, {ok, binary_to_term(MsgId), - TotalSize, NextOffset}}; + <>} -> + {ok, + {ok, binary_to_term(MsgId), + false, TotalSize, NextOffset}}; + {ok, + <>} -> + {ok, + {ok, binary_to_term(MsgId), + true, TotalSize, NextOffset}}; {ok, _SomeOtherData} -> {ok, {corrupted, NextOffset}}; KO -> KO diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 176ddddb..fc30834e 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -446,10 +446,12 @@ unfold(Fun, Acc, Init) -> false -> {Acc, Init} end. -ceil(N) when N - trunc(N) > 0 -> - 1 + trunc(N); ceil(N) -> - N. + T = trunc(N), + case N - T of + 0 -> N; + _ -> 1 + T + end. keygets(Keys, KeyList) -> lists:reverse( diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index d864d9b2..425d7763 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -311,7 +311,8 @@ publish_delivered(Msg = %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag - {MsgId, false, AckTag, 0} = rabbit_disk_queue:phantom_deliver(PubQ), + {MsgId, IsPersistent, false, AckTag, 0} = + rabbit_disk_queue:phantom_deliver(PubQ), {ok, AckTag, State1}; false -> %% in this case, we don't actually care about the ack, so @@ -340,7 +341,7 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, AckTag1 = case IsDurable andalso IsPersistent of true -> - {MsgId, IsDelivered1, AckTag2, _PersistRem} + {MsgId, IsPersistent, IsDelivered1, AckTag2, _PRem} = rabbit_disk_queue:phantom_deliver(Q), AckTag2; false -> -- cgit v1.2.1 From 256a0ba7708324b67d41d8da36b2e3534b3d4bfe Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Jul 2009 15:12:04 +0100 Subject: part 2 done. The mixed_queue is back to using only one queue. Start up time isn't too bad with big queues, and memory use is stable. In disk_queue, when iterating through the mnesia table, do the normal limited batching for removal of non-persistent messages. 
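The batching referred to here keeps the per-transaction work bounded: acks for non-persistent messages are accumulated per queue and flushed whenever the batch fills or the fold crosses a queue boundary, with one final flush for the remainder. The pattern in isolation, mirroring the load_from_disk fold in the diff below (a sketch; Flush stands in for remove_messages/4 and BatchSize for the batch limit):

    batched_remove(Flush, BatchSize, QSeqIds) ->
        {LastQ, LastAcc, _Len} =
            lists:foldl(
              fun ({Q, SeqId}, {Q, Acc, Len}) when Len < BatchSize ->
                      %% same queue, batch not yet full: accumulate
                      {Q, [SeqId | Acc], Len + 1};
                  ({Q, SeqId}, {OldQ, Acc, _Len}) ->
                      %% queue changed or batch full: flush, start afresh
                      ok = maybe_flush(Flush, OldQ, Acc),
                      {Q, [SeqId], 1}
              end, {undefined, [], 0}, QSeqIds),
        maybe_flush(Flush, LastQ, LastAcc).

    maybe_flush(_Flush, _Q, [])   -> ok;
    maybe_flush(Flush, Q, SeqIds) -> Flush(Q, lists:reverse(SeqIds)).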
--- src/rabbit_disk_queue.erl | 57 +++++++++----- src/rabbit_mixed_queue.erl | 189 ++++++++++++++++----------------------------- src/rabbit_tests.erl | 20 ++--- 3 files changed, 114 insertions(+), 152 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 4eef884f..95ed8adf 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1560,25 +1560,46 @@ load_from_disk(State) -> State1 = load_messages(undefined, Files, State), %% Finally, check there is nothing in mnesia which we haven't %% loaded - {atomic, true} = mnesia:transaction( - fun() -> - ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl( - fun (#dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = {Q, SeqId} }, - true) -> - case erlang:length - (dets_ets_lookup(State1, MsgId)) of - 0 -> ok == mnesia:delete(rabbit_disk_queue, - {Q, SeqId}, write); - 1 -> true - end - end, - true, rabbit_disk_queue) - end), - State2 = extract_sequence_numbers(State1), + {atomic, State2} = + mnesia:transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + {State6, FinalQ, MsgSeqIds2, _Len} = + mnesia:foldl( + fun (#dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = {Q, SeqId} }, + {State3, OldQ, MsgSeqIds, Len}) -> + {State4, MsgSeqIds1, Len1} = + case {OldQ == Q, MsgSeqIds} of + {true, _} when Len < 10000 -> + {State3, MsgSeqIds, Len}; + {false, []} -> {State3, MsgSeqIds, Len}; + {_, _} -> + {ok, State5} = + remove_messages(Q, MsgSeqIds, + txn, State3), + {State5, [], 0} + end, + case dets_ets_lookup(State4, MsgId) of + [] -> ok = mnesia:delete(rabbit_disk_queue, + {Q, SeqId}, write), + {State4, Q, MsgSeqIds1, Len1}; + [{MsgId, _RefCount, _File, _Offset, + _TotalSize, true}] -> + {State4, Q, MsgSeqIds1, Len1}; + [{MsgId, _RefCount, _File, _Offset, + _TotalSize, false}] -> + {State4, Q, + [{MsgId, SeqId} | MsgSeqIds1], Len1+1} + end + end, {State1, undefined, [], 0}, rabbit_disk_queue), + {ok, State7} = + remove_messages(FinalQ, MsgSeqIds2, txn, State6), + State7 + end), + State8 = extract_sequence_numbers(State2), ok = del_index(), - {ok, State2}. + {ok, State8}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> {atomic, true} = mnesia:transaction( diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 425d7763..3b86596b 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -101,10 +101,10 @@ init(Queue, IsDurable) -> Len = rabbit_disk_queue:length(Queue), - ok = rabbit_disk_queue:delete_queue(transient_queue(Queue)), MsgBuf = inc_queue_length(Queue, queue:new(), Len), Size = rabbit_disk_queue:foldl( - fun ({Msg, _Size, _IsDelivered, _AckTag}, Acc) -> + fun ({Msg = #basic_message { is_persistent = true }, + _Size, _IsDelivered, _AckTag}, Acc) -> Acc + size_of_message(Msg) end, 0, Queue), {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, @@ -130,10 +130,8 @@ to_disk_only_mode(TxnMessages, State = %% message on disk. %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. - TransQ = transient_queue(Q), {ok, MsgBuf1} = - send_messages_to_disk(IsDurable, Q, TransQ, MsgBuf, 0, 0, [], - queue:new()), + send_messages_to_disk(IsDurable, Q, MsgBuf, 0, 0, [], queue:new()), %% tx_publish txn messages. 
Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -148,47 +146,42 @@ to_disk_only_mode(TxnMessages, State = garbage_collect(), {ok, State #mqstate { mode = disk, msg_buf = MsgBuf1 }}. -send_messages_to_disk(IsDurable, Q, TransQ, Queue, PublishCount, RequeueCount, +send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, Commit, MsgBuf) -> case queue:out(Queue) of {empty, Queue} -> - ok = flush_messages_to_disk_queue(TransQ, Commit), - [] = flush_requeue_to_disk_queue(TransQ, RequeueCount, []), + ok = flush_messages_to_disk_queue(Q, Commit), + [] = flush_requeue_to_disk_queue(Q, RequeueCount, []), {ok, MsgBuf}; {{value, {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, _IsDelivered}}, Queue1} -> case IsDurable andalso IsPersistent of - true -> %% it's already in the persistent Q + true -> %% it's already in the Q send_messages_to_disk( - IsDurable, Q, TransQ, Queue1, PublishCount, RequeueCount, + IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, inc_queue_length(Q, MsgBuf, 1)); false -> Commit1 = flush_requeue_to_disk_queue - (TransQ, RequeueCount, Commit), + (Q, RequeueCount, Commit), ok = rabbit_disk_queue:tx_publish(Msg), case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of true -> - ok = flush_messages_to_disk_queue(TransQ, Commit1), + ok = flush_messages_to_disk_queue(Q, Commit1), send_messages_to_disk( - IsDurable, Q, TransQ, Queue1, 1, 0, [MsgId], - inc_queue_length(TransQ, MsgBuf, 1)); + IsDurable, Q, Queue1, 1, 0, [MsgId], + inc_queue_length(Q, MsgBuf, 1)); false -> send_messages_to_disk( - IsDurable, Q, TransQ, Queue1, PublishCount + 1, 0, + IsDurable, Q, Queue1, PublishCount + 1, 0, [MsgId | Commit1], - inc_queue_length(TransQ, MsgBuf, 1)) + inc_queue_length(Q, MsgBuf, 1)) end end; {{value, {Q, Count}}, Queue1} -> - send_messages_to_disk(IsDurable, Q, TransQ, Queue1, PublishCount, + send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, - inc_queue_length(Q, MsgBuf, Count)); - {{value, {TransQ, Count}}, Queue1} -> - ok = flush_messages_to_disk_queue(TransQ, Commit), - send_messages_to_disk(IsDurable, Q, TransQ, Queue1, 0, - RequeueCount + Count, [], - inc_queue_length(TransQ, MsgBuf, Count)) + inc_queue_length(Q, MsgBuf, Count)) end. flush_messages_to_disk_queue(Q, Commit) -> @@ -235,9 +228,6 @@ to_mixed_mode(TxnMessages, State = garbage_collect(), {ok, State #mqstate { mode = mixed }}. -transient_queue(Queue) -> - {Queue, transient}. - inc_queue_length(_Queue, MsgBuf, 0) -> MsgBuf; inc_queue_length(Queue, MsgBuf, Count) -> @@ -259,17 +249,11 @@ dec_queue_length(Mode, MsgBuf) -> end, {Queue, MsgBuf2}. 
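The msg_buf convention that send_messages_to_disk and dec_queue_length share: stretches of the queue that live only on disk are represented in the buffer by a single {Q, Count} marker rather than Count individual entries, which is what keeps the disk-only buffer small. A toy value, for illustration only:

    %% Sketch: a buffer holding one in-RAM message, then 1000 messages
    %% that exist only in the disk queue, then another in-RAM message.
    example_msg_buf(Q, MsgA, MsgB) ->
        queue:from_list([{MsgA, false},  %% {Message, IsDelivered}
                         {Q, 1000},      %% on-disk stretch marker
                         {MsgB, false}]).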
-publish(Msg = #basic_message { is_persistent = IsPersistent }, - State = #mqstate { mode = disk, queue = Q, length = Length, - is_durable = IsDurable, msg_buf = MsgBuf, - memory_size = QSize, memory_gain = Gain }) -> - Persist = IsDurable andalso IsPersistent, - PubQ = case Persist of - true -> Q; - false -> transient_queue(Q) - end, - MsgBuf1 = inc_queue_length(PubQ, MsgBuf, 1), - ok = rabbit_disk_queue:publish(PubQ, Msg, false), +publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, + msg_buf = MsgBuf, memory_size = QSize, + memory_gain = Gain }) -> + MsgBuf1 = inc_queue_length(Q, MsgBuf, 1), + ok = rabbit_disk_queue:publish(Q, Msg, false), MsgSize = size_of_message(Msg), {ok, State #mqstate { memory_gain = Gain + MsgSize, memory_size = QSize + MsgSize, @@ -298,11 +282,7 @@ publish_delivered(Msg = memory_size = QSize, memory_gain = Gain }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> Persist = IsDurable andalso IsPersistent, - PubQ = case Persist of - true -> Q; - false -> transient_queue(Q) - end, - rabbit_disk_queue:publish(PubQ, Msg, false), + rabbit_disk_queue:publish(Q, Msg, false), MsgSize = size_of_message(Msg), State1 = State #mqstate { memory_size = QSize + MsgSize, memory_gain = Gain + MsgSize }, @@ -312,12 +292,12 @@ publish_delivered(Msg = %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag {MsgId, IsPersistent, false, AckTag, 0} = - rabbit_disk_queue:phantom_deliver(PubQ), + rabbit_disk_queue:phantom_deliver(Q), {ok, AckTag, State1}; false -> %% in this case, we don't actually care about the ack, so %% auto ack it (asynchronously). - ok = rabbit_disk_queue:auto_ack_next_message(PubQ), + ok = rabbit_disk_queue:auto_ack_next_message(Q), {ok, noack, State1} end; publish_delivered(Msg, State = @@ -350,16 +330,16 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, ok = maybe_prefetch(Mode, MsgBuf1), {Msg1, IsDelivered1, AckTag1, MsgBuf1}; _ -> - {ReadQ, MsgBuf3} = dec_queue_length(Mode, MsgBuf), + {Q, MsgBuf3} = dec_queue_length(Mode, MsgBuf), {Msg1 = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered1, AckTag1, _PersistRem} - = rabbit_disk_queue:deliver(ReadQ), + = rabbit_disk_queue:deliver(Q), AckTag2 = case IsDurable andalso IsPersistent of true -> AckTag1; false -> - ok = rabbit_disk_queue:ack(ReadQ, [AckTag1]), + ok = rabbit_disk_queue:ack(Q, [AckTag1]), noack end, {Msg1, IsDelivered1, AckTag2, MsgBuf3} @@ -368,7 +348,7 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag, Rem}, State #mqstate { msg_buf = MsgBuf2, length = Rem }}. -maybe_prefetch(disk, MsgBuf) -> +maybe_prefetch(disk, _MsgBuf) -> ok; maybe_prefetch(mixed, MsgBuf) -> case queue:peek(MsgBuf) of @@ -417,64 +397,19 @@ tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize, only_msg_ids(Pubs) -> lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). -%% The last 2 params are accumulators. We work through the publishes, -%% sorting out our msgbuf as we go. Finally, when no more work to do, -%% we commit first transient, and the persistent msgs. This is safe -%% because in case of failure, transient messages will be lost on -%% restart anyway. 
-commit_to_queues(_IsDurable, _Q, _TransQ, MsgBuf, [], [], [], []) -> - MsgBuf; -commit_to_queues(_IsDurable, Q, _TransQ, MsgBuf, AckTags, [], - PersistMsgIds, []) -> - MsgIds = lists:flatten(lists:reverse(PersistMsgIds)), - ok = rabbit_disk_queue:tx_commit(Q, MsgIds, AckTags), - MsgBuf; -commit_to_queues(IsDurable, Q, TransQ, MsgBuf, AckTags, [], - PersistMsgIds, TransMsgIds) -> - MsgIds = lists:flatten(lists:reverse(TransMsgIds)), - ok = rabbit_disk_queue:tx_commit(TransQ, MsgIds, []), - commit_to_queues(IsDurable, Q, TransQ, MsgBuf, AckTags, [], - PersistMsgIds, []); -commit_to_queues(false, Q, TransQ, MsgBuf, AckTags, Publishes, [], []) -> - MsgIds = only_msg_ids(Publishes), - MsgBuf1 = inc_queue_length(TransQ, MsgBuf, erlang:length(MsgIds)), - commit_to_queues(false, Q, TransQ, MsgBuf1, AckTags, [], [], [MsgIds]); -commit_to_queues(true, Q, TransQ, MsgBuf, AckTags, Publishes = - [#basic_message { is_persistent = true } | _], - PersistAcc, TransAcc) -> - {Persist, Publishes1} = lists:splitwith(fun is_persistent/1, Publishes), - MsgIds = only_msg_ids(Persist), - MsgBuf1 = inc_queue_length(Q, MsgBuf, erlang:length(MsgIds)), - commit_to_queues(true, Q, TransQ, MsgBuf1, AckTags, Publishes1, - [MsgIds | PersistAcc], TransAcc); -commit_to_queues(true, Q, TransQ, MsgBuf, AckTags, Publishes, - PersistAcc, TransAcc) -> - %% not persistent - {Trans, Publishes1} = lists:splitwith(fun is_not_persistent/1, Publishes), - MsgIds = only_msg_ids(Trans), - MsgBuf1 = inc_queue_length(TransQ, MsgBuf, erlang:length(MsgIds)), - commit_to_queues(true, Q, TransQ, MsgBuf1, AckTags, Publishes1, - PersistAcc, [MsgIds | TransAcc]). - -is_persistent(#basic_message { is_persistent = IsPersistent }) -> - IsPersistent. - -is_not_persistent(#basic_message { is_persistent = IsPersistent }) -> - not IsPersistent. - tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = disk, queue = Q, length = Length, memory_size = QSize, memory_loss = Loss, - is_durable = IsDurable, msg_buf = MsgBuf }) -> + msg_buf = MsgBuf }) -> {RealAcks, ASize} = remove_noacks(MsgsWithAcks), - MsgBuf1 = case ([] == Publishes) andalso ([] == RealAcks) of - true -> MsgBuf; - false -> commit_to_queues - (IsDurable, Q, transient_queue(Q), MsgBuf, - RealAcks, Publishes, [], []) - end, - {ok, State #mqstate { length = Length + erlang:length(Publishes), - msg_buf = MsgBuf1, memory_size = QSize - ASize, + ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; + true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), + RealAcks) + end, + Len = erlang:length(Publishes), + {ok, State #mqstate { length = Length + Len, + msg_buf = inc_queue_length(Q, MsgBuf, Len), + memory_size = QSize - ASize, memory_loss = Loss + ASize }}; tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, @@ -493,9 +428,8 @@ tx_commit(Publishes, MsgsWithAcks, {RealAcks, ASize} = remove_noacks(MsgsWithAcks), ok = case ([] == PersistentPubs) andalso ([] == RealAcks) of true -> ok; - false -> - rabbit_disk_queue:tx_commit( - Q, lists:reverse(PersistentPubs), RealAcks) + false -> rabbit_disk_queue:tx_commit( + Q, lists:reverse(PersistentPubs), RealAcks) end, {ok, State #mqstate { msg_buf = MsgBuf1, memory_size = QSize - ASize, length = Length + erlang:length(Publishes), @@ -540,20 +474,24 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, %% here, we may have messages with no ack tags, because of the %% fact they are not persistent, but nevertheless we want to %% requeue them. 
This means publishing them delivered. - TransQ = transient_queue(Q), - {MsgBuf1, PersistRQ} + Requeue = lists:foldl( - fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, - {MB, PRQ}) + fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) when IsDurable andalso IsPersistent -> - {inc_queue_length(Q, MB, 1), [{AckTag, true} | PRQ]}; - ({Msg, noack}, {MB, PRQ}) -> - ok = rabbit_disk_queue:publish(TransQ, Msg, true), - {inc_queue_length(TransQ, MB, 1), PRQ} - end, {MsgBuf, []}, MessagesWithAckTags), - ok = rabbit_disk_queue:requeue(Q, lists:reverse(PersistRQ)), - {ok, State #mqstate { length = Length + erlang:length(MessagesWithAckTags), - msg_buf = MsgBuf1 }}; + [{AckTag, true} | RQ]; + ({Msg, noack}, RQ) -> + ok = case RQ == [] of + true -> ok; + false -> rabbit_disk_queue:requeue( + Q, lists:reverse(RQ)) + end, + ok = rabbit_disk_queue:publish(Q, Msg, true), + [] + end, [], MessagesWithAckTags), + ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), + Len = erlang:length(MessagesWithAckTags), + {ok, State #mqstate { length = Length + Len, + msg_buf = inc_queue_length(Q, MsgBuf, Len) }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, @@ -577,18 +515,21 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, {ok, State #mqstate {msg_buf = MsgBuf1, length = Length + erlang:length(MessagesWithAckTags)}}. -purge(State = #mqstate { queue = Q, length = Count, +purge(State = #mqstate { queue = Q, mode = disk, length = Count, + memory_loss = Loss, memory_size = QSize }) -> + Count = rabbit_disk_queue:purge(Q), + {Count, State #mqstate { length = 0, memory_size = 0, + memory_loss = Loss + QSize }}; +purge(State = #mqstate { queue = Q, mode = mixed, length = Length, memory_loss = Loss, memory_size = QSize }) -> - Len1 = rabbit_disk_queue:purge(Q), - Len2 = rabbit_disk_queue:purge(transient_queue(Q)), - true = Count >= Len1 + Len2, - {Count, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), - memory_loss = Loss + QSize }}. + rabbit_disk_queue:purge(Q), + {Length, + State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, + memory_loss = Loss + QSize }}. delete_queue(State = #mqstate { queue = Q, memory_size = QSize, memory_loss = Loss }) -> ok = rabbit_disk_queue:delete_queue(Q), - ok = rabbit_disk_queue:delete_queue(transient_queue(Q)), {ok, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), memory_loss = Loss + QSize }}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 58a9d0cd..b9777337 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -767,8 +767,8 @@ benchmark_disk_queue() -> ok = control_action(start_app, []), passed. -rdq_message(MsgId, MsgBody) -> - rabbit_basic:message(x, <<>>, [], MsgBody, MsgId). +rdq_message(MsgId, MsgBody, IsPersistent) -> + rabbit_basic:message(x, <<>>, [], MsgBody, MsgId, IsPersistent). 
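Stepping back to the disk-mode requeue clause above, the interleaving discipline is worth restating: pending {AckTag, true} requeues are flushed before each no-ack message is published as delivered, so the original queue order survives a mix of persistent and transient messages. The same pattern in isolation (FlushRequeue and PublishDelivered are illustrative stand-ins for the rabbit_disk_queue calls; the real clause also guards on durability):

    requeue_in_order(FlushRequeue, PublishDelivered, MessagesWithAckTags) ->
        Pending =
            lists:foldl(
              fun ({_Msg, AckTag}, RQ) when AckTag =/= noack ->
                      [{AckTag, true} | RQ];
                  ({Msg, noack}, RQ) ->
                      ok = flush(FlushRequeue, RQ),
                      ok = PublishDelivered(Msg),
                      []
              end, [], MessagesWithAckTags),
        flush(FlushRequeue, Pending).

    flush(_FlushRequeue, []) -> ok;
    flush(FlushRequeue, RQ)  -> FlushRequeue(lists:reverse(RQ)).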
rdq_match_message( #basic_message { guid = MsgId, content = @@ -784,7 +784,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> List = lists:seq(1, MsgCount), {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) + [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List, _ <- Qs] end, fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List, []) || Q <- Qs] end @@ -820,7 +820,7 @@ rdq_stress_gc(MsgCount) -> MsgSizeBytes = 256*1024, Msg = <<0:(8*MsgSizeBytes)>>, % 256KB List = lists:seq(1, MsgCount), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- List], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List], rabbit_disk_queue:tx_commit(q, List, []), StartChunk = round(MsgCount / 20), % 5% AckList = @@ -862,7 +862,7 @@ rdq_test_startup_with_queue_gaps() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, true)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half @@ -918,7 +918,7 @@ rdq_test_redeliver() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half @@ -970,7 +970,7 @@ rdq_test_purge() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- All], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], rabbit_disk_queue:tx_commit(q, All, []), io:format("Publish done~n", []), %% deliver first half @@ -1161,7 +1161,7 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc rabbit_mixed_queue:ack(AckTags, MS8) end, 0 = rabbit_mixed_queue:length(MS9), - Msg = rdq_message(0, <<0:256>>), + Msg = rdq_message(0, <<0:256>>, false), {ok, AckTag, MS10} = rabbit_mixed_queue:publish_delivered(Msg, MS9), {ok,MS11} = rabbit_mixed_queue:ack([{Msg, AckTag}], MS10), 0 = rabbit_mixed_queue:length(MS11), @@ -1174,12 +1174,12 @@ rdq_test_disk_queue_modes() -> Total = 1000, Half1 = lists:seq(1,round(Total/2)), Half2 = lists:seq(1 + round(Total/2), Total), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- Half1], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half1], ok = rabbit_disk_queue:tx_commit(q, Half1, []), io:format("Publish done~n", []), ok = rabbit_disk_queue:to_disk_only_mode(), io:format("To Disk Only done~n", []), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg)) || N <- Half2], + [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half2], ok = rabbit_disk_queue:tx_commit(q, Half2, []), Seqs = [begin Remaining = Total - N, -- cgit v1.2.1 From 81c1f59db47c4904e92e3ce57eef1d208f9345ad Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Jul 2009 17:26:37 +0100 Subject: ok, limits on the cache, and on prefetch. I decided the right thing to do is to prefer older messages in the cache to younger ones. This is because they're more likely to be used sooner. Which means we just fill it up and then leave it alone, which is nice and simple. 
Things are pretty much ok with it now, but the whole notion of prefetch is still wrong and needs to be changed to be driven by the mixed queue, not the disk_queue. For one reason, currently, if two or more queues issue prefetch requests, and the first fills the cache up, then the 2nd won't do anything. The cache is useful, but shouldn't be abused for prefetching purposes. The two things are separate. --- src/rabbit_disk_queue.erl | 48 +++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 95ed8adf..178771b8 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -66,6 +66,8 @@ -define(FILE_EXTENSION_DETS, ".dets"). -define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). -define(MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds +-define(BATCH_SIZE, 10000). +-define(CACHE_MAX_SIZE, 10485760). -define(SERVER, ?MODULE). @@ -856,7 +858,7 @@ fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> true = try case ets:update_counter(Cache, MsgId, {4, -1}) of - N when N =< 0 -> ets:delete(Cache, MsgId); + N when N =< 0 -> true = ets:delete(Cache, MsgId); _N -> true end catch error:badarg -> @@ -867,15 +869,21 @@ decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> end, ok. -insert_into_cache(Message = #basic_message { guid = MsgId }, - MsgSize, Forced, #dqstate { message_cache = Cache }) -> - Count = case Forced of - true -> 0; - false -> 1 - end, - true = - ets:insert_new(Cache, {MsgId, Message, MsgSize, Count}), - ok. +insert_into_cache(Message = #basic_message { guid = MsgId }, MsgSize, + Forced, State = #dqstate { message_cache = Cache }) -> + case cache_is_full(State) of + true -> ok; + false -> Count = case Forced of + true -> 0; + false -> 1 + end, + true = ets:insert_new(Cache, {MsgId, Message, + MsgSize, Count}), + ok + end. + +cache_is_full(#dqstate { message_cache = Cache }) -> + ets:info(Cache, memory) > ?CACHE_MAX_SIZE. %% ---- INTERNAL RAW FUNCTIONS ---- @@ -905,15 +913,19 @@ internal_prefetch(Q, Count, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), Length = WriteSeqId - ReadSeqId, Count1 = lists:min([Length, Count]), - StateN = - lists:foldl( - fun(N, State1) -> - {ok, _MsgStuff, State2} = - internal_read_message(Q, N, true, true, true, State1), - State2 - end, State, lists:seq(ReadSeqId, ReadSeqId + Count1 - 1)), + StateN = internal_prefetch(Q, ReadSeqId + Count1 - 1, ReadSeqId, State), {ok, StateN}. +internal_prefetch(_Q, Target, Target, State) -> + State; +internal_prefetch(Q, Target, ReadSeqId, State) -> + {ok, _MsgStuff, State1} = + internal_read_message(Q, ReadSeqId, true, true, true, State), + case cache_is_full(State1) of + true -> State1; + false -> internal_prefetch(Q, Target, ReadSeqId + 1, State1) + end. + internal_foldl(Q, Fun, Init, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), internal_foldl(Q, WriteSeqId, Fun, State, Init, ReadSeqId). 
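One subtlety of the new cap: ets:info(Cache, memory) counts machine words allocated to the table, not bytes, so ?CACHE_MAX_SIZE bounds the cache in words. The admit-if-room check in isolation (a sketch mirroring insert_into_cache and cache_is_full above; MaxWords is an illustrative parameter):

    maybe_cache(Cache, MaxWords, MsgId, Message, MsgSize, Count) ->
        case ets:info(Cache, memory) > MaxWords of
            true  -> full;   %% prefer what is already cached
            false -> true = ets:insert_new(Cache, {MsgId, Message,
                                                   MsgSize, Count}),
                     cached
        end.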
@@ -1571,7 +1583,7 @@ load_from_disk(State) -> {State3, OldQ, MsgSeqIds, Len}) -> {State4, MsgSeqIds1, Len1} = case {OldQ == Q, MsgSeqIds} of - {true, _} when Len < 10000 -> + {true, _} when Len < ?BATCH_SIZE -> {State3, MsgSeqIds, Len}; {false, []} -> {State3, MsgSeqIds, Len}; {_, _} -> -- cgit v1.2.1 From 1f7ab79444fb880cc6eac0952c10cefd65881736 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 19 Jul 2009 00:48:48 +0100 Subject: Spotted and corrected some mistakes where messages published to the mixed queue in disk-only mode would not be marked delivered even if they were persistent, thus resulting in redelivery on broker startup without the message being marked predelivered. Also, spotted (but not yet fixed) a bug in commit coalescing in which the mnesia transaction is always committing before the messages are flushed to disk. What should happen is that if coalescing is going to happen, the mnesia transaction should be delayed too, and happen only _after_ the disk sync. I.e. it doesn't matter if we disk sync and then the mnesia txn fails, but it does matter if the mnesia txn succeeds and then the disk sync fails. Also, I think I've worked out how to do prefetching properly. It's not actually that complex. --- src/rabbit_disk_queue.erl | 10 +++++----- src/rabbit_mixed_queue.erl | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 178771b8..6bd3e046 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -458,10 +458,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, State2, {binary, ?HIBERNATE_AFTER_MIN}, 0}. handle_call({deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, true, false, State), + {ok, Result, State1} = internal_deliver(Q, true, State), reply(Result, State1); handle_call({phantom_deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, false, false, State), + {ok, Result, State1} = internal_deliver(Q, false, State), reply(Result, State1); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> {Reply, State1} = @@ -887,7 +887,7 @@ cache_is_full(#dqstate { message_cache = Cache }) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, ReadMsg, FakeDeliver, +internal_deliver(Q, ReadMsg, State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of {SeqId, SeqId} -> {ok, empty, State}; @@ -895,7 +895,7 @@ internal_deliver(Q, ReadMsg, Remaining = WriteSeqId - ReadSeqId - 1, {ok, Result, State1} = internal_read_message( - Q, ReadSeqId, ReadMsg, FakeDeliver, false, State), + Q, ReadSeqId, ReadMsg, false, false, State), true = ets:insert(Sequences, {Q, ReadSeqId+1, WriteSeqId}), {ok, @@ -979,7 +979,7 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) -

internal_auto_ack(Q, State) -> - case internal_deliver(Q, false, true, State) of + case internal_deliver(Q, false, State) of {ok, empty, State1} -> {ok, State1}; {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Remaining}, State1} -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 3b86596b..50b75789 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -282,7 +282,7 @@ publish_delivered(Msg = memory_size = QSize, memory_gain = Gain }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> Persist = IsDurable andalso IsPersistent, - rabbit_disk_queue:publish(Q, Msg, false), + ok = rabbit_disk_queue:publish(Q, Msg, true), MsgSize = size_of_message(Msg), State1 = State #mqstate { memory_size = QSize + MsgSize, memory_gain = Gain + MsgSize }, @@ -291,7 +291,7 @@ publish_delivered(Msg = %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag - {MsgId, IsPersistent, false, AckTag, 0} = + {MsgId, IsPersistent, true, AckTag, 0} = rabbit_disk_queue:phantom_deliver(Q), {ok, AckTag, State1}; false -> -- cgit v1.2.1 From 1d560b34acfef88401c9814fa1b825085addfc5f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 19 Jul 2009 16:47:14 +0100 Subject: Fixed the commit bug. Really this should probably be in bug20470 but I really didn't want to have to deal with merging and the other information about this bug is in the above comments in 20980 so it's in here. Now on commit, we test to see if we need to sync the current file. If so then we just store all the txn details in state for later dealing with. If not, we really do the commit there and then and reply. Interestingly, performance is actually better now than it was (see details in bug20470) but, eg, the one-in-one-out at altitude test has further reduced fsyncs from 21 to 6 and now completes in 2.1 seconds, not 3.6 (altitude of 1000, then 5000 @ one in, one out, then 1000 drain). All tests pass. We now guarantee that the messages will be fsync'd to disk _before_ anything is done to mnesia, in all cases of a txn_commit. --- src/rabbit_disk_queue.erl | 99 +++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 51 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 6bd3e046..0e43c387 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -92,7 +92,7 @@ file_size_limit, %% how big can our files get? read_file_handles, %% file handles for reading (LRU) read_file_handles_limit, %% how many file handles can we open? 
- on_sync_froms, %% list of commiters to run on sync (reversed) + on_sync_txns, %% list of commiters to run on sync (reversed) commit_timer_ref, %% TRef for our interval timer last_sync_offset, %% current_offset at the last time we sync'd message_cache, %% ets message cache @@ -423,7 +423,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> file_size_limit = FileSizeLimit, read_file_handles = {dict:new(), gb_trees:empty()}, read_file_handles_limit = ReadFileHandlesLimit, - on_sync_froms = [], + on_sync_txns = [], commit_timer_ref = undefined, last_sync_offset = 0, message_cache = ets:new(?CACHE_ETS_NAME, @@ -464,12 +464,9 @@ handle_call({phantom_deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, false, State), reply(Result, State1); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> - {Reply, State1} = + State1 = internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State), - case Reply of - true -> reply(ok, State1); - false -> noreply(State1) - end; + noreply(State1); handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), reply(Count, State1); @@ -683,12 +680,12 @@ noreply(NewState) -> noreply(NewState, MinPri) -> noreply1(start_memory_timer(NewState), MinPri). -noreply1(NewState = #dqstate { on_sync_froms = [], +noreply1(NewState = #dqstate { on_sync_txns = [], commit_timer_ref = undefined }, MinPri) -> {noreply, NewState, binary, MinPri}; noreply1(NewState = #dqstate { commit_timer_ref = undefined }, MinPri) -> {noreply, start_commit_timer(NewState), 0, MinPri}; -noreply1(NewState = #dqstate { on_sync_froms = [] }, MinPri) -> +noreply1(NewState = #dqstate { on_sync_txns = [] }, MinPri) -> {noreply, stop_commit_timer(NewState), binary, MinPri}; noreply1(NewState, MinPri) -> {noreply, NewState, 0, MinPri}. @@ -699,12 +696,12 @@ reply(Reply, NewState) -> reply(Reply, NewState, MinPri) -> reply1(Reply, start_memory_timer(NewState), MinPri). -reply1(Reply, NewState = #dqstate { on_sync_froms = [], +reply1(Reply, NewState = #dqstate { on_sync_txns = [], commit_timer_ref = undefined }, MinPri) -> {reply, Reply, NewState, binary, MinPri}; reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }, MinPri) -> {reply, Reply, start_commit_timer(NewState), 0, MinPri}; -reply1(Reply, NewState = #dqstate { on_sync_froms = [] }, MinPri) -> +reply1(Reply, NewState = #dqstate { on_sync_txns = [] }, MinPri) -> {reply, Reply, stop_commit_timer(NewState), binary, MinPri}; reply1(Reply, NewState, MinPri) -> {reply, Reply, NewState, 0, MinPri}. @@ -818,12 +815,12 @@ stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> State #dqstate { commit_timer_ref = undefined }. sync_current_file_handle(State = #dqstate { current_dirty = false, - on_sync_froms = [] }) -> + on_sync_txns = [] }) -> State; sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl, current_dirty = IsDirty, current_offset = CurOffset, - on_sync_froms = Froms, + on_sync_txns = Txns, last_sync_offset = SyncOffset }) -> SyncOffset1 = case IsDirty of @@ -831,10 +828,9 @@ sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl, CurOffset; false -> SyncOffset end, - lists:map(fun (From) -> gen_server2:reply(From, ok) end, - lists:reverse(Froms)), - State #dqstate { current_dirty = false, on_sync_froms = [], - last_sync_offset = SyncOffset1 }. + State1 = lists:foldl(fun internal_do_tx_commit/2, State, lists:reverse(Txns)), + State1 #dqstate { current_dirty = false, on_sync_txns = [], + last_sync_offset = SyncOffset1 }. 
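The resulting ordering guarantee can be read straight off sync_current_file_handle above: the file is fsync'd first, and only then are the parked transactions replayed, oldest first (hence the reverse). Boiled down to a sketch (DoCommit stands in for internal_do_tx_commit/2, which appears in the hunk that follows):

    %% Sketch: fsync, then run the commits that were waiting on it. A
    %% crash before the sync leaves mnesia untouched; a crash after it
    %% merely leaves synced messages awaiting their mnesia rows.
    sync_then_commit(FileHdl, Txns, DoCommit) ->
        ok = file:sync(FileHdl),
        lists:foreach(DoCommit, lists:reverse(Txns)).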
msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), @@ -1078,53 +1074,54 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, end. internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, - State = #dqstate { sequences = Sequences, - current_file_name = CurFile, + State = #dqstate { current_file_name = CurFile, current_dirty = IsDirty, - on_sync_froms = SyncFroms, + on_sync_txns = Txns, last_sync_offset = SyncOffset }) -> + NeedsSync = IsDirty andalso + lists:any(fun (MsgId) -> + [{MsgId, _RefCount, File, Offset, + _TotalSize, _IsPersistent}] = + dets_ets_lookup(State, MsgId), + File =:= CurFile andalso Offset >= SyncOffset + end, PubMsgIds), + TxnDetails = {Q, PubMsgIds, AckSeqIds, From}, + case NeedsSync of + true -> + Txns1 = [TxnDetails | Txns], + State #dqstate { on_sync_txns = Txns1 }; + false -> + internal_do_tx_commit(TxnDetails, State) + end. + +internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, + State = #dqstate { sequences = Sequences }) -> {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - WriteSeqId = InitWriteSeqId + erlang:length(PubMsgIds), - {atomic, {InCurFile, WriteSeqId, State1}} = + {atomic, {WriteSeqId, State1}} = mnesia:transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - %% must deal with publishes first, if we didn't - %% then we could end up acking a message before - %% it's been published, which is clearly - %% nonsense. I.e. in commit, do not do things in an - %% order which _could_not_ have happened. - {InCurFile1, WriteSeqId1} = + {ok, WriteSeqId1} = lists:foldl( - fun (MsgId, {InCurFileAcc, SeqId}) -> - [{MsgId, _RefCount, File, Offset, - _TotalSize, _IsPersistent}] = - dets_ets_lookup(State, MsgId), - ok = mnesia:write( - rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = - {Q, SeqId}, - msg_id = MsgId, - is_delivered = false - }, - write), - {InCurFileAcc orelse (File =:= CurFile andalso - Offset >= SyncOffset), - SeqId + 1} - end, {false, InitWriteSeqId}, PubMsgIds), - {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), - {InCurFile1, WriteSeqId1, State2} + fun (MsgId, {ok, SeqId}) -> + {mnesia:write( + rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, + msg_id = MsgId, + is_delivered = false + }, write), + SeqId + 1} + end, {ok, InitWriteSeqId}, PubMsgIds), + {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), + {WriteSeqId1, State2} end), true = case PubMsgIds of [] -> true; _ -> ets:insert(Sequences, {Q, InitReadSeqId, WriteSeqId}) end, - if IsDirty andalso InCurFile -> - {false, State1 #dqstate { on_sync_froms = [From | SyncFroms] }}; - true -> - {true, State1} - end. + gen_server2:reply(From, ok), + State1. internal_publish(Q, Message = #basic_message { guid = MsgId }, IsDelivered, State) -> -- cgit v1.2.1 From 1b391d8be1fc7156555fcb3421e12fb624ce7135 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Jul 2009 11:29:18 +0100 Subject: Stripping out old broken prefetch. Also reverted gen_server2 back to the revision at the end of bug21087 on the grounds that the min_pri stuff wasn't enormously compelling and added a good chunk of complexity. Also, I don't believe it'll be needed for the new prefetcher. All tests pass. 
--- src/gen_server2.erl | 233 ++++++++++----------------------------------- src/rabbit_disk_queue.erl | 90 ++++++----------- src/rabbit_mixed_queue.erl | 14 +-- 3 files changed, 83 insertions(+), 254 deletions(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index cf54811f..e46f2645 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -37,32 +37,6 @@ %% Explicit timeouts (i.e. not 'binary') from the handle_* functions %% are still supported, and do not have any effect on the current %% timeout value. -%% -%% 6) init/1 can also return (either a further arg in addition to -%% timeout above, or as a key-value list with the timeout as {timeout, -%% Timeout}) a minimum priority (key: min_priority). This can also be -%% returned from handle_* functions (i.e. {noreply, NewState} or -%% {noreply, NewState, Timeout} or {noreply, NewState, Timeout, -%% MinPri} or {noreply, NewState, [{min_priority, MinPri}]} or -%% {noreply, NewState, [{min_priority, MinPri}, {timeout, -%% Timeout}]}). What this does is to only allow messages greater than -%% the indicated priority through to the module. To allow any message -%% through (as is the default), use 'any'. One effect of this is that -%% when hibernating, the process can be woken up to receive a message -%% which it then realises it is not interested in. When this happens, -%% handle_info(roused_and_disinterested, State) will be called as soon -%% as there are no further messages to process (i.e. upon waking, the -%% message queue is drained, and a timeout of 0 is used). -%% -%% This feature means that you can delay processing lower priority -%% messages. For example, when a min_priority of 0 is combined with -%% the binary backoff timeout, you can delay processing any -%% negative-priority messages until the first timeout fires which -%% indicates that, given a steady state, the process has been idle for -%% sufficiently long that it's reasonable to expect it to be -%% uninterrupted by higher-priority messages for some little while; -%% thus preventing low-priority, but lengthy jobs from getting in the -%% way of higher priority jobs that need quick responses. %% All modifications are (C) 2009 LShift Ltd. @@ -159,8 +133,7 @@ cast/2, pcast/3, reply/2, abcast/2, abcast/3, multi_call/2, multi_call/3, multi_call/4, - enter_loop/3, enter_loop/4, enter_loop/5, enter_loop/6, - wake_hib/8]). + enter_loop/3, enter_loop/4, enter_loop/5, wake_hib/7]). -export([behaviour_info/1]). @@ -338,15 +311,7 @@ multi_call(Nodes, Name, Req, Timeout) %%----------------------------------------------------------------- -%% enter_loop(Mod, Options, State) -> _ -%% enter_loop(Mod, Options, State, ServerName) -> _ -%% enter_loop(Mod, Options, State, [{Key, Value}]) -> _ -%% enter_loop(Mod, Options, State, Timeout) -> _ -%% enter_loop(Mod, Options, State, ServerName, [{Key, Value}]) -> _ -%% enter_loop(Mod, Options, State, ServerName, Timeout) -> _ -%% enter_loop(Mod, Options, State, ServerName, Timeout, MinPri) -> _ -%% -%% {Key, Value} = {min_priority, MinPri} | {timeout, Timeout} +%% enter_loop(Mod, Options, State, , ) ->_ %% %% Description: Makes an existing process into a gen_server. %% The calling process will enter the gen_server receive @@ -357,32 +322,22 @@ multi_call(Nodes, Name, Req, Timeout) %% process, including registering a name for it. %%----------------------------------------------------------------- enter_loop(Mod, Options, State) -> - enter_loop(Mod, Options, State, self(), []). + enter_loop(Mod, Options, State, self(), infinity). 
enter_loop(Mod, Options, State, ServerName = {_, _}) -> - enter_loop(Mod, Options, State, ServerName, []); - -enter_loop(Mod, Options, State, Opts) when is_list(Opts) -> - enter_loop(Mod, Options, State, self(), Opts); + enter_loop(Mod, Options, State, ServerName, infinity); enter_loop(Mod, Options, State, Timeout) -> - enter_loop(Mod, Options, State, self(), [{timeout, Timeout}]). + enter_loop(Mod, Options, State, self(), Timeout). -enter_loop(Mod, Options, State, ServerName, Opts) when is_list(Opts) -> +enter_loop(Mod, Options, State, ServerName, Timeout) -> Name = get_proc_name(ServerName), Parent = get_parent(), Debug = debug_options(Name, Options), Queue = priority_queue:new(), - [{timeout, Timeout}, {min_priority, MinPri}] = extract_timeout_minpri(Opts), {Timeout1, TimeoutState} = build_timeout_state(Timeout), - loop(Parent, Name, State, Mod, Timeout1, TimeoutState, MinPri, Queue, Debug); - -enter_loop(Mod, Options, State, ServerName, Timeout) -> - enter_loop(Mod, Options, State, ServerName, [{timeout, Timeout}]). + loop(Parent, Name, State, Mod, Timeout1, TimeoutState, Queue, Debug). -enter_loop(Mod, Options, State, ServerName, Timeout, MinPri) -> - enter_loop(Mod, Options, State, ServerName, - [{timeout, Timeout}, {min_priority, MinPri}]). %%%======================================================================== %%% Gen-callback functions %%%======================================================================== @@ -402,19 +357,13 @@ init_it(Starter, Parent, Name0, Mod, Args, Options) -> Queue = priority_queue:new(), case catch Mod:init(Args) of {ok, State} -> - proc_lib:init_ack(Starter, {ok, self()}), - loop(Parent, Name, State, Mod, infinity, undefined, - any, Queue, Debug); + proc_lib:init_ack(Starter, {ok, self()}), + loop(Parent, Name, State, Mod, infinity, undefined, Queue, Debug); {ok, State, Timeout} -> proc_lib:init_ack(Starter, {ok, self()}), {Timeout1, TimeoutState} = build_timeout_state(Timeout), - loop(Parent, Name, State, Mod, Timeout1, TimeoutState, - any, Queue, Debug); - {ok, State, Timeout, MinPri} -> - proc_lib:init_ack(Starter, {ok, self()}), - {Timeout1, TimeoutState} = build_timeout_state(Timeout), - loop(Parent, Name, State, Mod, Timeout1, TimeoutState, - MinPri, Queue, Debug); + loop(Parent, Name, State, Mod, Timeout1, TimeoutState, Queue, + Debug); {stop, Reason} -> %% For consistency, we must make sure that the %% registered name (if any) is unregistered before @@ -458,71 +407,57 @@ build_timeout_state(Timeout) -> _ -> {Timeout, undefined} end. -extract_timeout_minpri(Opts) -> - rabbit_misc:keygets([{timeout, infinity}, {min_priority, any}], Opts). - %%%======================================================================== %%% Internal functions %%%======================================================================== %%% --------------------------------------------------- %%% The MAIN loop. 
%%% --------------------------------------------------- -loop(Parent, Name, State, Mod, hibernate, undefined, MinPri, Queue, Debug) -> - proc_lib:hibernate(?MODULE, wake_hib, [Parent, Name, State, Mod, undefined, - MinPri, Queue, Debug]); -loop(Parent, Name, State, Mod, hibernate, {Current, Min, undefined}, - MinPri, Queue, Debug) -> +loop(Parent, Name, State, Mod, hibernate, undefined, Queue, Debug) -> + proc_lib:hibernate(?MODULE,wake_hib, + [Parent, Name, State, Mod, undefined, Queue, Debug]); +loop(Parent, Name, State, Mod, hibernate, {Current, Min, undefined}, Queue, + Debug) -> proc_lib:hibernate(?MODULE,wake_hib,[Parent, Name, State, Mod, - {Current, Min, now()}, - MinPri, Queue, Debug]); -loop(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Debug) -> + {Current, Min, now()}, Queue, Debug]); +loop(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug) -> receive Input -> loop(Parent, Name, State, Mod, - Time, TimeoutState, MinPri, in(Input, Queue), Debug) + Time, TimeoutState, in(Input, Queue), Debug) after 0 -> process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, - MinPri, Queue, Debug) + Queue, Debug, false) end. -process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, - Debug) -> - Res = case MinPri of - any -> priority_queue:out(Queue); - _ -> priority_queue:out(MinPri, Queue) - end, - case Res of +process_next_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, + Debug, Hib) -> + case priority_queue:out(Queue) of {{value, Msg}, Queue1} -> process_msg(Parent, Name, State, Mod, - Time, TimeoutState, Queue1, Debug, Msg); + Time, TimeoutState, Queue1, Debug, Hib, Msg); {empty, Queue1} -> Time1 = case {Time, TimeoutState} of - {hibernate, _} -> 0; {binary, {Current, _Min, undefined}} -> Current; _ -> Time end, receive Input -> loop(Parent, Name, State, Mod, - Time, TimeoutState, MinPri, in(Input, Queue1), Debug) + Time, TimeoutState, in(Input, Queue1), Debug) after Time1 -> process_msg(Parent, Name, State, Mod, - Time, TimeoutState, Queue1, Debug, - case Time == hibernate of - true -> {roused_and_disinterested, MinPri}; - false when MinPri =:= any -> timeout; - false -> {timeout, MinPri} - end) + Time, TimeoutState, Queue1, Debug, Hib, timeout) end end. -wake_hib(Parent, Name, State, Mod, TimeoutState, MinPri, Queue, Debug) -> +wake_hib(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> Msg = receive Input -> Input end, TimeoutState1 = adjust_hibernate_after(TimeoutState), process_next_msg(Parent, Name, State, Mod, hibernate, TimeoutState1, - MinPri, in(Msg, Queue), Debug). + in(Msg, Queue), Debug, true). adjust_hibernate_after(undefined) -> undefined; @@ -553,12 +488,15 @@ in(Input, Queue) -> priority_queue:in(Input, Queue). process_msg(Parent, Name, State, Mod, Time, TimeoutState, Queue, - Debug, Msg) -> + Debug, _Hib, Msg) -> case Msg of {system, From, Req} -> sys:handle_system_msg (Req, From, Parent, ?MODULE, Debug, [Name, State, Mod, Time, TimeoutState, Queue]); + %% gen_server puts Hib on the end as the 7th arg, but that + %% version of the function seems not to be documented so + %% leaving out for now. 
{'EXIT', Parent, Reason} -> terminate(Reason, Name, Msg, Mod, State, Debug); _Msg when Debug =:= [] -> @@ -769,34 +707,14 @@ handle_msg({'$gen_call', From, Msg}, case catch Mod:handle_call(Msg, From, State) of {reply, Reply, NState} -> reply(From, Reply), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, - []); - {reply, Reply, NState, Opts} when is_list(Opts) -> - reply(From, Reply), - [{timeout, Time}, {min_priority, MinPri}] = - extract_timeout_minpri(Opts), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - []); - {reply, Reply, NState, Time} -> + loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); + {reply, Reply, NState, Time1} -> reply(From, Reply), - loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, []); - {reply, Reply, NState, Time, MinPri} -> - reply(From, Reply), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - []); + loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); {noreply, NState} -> - loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, - []); - {noreply, NState, Opts} when is_list(Opts) -> - [{timeout, Time}, {min_priority, MinPri}] = - extract_timeout_minpri(Opts), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - []); - {noreply, NState, Time} -> - loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, []); - {noreply, NState, Time, MinPri} -> - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - []); + loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); + {noreply, NState, Time1} -> + loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); {stop, Reason, Reply, NState} -> {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, NState, [])), @@ -816,44 +734,20 @@ handle_msg({'$gen_call', From, Msg}, case catch Mod:handle_call(Msg, From, State) of {reply, Reply, NState} -> Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, - Debug1); - {reply, Reply, NState, Opts} when is_list(Opts) -> - Debug1 = reply(Name, From, Reply, NState, Debug), - [{timeout, Time}, {min_priority, MinPri}] = - extract_timeout_minpri(Opts), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - Debug1); - {reply, Reply, NState, Time} -> - Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, + loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, Debug1); - {reply, Reply, NState, Time, MinPri} -> + {reply, Reply, NState, Time1} -> Debug1 = reply(Name, From, Reply, NState, Debug), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - Debug1); + loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); {noreply, NState} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, - Debug1); - {noreply, NState, Opts} when is_list(Opts) -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - [{timeout, Time}, {min_priority, MinPri}] = - extract_timeout_minpri(Opts), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - Debug1); - {noreply, NState, Time} -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, + loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, Debug1); - 
{noreply, NState, Time, MinPri} -> + {noreply, NState, Time1} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - Debug1); + loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); {stop, Reason, Reply, NState} -> {'EXIT', R} = (catch terminate(Reason, Name, Msg, Mod, NState, Debug)), @@ -873,18 +767,9 @@ handle_common_reply(Reply, Parent, Name, Msg, Mod, State, TimeoutState, Queue) -> case Reply of {noreply, NState} -> - loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, - []); - {noreply, NState, Opts} when is_list(Opts) -> - [{timeout, Time}, {min_priority, MinPri}] = - extract_timeout_minpri(Opts), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - []); - {noreply, NState, Time} -> - loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, []); - {noreply, NState, Time, MinPri} -> - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - []); + loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, []); + {noreply, NState, Time1} -> + loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, []); {stop, Reason, NState} -> terminate(Reason, Name, Msg, Mod, NState, []); {'EXIT', What} -> @@ -899,25 +784,12 @@ handle_common_reply(Reply, Parent, Name, Msg, Mod, State, TimeoutState, Queue, {noreply, NState} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, infinity, TimeoutState, any, Queue, - Debug1); - {noreply, NState, Opts} when is_list(Opts) -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - [{timeout, Time}, {min_priority, MinPri}] = - extract_timeout_minpri(Opts), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, + loop(Parent, Name, NState, Mod, infinity, TimeoutState, Queue, Debug1); - {noreply, NState, Time} -> + {noreply, NState, Time1} -> Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, {noreply, NState}), - loop(Parent, Name, NState, Mod, Time, TimeoutState, any, Queue, - Debug1); - {noreply, NState, Time, MinPri} -> - Debug1 = sys:handle_debug(Debug, {?MODULE, print_event}, Name, - {noreply, NState}), - loop(Parent, Name, NState, Mod, Time, TimeoutState, MinPri, Queue, - Debug1); + loop(Parent, Name, NState, Mod, Time1, TimeoutState, Queue, Debug1); {stop, Reason, NState} -> terminate(Reason, Name, Msg, Mod, NState, Debug); {'EXIT', What} -> @@ -935,9 +807,8 @@ reply(Name, {To, Tag}, Reply, State, Debug) -> %%----------------------------------------------------------------- %% Callback functions for system messages handling. %%----------------------------------------------------------------- -system_continue(Parent, Debug, [Name, State, Mod, Time, TimeoutState, MinPri, - Queue]) -> - loop(Parent, Name, State, Mod, Time, TimeoutState, MinPri, Queue, Debug). +system_continue(Parent, Debug, [Name, State, Mod, Time, TimeoutState, Queue]) -> + loop(Parent, Name, State, Mod, Time, TimeoutState, Queue, Debug). -ifdef(use_specs). -spec system_terminate(_, _, _, [_]) -> no_return(). 
diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 0e43c387..e8a63bc3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -42,7 +42,7 @@ tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, - requeue_next_n/2, prefetch/2, length/1, foldl/3 + requeue_next_n/2, length/1, foldl/3 ]). -export([filesync/0, cache_info/0]). @@ -280,7 +280,6 @@ -spec(cache_info/0 :: () -> [{atom(), term()}]). -spec(report_memory/0 :: () -> 'ok'). -spec(set_mode/1 :: ('disk' | 'mixed') -> 'ok'). --spec(prefetch/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -endif. @@ -361,9 +360,6 @@ report_memory() -> set_mode(Mode) -> gen_server2:cast(?SERVER, {set_mode, Mode}). -prefetch(Q, Count) -> - gen_server2:pcast(?SERVER, -1, {prefetch, Q, Count}). - %% ---- GEN-SERVER INTERNAL API ---- init([FileSizeLimit, ReadFileHandlesLimit]) -> @@ -455,7 +451,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% ets_bytes_per_record otherwise. ok = rabbit_queue_mode_manager:report_memory(self(), 0, false), ok = report_memory(false, State2), - {ok, State2, {binary, ?HIBERNATE_AFTER_MIN}, 0}. + {ok, State2, {binary, ?HIBERNATE_AFTER_MIN}}. handle_call({deliver, Q}, _From, State) -> {ok, Result, State1} = internal_deliver(Q, true, State), @@ -535,25 +531,18 @@ handle_cast(report_memory, State) -> %% call noreply1/2, not noreply/1/2, as we don't want to restart the %% memory_report_timer %% by unsetting the timer, we force a report on the next normal message - noreply1(State #dqstate { memory_report_timer = undefined }, 0); -handle_cast({prefetch, Q, Count}, State) -> - {ok, State1} = internal_prefetch(Q, Count, State), - noreply(State1, any). %% set minpri to any + noreply1(State #dqstate { memory_report_timer = undefined }). handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; -handle_info({timeout, 0}, State = #dqstate { commit_timer_ref = undefined }) -> - %% this is the binary timeout coming back, with minpri = 0 - %% don't use noreply/1/2 or noreply1/2 as they'll restart the memory timer - %% set timeout to 0, and go pick up any low priority messages - {noreply, stop_memory_timer(State), 0, any}; -handle_info({timeout, 0}, State) -> - %% must have commit_timer set, so timeout was 0, and we're not hibernating - noreply(sync_current_file_handle(State)); -handle_info(timeout, State) -> - %% no minpri supplied, so it must have been 'any', so go hibernate +handle_info(timeout, State = #dqstate { commit_timer_ref = undefined }) -> + %% this is the binary timeout coming back + %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer ok = report_memory(true, State), - {noreply, State, hibernate, any}. + {noreply, stop_memory_timer(State), hibernate}; +handle_info(timeout, State) -> + %% must have commit_timer set, so timeout was 0, and we're not hibernating + noreply(sync_current_file_handle(State)). terminate(_Reason, State) -> shutdown(State). @@ -675,36 +664,30 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, ets_bytes_per_record = undefined }. noreply(NewState) -> - noreply(NewState, 0). - -noreply(NewState, MinPri) -> - noreply1(start_memory_timer(NewState), MinPri). + noreply1(start_memory_timer(NewState)). 
noreply1(NewState = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }, MinPri) -> - {noreply, NewState, binary, MinPri}; -noreply1(NewState = #dqstate { commit_timer_ref = undefined }, MinPri) -> - {noreply, start_commit_timer(NewState), 0, MinPri}; -noreply1(NewState = #dqstate { on_sync_txns = [] }, MinPri) -> - {noreply, stop_commit_timer(NewState), binary, MinPri}; -noreply1(NewState, MinPri) -> - {noreply, NewState, 0, MinPri}. + commit_timer_ref = undefined }) -> + {noreply, NewState, binary}; +noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> + {noreply, start_commit_timer(NewState), 0}; +noreply1(NewState = #dqstate { on_sync_txns = [] }) -> + {noreply, stop_commit_timer(NewState), binary}; +noreply1(NewState) -> + {noreply, NewState, 0}. reply(Reply, NewState) -> - reply(Reply, NewState, 0). - -reply(Reply, NewState, MinPri) -> - reply1(Reply, start_memory_timer(NewState), MinPri). + reply1(Reply, start_memory_timer(NewState)). reply1(Reply, NewState = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }, MinPri) -> - {reply, Reply, NewState, binary, MinPri}; -reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }, MinPri) -> - {reply, Reply, start_commit_timer(NewState), 0, MinPri}; -reply1(Reply, NewState = #dqstate { on_sync_txns = [] }, MinPri) -> - {reply, Reply, stop_commit_timer(NewState), binary, MinPri}; -reply1(Reply, NewState, MinPri) -> - {reply, Reply, NewState, 0, MinPri}. + commit_timer_ref = undefined }) -> + {reply, Reply, NewState, binary}; +reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> + {reply, Reply, start_commit_timer(NewState), 0}; +reply1(Reply, NewState = #dqstate { on_sync_txns = [] }) -> + {reply, Reply, stop_commit_timer(NewState), binary}; +reply1(Reply, NewState) -> + {reply, Reply, NewState, 0}. form_filename(Name) -> filename:join(base_directory(), Name). @@ -905,23 +888,6 @@ internal_deliver(Q, ReadMsg, end, State1} end. -internal_prefetch(Q, Count, State = #dqstate { sequences = Sequences }) -> - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - Length = WriteSeqId - ReadSeqId, - Count1 = lists:min([Length, Count]), - StateN = internal_prefetch(Q, ReadSeqId + Count1 - 1, ReadSeqId, State), - {ok, StateN}. - -internal_prefetch(_Q, Target, Target, State) -> - State; -internal_prefetch(Q, Target, ReadSeqId, State) -> - {ok, _MsgStuff, State1} = - internal_read_message(Q, ReadSeqId, true, true, true, State), - case cache_is_full(State1) of - true -> State1; - false -> internal_prefetch(Q, Target, ReadSeqId + 1, State1) - end. - internal_foldl(Q, Fun, Init, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), internal_foldl(Q, WriteSeqId, Fun, State, Init, ReadSeqId). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 50b75789..3c2f99e6 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -348,17 +348,9 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag, Rem}, State #mqstate { msg_buf = MsgBuf2, length = Rem }}. -maybe_prefetch(disk, _MsgBuf) -> - ok; -maybe_prefetch(mixed, MsgBuf) -> - case queue:peek(MsgBuf) of - empty -> - ok; - {value, {#basic_message {}, _IsDelivered}} -> - ok; - {value, {Q, Count}} -> - rabbit_disk_queue:prefetch(Q, Count) - end. +maybe_prefetch(_, _) -> + %% disable just for the time being + ok. 
remove_noacks(MsgsWithAcks) -> lists:foldl( -- cgit v1.2.1 From 9ad814b0063028a739d4186f1541200d0cdd4133 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Jul 2009 12:23:42 +0100 Subject: bare non-functioning skeleton of prefetcher. Essay written on design of prefetcher and its limitations. --- src/gen_server2.erl | 2 +- src/rabbit_mixed_queue.erl | 2 +- src/rabbit_queue_prefetcher.erl | 204 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 src/rabbit_queue_prefetcher.erl diff --git a/src/gen_server2.erl b/src/gen_server2.erl index e46f2645..6d8d2ff6 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -153,7 +153,7 @@ %%%========================================================================= -ifdef(use_specs). --spec behaviour_info(atom()) -> 'undefined' | [{atom(), arity()}]. +-spec behaviour_info(atom()) -> 'undefined' | [{atom(), any()}]. -endif. behaviour_info(callbacks) -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 3c2f99e6..fedc0e52 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -164,7 +164,7 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, false -> Commit1 = flush_requeue_to_disk_queue (Q, RequeueCount, Commit), - ok = rabbit_disk_queue:tx_publish(Msg), + ok = rabbit_disk_queue:tx_publish(Msg), %% TODO - this is resetting the delivered flag to false! (well, actually, in the commit, but nevertheless, it's wrong) case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of true -> ok = flush_messages_to_disk_queue(Q, Commit1), diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl new file mode 100644 index 00000000..79624f3d --- /dev/null +++ b/src/rabbit_queue_prefetcher.erl @@ -0,0 +1,204 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_prefetcher). + +-behaviour(gen_server2). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(HIBERNATE_AFTER_MIN, 1000). + +-record(pstate, + { msg_buf, + buf_length, + target_count, + fetched_count, + queue + }). 
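
%% Field notes (illustrative, based on how later commits in this
%% series use the record, and not part of the patch itself): msg_buf
%% buffers prefetched {Msg, IsDelivered, AckTag} entries, buf_length
%% is its current length, target_count is the total number of
%% messages the prefetcher was asked to fetch, fetched_count is how
%% many it has fetched so far, and queue names the disk queue being
%% prefetched.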
+ +%% The design of the prefetcher is based on the following: +%% +%% a) It must issue low-priority (-ve) requests to the disk queue for +%% the next message. +%% b) If the prefetcher is empty and the amqqueue_process +%% (mixed_queue) asks it for a message, it must exit immediately, +%% telling the mixed_queue that it is empty so that the mixed_queue +%% can then take the more efficient path and communicate with the +%% disk_queue directly +%% c) No message can accidentally be delivered twice, or lost +%% d) The prefetcher must only cause load when the disk_queue is +%% otherwise idle, and must not worsen performance in a loaded +%% situation. +%% +%% As such, it's a little tricky. It must never issue a call to the +%% disk_queue - if it did, then that could potentially block, thus +%% causing pain to the mixed_queue that needs fast answers as to +%% whether the prefetcher has prefetched content or not. It behaves as +%% follows: +%% +%% 1) disk_queue:prefetch(Q) +%% This is a low priority cast +%% +%% 2) The disk_queue may pick up the cast, at which point it'll read +%% the next message invoke prefetcher:publish(Msg). Normal priority +%% cast. Note that in the mean time, the mixed_queue could have +%% come along, found the prefetcher empty, asked it to exit. This +%% means the effective "reply" from the disk_queue will go no +%% where. As a result, the disk_queue must perform no modification +%% to the status of the message *or the queue* - do not mark the +%% message delivered, and do not advance the queue. If it did +%% advance the queue and the msg was then lost, then the queue +%% would have lost a msg that the mixed_queue would not pick up. +%% +%% 3) The prefetcher hopefully receives the cast from +%% prefetcher:publish(Msg). It then adds to its internal queue and +%% calls disk_queue:set_delivered_and_advance(Q) which is a normal +%% priority cast. This cannot be low-priority because if it was, +%% the mixed_queue could come along, drain the prefetcher, thus +%% catching the msg just sent by the disk_queue and then call +%% disk_queue:deliver(Q) which is normal priority call, which could +%% overtake the low-priority +%% disk_queue:set_delivered_and_advance(Q) cast and thus result in +%% the same msg being delivered by the queue twice. +%% +%% 4) The disk_queue receives the set_delivered_and_advance(Q) cast, +%% marks the msg at the head of the queue Q as delivered, and advances +%% the Q to the next msg. +%% +%% 5) If the prefetcher has not met its target then it goes back to +%% 1). Otherwise it just sits and waits for the mixed_queue to +%% drain it. +%% +%% Now at some point, the mixed_queue will come along and will call +%% prefetcher:drain(). Normal priority call. The prefetcher then +%% replies with its internal queue and the length of that queue. If +%% the prefetch target was reached, the prefetcher stops normally at +%% this point. If it hasn't been reached, then the prefetcher +%% continues to hang around (it almost certainly has issued a +%% disk_queue:prefetch(Q) cast and is waiting for a reply from the +%% disk_queue). +%% +%% If the mixed_queue calls prefetcher:drain() and the prefetcher's +%% internal queue is empty then the prefetcher replies with 'empty', +%% and it exits. This informs the mixed_queue that it should from now +%% on talk directly with the disk_queue and not via the +%% prefetcher. This is more efficient and the mixed_queue will use +%% normal priority blocking calls to the disk_queue and thus get +%% better service that way. 
When exiting in this way, two situations
+%% could occur:
+%%
+%% 1) The prefetcher has issued a disk_queue:prefetch(Q) which has not
+%% yet been picked up by the disk_queue. This msg won't go away and
+%% the disk_queue will eventually find it. However, when it does,
+%% it'll simply read the next message from the queue (which could now
+%% be empty), possibly populate the cache (no harm done) and try to
+%% call prefetcher:publish(Msg) which will go nowhere. However, the
+%% state of the queue and the state of the message has not been
+%% altered so the mixed_queue will be able to fetch this message as if
+%% it had never been prefetched.
+%%
+%% 2) The disk_queue has already picked up the disk_queue:prefetch(Q)
+%% low priority message and has read the message and replied, by
+%% calling prefetcher:publish(Msg). In fact, it's possible that the
+%% message is directly behind the call from mixed_queue to
+%% prefetcher:drain(). Same reasoning as in 1) applies - neither the
+%% queue's nor the message's state has been altered, so the
+%% mixed_queue can absolutely go and fetch the message again.
+%%
+%% The only point at which the queue is advanced and the message
+%% marked as delivered is when the prefetcher calls
+%% disk_queue:set_delivered_and_advance(Q). At this point the message
+%% has been received by the prefetcher and so we guarantee it will be
+%% passed to the mixed_queue when the mixed_queue tries to drain the
+%% prefetcher. We must therefore ensure that this msg can't also be
+%% delivered to the mixed_queue directly by the disk_queue through the
+%% mixed_queue calling disk_queue:deliver(Q) which is why the
+%% disk_queue:set_delivered_and_advance(Q) cast must be normal
+%% priority (or at least match the priority of disk_queue:deliver(Q)).
+%%
+%% Finally, the prefetcher is only created when the mixed_queue is
+%% operating in mixed mode and it sees that the next N messages are
+%% all on disk. During this phase, the mixed_queue can be asked to go
+%% back to disk_only mode. When this happens, it calls
+%% prefetcher:drain_and_stop() which behaves like two consecutive
+%% calls to drain() - i.e. replies with all prefetched messages and
+%% causes the prefetcher to exit.
+%%
+%% Note there is a flaw here in that we end up marking messages which
+%% have come through the prefetcher as delivered even if they don't
+%% get delivered (e.g. prefetcher fetches them, then broker
+%% dies). However, the alternative is that the mixed_queue must do a
+%% call to the disk_queue when it effectively passes them out to the
+%% rabbit_writer. This would hurt performance, and even at that stage,
+%% we have no guarantee that the message will really go out of the
+%% socket. What we do still have is that messages which have the
+%% redelivered bit set false really are guaranteed to have not been
+%% delivered already. Well, almost: if the disk_queue has a large
+%% backlog of messages then the prefetcher invocation of
+%% disk_queue:set_delivered_and_advance(Q) may not be acted upon
+%% before a crash. However, given that the prefetching is operating in
+%% lock-step with the disk_queue, this means that at most, 1 (one)
+%% message can fail to have its delivered flag raised. The alternative
+%% is that disk_queue:set_delivered_and_advance(Q) could be made into
+%% a call. However, if the disk_queue is heavily loaded, this can
+%% block the prefetcher for some time, which in turn can block the
+%% mixed_queue when it wants to drain the prefetcher.
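
%% Illustrative sketch (editorial, not part of the patch): how a
%% consumer such as the mixed_queue might fold a drain result back
%% into its own message buffer. The return shapes - 'empty', or
%% {Buffer, Length, Status} - follow the drain implementation that
%% appears later in this series; the function name is hypothetical.
%%
%% drain_prefetcher(Prefetcher, MsgBuf) ->
%%     case rabbit_queue_prefetcher:drain(Prefetcher) of
%%         empty ->
%%             %% nothing prefetched and the prefetcher has exited;
%%             %% talk directly to the disk_queue from now on
%%             {MsgBuf, undefined};
%%         {Fetched, _Len, finished} ->
%%             %% target met; the prefetcher has stopped normally
%%             {queue:join(Fetched, MsgBuf), undefined};
%%         {Fetched, _Len, continuing} ->
%%             %% still fetching; keep the pid for the next drain
%%             {queue:join(Fetched, MsgBuf), Prefetcher}
%%     end.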
+ +start_link(Queue, Count) -> + gen_server2:start_link(?MODULE, [Queue, Count], []). + +init([Q, Count]) -> + State = #pstate { msg_buf = queue:new(), + buf_length = 0, + target_count = Count, + fetched_count = 0, + queue = Q + }, + {ok, State, {binary, ?HIBERNATE_AFTER_MIN}}. + +handle_call(_Msg, _From, State) -> + {reply, confused, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(timeout, State) -> + {noreply, State, hibernate}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. -- cgit v1.2.1 From 9213c1cf787767a2dc345e6807b06c5bbf007b87 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Jul 2009 12:45:49 +0100 Subject: minor doc typeos and formatting --- src/rabbit_queue_prefetcher.erl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index 79624f3d..c9bbbb8f 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -72,15 +72,16 @@ %% This is a low priority cast %% %% 2) The disk_queue may pick up the cast, at which point it'll read -%% the next message invoke prefetcher:publish(Msg). Normal priority -%% cast. Note that in the mean time, the mixed_queue could have -%% come along, found the prefetcher empty, asked it to exit. This -%% means the effective "reply" from the disk_queue will go no -%% where. As a result, the disk_queue must perform no modification -%% to the status of the message *or the queue* - do not mark the -%% message delivered, and do not advance the queue. If it did -%% advance the queue and the msg was then lost, then the queue -%% would have lost a msg that the mixed_queue would not pick up. +%% the next message and invoke prefetcher:publish(Msg) - normal +%% priority cast. Note that in the mean time, the mixed_queue could +%% have come along, found the prefetcher empty, asked it to +%% exit. This means the effective "reply" from the disk_queue will +%% go no where. As a result, the disk_queue must perform no +%% modification to the status of the message *or the queue* - do +%% not mark the message delivered, and do not advance the queue. If +%% it did advance the queue and the msg was then lost, then the +%% queue would have lost a msg that the mixed_queue would not pick +%% up. %% %% 3) The prefetcher hopefully receives the cast from %% prefetcher:publish(Msg). It then adds to its internal queue and @@ -94,15 +95,15 @@ %% the same msg being delivered by the queue twice. %% %% 4) The disk_queue receives the set_delivered_and_advance(Q) cast, -%% marks the msg at the head of the queue Q as delivered, and advances -%% the Q to the next msg. +%% marks the msg at the head of the queue Q as delivered, and +%% advances the Q to the next msg. %% %% 5) If the prefetcher has not met its target then it goes back to %% 1). Otherwise it just sits and waits for the mixed_queue to %% drain it. %% %% Now at some point, the mixed_queue will come along and will call -%% prefetcher:drain(). Normal priority call. The prefetcher then +%% prefetcher:drain() - normal priority call. The prefetcher then %% replies with its internal queue and the length of that queue. If %% the prefetch target was reached, the prefetcher stops normally at %% this point. 
If it hasn't been reached, then the prefetcher -- cgit v1.2.1 From 598ebb1a6fa2ba1da0a6af189663758b01504e62 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Jul 2009 13:07:23 +0100 Subject: Fixed a bug in the mixed_queue which could lead to messages being marked undelivered when in fact they have been delivered when converting to disk_only mode. In truth, this bug didn't exist because there is no way in which a message could end up in that form in the mixed_queue which had previously been delivered. However, that will change when the prefetcher comes in, necessitating this "bug" gets fixed. The solution is to make tx_commit not just take a list of msg ids in the txn, but to take a list of {msgid, delivered} tuples. In this way it mirrors the disk_queue:publish function in that the delivery flag can be set explicitly. Tests adjusted. All tests pass. --- src/rabbit_disk_queue.erl | 10 +++++----- src/rabbit_mixed_queue.erl | 14 ++++++++------ src/rabbit_queue_prefetcher.erl | 2 +- src/rabbit_tests.erl | 22 +++++++++++++++------- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e8a63bc3..e739bfef 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -260,8 +260,8 @@ -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(auto_ack_next_message/1 :: (queue_name()) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [msg_id()], [{msg_id(), seq_id()}]) -> - 'ok'). +-spec(tx_commit/3 :: (queue_name(), [{msg_id(), bool()}], + [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{{msg_id(), seq_id()}, bool()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). @@ -1046,7 +1046,7 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, last_sync_offset = SyncOffset }) -> NeedsSync = IsDirty andalso - lists:any(fun (MsgId) -> + lists:any(fun ({MsgId, _Delivered}) -> [{MsgId, _RefCount, File, Offset, _TotalSize, _IsPersistent}] = dets_ets_lookup(State, MsgId), @@ -1070,12 +1070,12 @@ internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, ok = mnesia:write_lock_table(rabbit_disk_queue), {ok, WriteSeqId1} = lists:foldl( - fun (MsgId, {ok, SeqId}) -> + fun ({MsgId, Delivered}, {ok, SeqId}) -> {mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, msg_id = MsgId, - is_delivered = false + is_delivered = Delivered }, write), SeqId + 1} end, {ok, InitWriteSeqId}, PubMsgIds), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index fedc0e52..afc1c8aa 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -155,7 +155,7 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, {ok, MsgBuf}; {{value, {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - _IsDelivered}}, Queue1} -> + IsDelivered}}, Queue1} -> case IsDurable andalso IsPersistent of true -> %% it's already in the Q send_messages_to_disk( @@ -164,17 +164,18 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, false -> Commit1 = flush_requeue_to_disk_queue (Q, RequeueCount, Commit), - ok = rabbit_disk_queue:tx_publish(Msg), %% TODO - this is resetting the delivered flag to false! 
(well, actually, in the commit, but nevertheless, it's wrong) + ok = rabbit_disk_queue:tx_publish(Msg), case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of true -> ok = flush_messages_to_disk_queue(Q, Commit1), send_messages_to_disk( - IsDurable, Q, Queue1, 1, 0, [MsgId], + IsDurable, Q, Queue1, 1, 0, + [{MsgId, IsDelivered}], inc_queue_length(Q, MsgBuf, 1)); false -> send_messages_to_disk( IsDurable, Q, Queue1, PublishCount + 1, 0, - [MsgId | Commit1], + [{MsgId, IsDelivered} | Commit1], inc_queue_length(Q, MsgBuf, 1)) end end; @@ -387,7 +388,7 @@ tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize, memory_gain = Gain + MsgSize }}. only_msg_ids(Pubs) -> - lists:map(fun (Msg) -> Msg #basic_message.guid end, Pubs). + lists:map(fun (Msg) -> {Msg #basic_message.guid, false} end, Pubs). tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = disk, queue = Q, length = Length, @@ -412,7 +413,8 @@ tx_commit(Publishes, MsgsWithAcks, {Acc, MsgBuf2}) -> Acc1 = case IsPersistent andalso IsDurable of - true -> [Msg #basic_message.guid | Acc]; + true -> [ {Msg #basic_message.guid, false} + | Acc]; false -> Acc end, {Acc1, queue:in({Msg, false}, MsgBuf2)} diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index c9bbbb8f..6cae5404 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/0]). +-export([start_link/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b9777337..10a9873a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -782,11 +782,12 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), + CommitList = lists:zip(List, lists:duplicate(MsgCount, false)), {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List, _ <- Qs] end, - fun() -> [ok = rabbit_disk_queue:tx_commit(Q, List, []) + fun() -> [ok = rabbit_disk_queue:tx_commit(Q, CommitList, []) || Q <- Qs] end ]]), {Deliver, ok} = @@ -820,8 +821,9 @@ rdq_stress_gc(MsgCount) -> MsgSizeBytes = 256*1024, Msg = <<0:(8*MsgSizeBytes)>>, % 256KB List = lists:seq(1, MsgCount), + CommitList = lists:zip(List, lists:duplicate(MsgCount, false)), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List], - rabbit_disk_queue:tx_commit(q, List, []), + rabbit_disk_queue:tx_commit(q, CommitList, []), StartChunk = round(MsgCount / 20), % 5% AckList = lists:foldl( @@ -862,8 +864,9 @@ rdq_test_startup_with_queue_gaps() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), + CommitAll = lists:zip(All, lists:duplicate(Total, false)), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, true)) || N <- All], - rabbit_disk_queue:tx_commit(q, All, []), + rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), %% deliver first half Seqs = [begin @@ -918,8 +921,9 @@ rdq_test_redeliver() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), + CommitAll = lists:zip(All, lists:duplicate(Total, false)), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], - rabbit_disk_queue:tx_commit(q, All, []), + rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), %% deliver first half Seqs = [begin @@ -970,8 +974,9 @@ rdq_test_purge() -> Total = 1000, Half 
= round(Total/2), All = lists:seq(1,Total), + CommitAll = lists:zip(All, lists:duplicate(Total, false)), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], - rabbit_disk_queue:tx_commit(q, All, []), + rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), %% deliver first half Seqs = [begin @@ -1174,13 +1179,16 @@ rdq_test_disk_queue_modes() -> Total = 1000, Half1 = lists:seq(1,round(Total/2)), Half2 = lists:seq(1 + round(Total/2), Total), + CommitHalf1 = lists:zip(Half1, lists:duplicate(round(Total/2), false)), + CommitHalf2 = lists:zip(Half2, lists:duplicate + (Total - round(Total/2), false)), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half1], - ok = rabbit_disk_queue:tx_commit(q, Half1, []), + ok = rabbit_disk_queue:tx_commit(q, CommitHalf1, []), io:format("Publish done~n", []), ok = rabbit_disk_queue:to_disk_only_mode(), io:format("To Disk Only done~n", []), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half2], - ok = rabbit_disk_queue:tx_commit(q, Half2, []), + ok = rabbit_disk_queue:tx_commit(q, CommitHalf2, []), Seqs = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = -- cgit v1.2.1 From c8b0d811ccc63782b24b1a8de7062e5c56ed80ab Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Jul 2009 13:44:30 +0100 Subject: sigh, another stupid bug which none of the tests catch and which I just happened to spot by reading the code. I am deeply alarmed by how many of these sorts of bugs I am finding and how many more there must be. OTOH, they do seem to crop up much more in code which has been changed substantially and repeatedly, though it's very possible that's just because I'm looking there more than elsewhere. --- src/rabbit_mixed_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index afc1c8aa..d2d3c19f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -159,7 +159,7 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, case IsDurable andalso IsPersistent of true -> %% it's already in the Q send_messages_to_disk( - IsDurable, Q, Queue1, PublishCount, RequeueCount, + IsDurable, Q, Queue1, PublishCount, RequeueCount + 1, Commit, inc_queue_length(Q, MsgBuf, 1)); false -> Commit1 = flush_requeue_to_disk_queue @@ -181,7 +181,7 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, end; {{value, {Q, Count}}, Queue1} -> send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, - RequeueCount, Commit, + RequeueCount + Count, Commit, inc_queue_length(Q, MsgBuf, Count)) end. -- cgit v1.2.1 From 2407053ea50c6a3a0dcaa2fe5d9fbd0e5e822661 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Jul 2009 19:30:49 +0100 Subject: Prefetcher appears to be done and working well. None of the tests exercise it though because I decided to only start it up when in mixed mode and when the amqqueue_process starts to hibernate (otherwise, we start it up too soon, it doesn't make much progress and then we just have to shut it down anyway). However, other manual tests definitely exercise it and it seems to be very effective. Certainly can't make it crash now. 
--- src/rabbit_amqqueue_process.erl | 6 +- src/rabbit_disk_queue.erl | 45 ++++++-- src/rabbit_mixed_queue.erl | 233 +++++++++++++++++++++++++++------------- src/rabbit_queue_prefetcher.erl | 75 +++++++++++-- 4 files changed, 262 insertions(+), 97 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 0597215f..ab96feff 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -857,8 +857,10 @@ handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_ch_down(DownPid, State); -handle_info(timeout, State) -> - State1 = stop_memory_timer(report_memory(true, State)), +handle_info(timeout, State = #q { mixed_state = MS }) -> + MS1 = rabbit_mixed_queue:maybe_prefetch(MS), + State1 = + stop_memory_timer(report_memory(true, State #q { mixed_state = MS1 })), %% don't call noreply/1 as that'll restart the memory_report_timer {noreply, State1, hibernate}; diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e739bfef..05ba3a6c 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -42,7 +42,8 @@ tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, - requeue_next_n/2, length/1, foldl/3 + requeue_next_n/2, length/1, foldl/3, prefetch/1, + set_delivered_and_advance/2 ]). -export([filesync/0, cache_info/0]). @@ -257,12 +258,15 @@ -spec(phantom_deliver/1 :: (queue_name()) -> ( 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). +-spec(prefetch/1 :: (queue_name()) -> 'ok'). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). -spec(auto_ack_next_message/1 :: (queue_name()) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [{msg_id(), bool()}], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). +-spec(set_delivered_and_advance/2 :: + (queue_name(), {msg_id(), seq_id()}) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{{msg_id(), seq_id()}, bool()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). @@ -298,6 +302,9 @@ deliver(Q) -> phantom_deliver(Q) -> gen_server2:call(?SERVER, {phantom_deliver, Q}, infinity). +prefetch(Q) -> + gen_server2:pcast(?SERVER, -1, {prefetch, Q, self()}). + ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {ack, Q, MsgSeqIds}). @@ -314,6 +321,9 @@ tx_commit(Q, PubMsgIds, AckSeqIds) tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server2:cast(?SERVER, {tx_cancel, MsgIds}). +set_delivered_and_advance(Q, MsgSeqId) -> + gen_server2:cast(?SERVER, {set_delivered_and_advance, Q, MsgSeqId}). + requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {requeue, Q, MsgSeqIds}). @@ -454,10 +464,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, State2, {binary, ?HIBERNATE_AFTER_MIN}}. 
handle_call({deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, true, State), + {ok, Result, State1} = internal_deliver(Q, true, false, true, State), reply(Result, State1); handle_call({phantom_deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, false, State), + {ok, Result, State1} = internal_deliver(Q, false, false, true, State), reply(Result, State1); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> State1 = @@ -531,8 +541,20 @@ handle_cast(report_memory, State) -> %% call noreply1/2, not noreply/1/2, as we don't want to restart the %% memory_report_timer %% by unsetting the timer, we force a report on the next normal message - noreply1(State #dqstate { memory_report_timer = undefined }). - + noreply1(State #dqstate { memory_report_timer = undefined }); +handle_cast({prefetch, Q, From}, State) -> + {ok, Result, State1} = internal_deliver(Q, true, true, false, State), + ok = rabbit_queue_prefetcher:publish(From, Result), + noreply(State1); +handle_cast({set_delivered_and_advance, Q, MsgSeqId}, State) -> + State2 = + case internal_deliver(Q, false, false, true, State) of + {ok, empty, State1} -> State1; + {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Rem}, State1} -> + State1 + end, + noreply(State2). + handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; handle_info(timeout, State = #dqstate { commit_timer_ref = undefined }) -> @@ -866,7 +888,7 @@ cache_is_full(#dqstate { message_cache = Cache }) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, ReadMsg, +internal_deliver(Q, ReadMsg, FakeDeliver, Advance, State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of {SeqId, SeqId} -> {ok, empty, State}; @@ -874,9 +896,12 @@ internal_deliver(Q, ReadMsg, Remaining = WriteSeqId - ReadSeqId - 1, {ok, Result, State1} = internal_read_message( - Q, ReadSeqId, ReadMsg, false, false, State), - true = ets:insert(Sequences, - {Q, ReadSeqId+1, WriteSeqId}), + Q, ReadSeqId, ReadMsg, FakeDeliver, false, State), + true = case Advance of + true -> ets:insert(Sequences, + {Q, ReadSeqId+1, WriteSeqId}); + false -> true + end, {ok, case Result of {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}} -> @@ -941,7 +966,7 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - end. internal_auto_ack(Q, State) -> - case internal_deliver(Q, false, State) of + case internal_deliver(Q, false, false, true, State) of {ok, empty, State1} -> {ok, State1}; {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Remaining}, State1} -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index d2d3c19f..ac7495fe 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -37,7 +37,7 @@ -export([publish/2, publish_delivered/2, deliver/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, - length/1, is_empty/1, delete_queue/1]). + length/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). -export([to_disk_only_mode/2, to_mixed_mode/2, estimate_queue_memory/1, reset_counters/1, info/1]). @@ -49,7 +49,8 @@ length, memory_size, memory_gain, - memory_loss + memory_loss, + prefetcher } ). 
@@ -63,9 +64,10 @@ queue :: queue_name(), is_durable :: bool(), length :: non_neg_integer(), - memory_size :: non_neg_integer(), - memory_gain :: non_neg_integer(), - memory_loss :: non_neg_integer() + memory_size :: (non_neg_integer() | 'undefined'), + memory_gain :: (non_neg_integer() | 'undefined'), + memory_loss :: (non_neg_integer() | 'undefined'), + prefetcher :: (pid() | 'undefined') }). -type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). -type(okmqs() :: {'ok', mqstate()}). @@ -110,7 +112,7 @@ init(Queue, IsDurable) -> {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, is_durable = IsDurable, length = Len, memory_size = Size, memory_gain = undefined, - memory_loss = undefined }}. + memory_loss = undefined, prefetcher = undefined }}. size_of_message( #basic_message { content = #content { payload_fragments_rev = Payload }}) -> @@ -122,16 +124,30 @@ to_disk_only_mode(_TxnMessages, State = #mqstate { mode = disk }) -> {ok, State}; to_disk_only_mode(TxnMessages, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable }) -> + is_durable = IsDurable, prefetcher = Prefetcher + }) -> rabbit_log:info("Converting queue to disk only mode: ~p~n", [Q]), + State1 = State #mqstate { mode = disk }, + {MsgBuf1, State2} = + case Prefetcher of + undefined -> {MsgBuf, State1}; + _ -> + case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of + empty -> {MsgBuf, State1}; + {Fetched, Len} -> + State3 = #mqstate { msg_buf = MsgBuf2 } = + dec_queue_length(Len, State1), + {queue:join(Fetched, MsgBuf2), State3} + end + end, %% We enqueue _everything_ here. This means that should a message %% already be in the disk queue we must remove it and add it back %% in. Fortunately, by using requeue, we avoid rewriting the %% message on disk. %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. - {ok, MsgBuf1} = - send_messages_to_disk(IsDurable, Q, MsgBuf, 0, 0, [], queue:new()), + {ok, MsgBuf3} = + send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], queue:new()), %% tx_publish txn messages. Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -144,7 +160,7 @@ to_disk_only_mode(TxnMessages, State = end end, TxnMessages), garbage_collect(), - {ok, State #mqstate { mode = disk, msg_buf = MsgBuf1 }}. + {ok, State2 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}. 
send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, Commit, MsgBuf) -> @@ -179,6 +195,23 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, inc_queue_length(Q, MsgBuf, 1)) end end; + {{value, {Msg = #basic_message { guid = MsgId }, IsDelivered, _AckTag}}, + Queue1} -> + %% these have come via the prefetcher, so are no longer in + %% the disk queue so they need to be republished + Commit1 = flush_requeue_to_disk_queue(Q, RequeueCount, Commit), + ok = rabbit_disk_queue:tx_publish(Msg), + case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of + true -> + ok = flush_messages_to_disk_queue(Q, Commit1), + send_messages_to_disk(IsDurable, Q, Queue1, 1, 0, + [{MsgId, IsDelivered}], + inc_queue_length(Q, MsgBuf, 1)); + false -> + send_messages_to_disk(IsDurable, Q, Queue1, PublishCount+1, + 0, [{MsgId, IsDelivered} | Commit1], + inc_queue_length(Q, MsgBuf, 1)) + end; {{value, {Q, Count}}, Queue1} -> send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, RequeueCount + Count, Commit, @@ -203,15 +236,16 @@ flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> {ok, State}; -to_mixed_mode(TxnMessages, State = - #mqstate { mode = disk, queue = Q, - is_durable = IsDurable, msg_buf = MsgBuf }) -> +to_mixed_mode(TxnMessages, State = #mqstate { mode = disk, queue = Q, + is_durable = IsDurable }) -> rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), - %% load up a new queue with a token that says how many messages - %% are on disk (this is already built for us by the disk mode) - %% don't actually do anything to the disk - ok = maybe_prefetch(mixed, MsgBuf), - %% remove txn messages from disk which are neither persistent and + %% The queue has a token just saying how many msgs are on disk + %% (this is already built for us when in disk mode). + %% Don't actually do anything to the disk + %% Don't start prefetcher just yet because the queue maybe busy - + %% wait for hibernate timeout in the amqqueue_process. + + %% Remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_cancel/2 which is why %% we're not using it. @@ -219,8 +253,8 @@ to_mixed_mode(TxnMessages, State = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> case IsDurable andalso IsPersistent of - true -> Acc; - _ -> [Msg #basic_message.guid | Acc] + true -> Acc; + false -> [Msg #basic_message.guid | Acc] end end, [], TxnMessages), ok = if Cancel == [] -> ok; @@ -229,26 +263,43 @@ to_mixed_mode(TxnMessages, State = garbage_collect(), {ok, State #mqstate { mode = mixed }}. -inc_queue_length(_Queue, MsgBuf, 0) -> +inc_queue_length(_Q, MsgBuf, 0) -> MsgBuf; -inc_queue_length(Queue, MsgBuf, Count) -> +inc_queue_length(Q, MsgBuf, Count) -> case queue:out_r(MsgBuf) of {empty, MsgBuf} -> - queue:in({Queue, Count}, MsgBuf); - {{value, {Queue, Len}}, MsgBuf1} -> - queue:in({Queue, Len + Count}, MsgBuf1); + queue:in({Q, Count}, MsgBuf); + {{value, {Q, Len}}, MsgBuf1} -> + queue:in({Q, Len + Count}, MsgBuf1); {{value, _}, _MsgBuf1} -> - queue:in({Queue, Count}, MsgBuf) + queue:in({Q, Count}, MsgBuf) end. -dec_queue_length(Mode, MsgBuf) -> - {{value, {Queue, Len}}, MsgBuf1} = queue:out(MsgBuf), - MsgBuf2 = case Len of - 1 -> ok = maybe_prefetch(Mode, MsgBuf1), - MsgBuf1; - _ -> queue:in_r({Queue, Len-1}, MsgBuf1) - end, - {Queue, MsgBuf2}. 
+dec_queue_length(Count, State = #mqstate { queue = Q, msg_buf = MsgBuf }) -> + case queue:out(MsgBuf) of + {{value, {Q, Len}}, MsgBuf1} -> + case Len of + Count -> + maybe_prefetch(State #mqstate { msg_buf = MsgBuf1 }); + _ when Len > Count -> + State #mqstate { msg_buf = queue:in_r({Q, Len-Count}, + MsgBuf1)} + end; + _ -> State + end. + +maybe_prefetch(State = #mqstate { prefetcher = undefined, + mode = mixed, + msg_buf = MsgBuf, + queue = Q }) -> + case queue:peek(MsgBuf) of + {value, {Q, Count}} -> {ok, Prefetcher} = + rabbit_queue_prefetcher:start_link(Q, Count), + State #mqstate { prefetcher = Prefetcher }; + _ -> State + end; +maybe_prefetch(State) -> + State. publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, msg_buf = MsgBuf, memory_size = QSize, @@ -312,46 +363,67 @@ deliver(State = #mqstate { length = 0 }) -> {empty, State}; deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, is_durable = IsDurable, length = Length, - mode = Mode }) -> + prefetcher = Prefetcher }) -> {{value, Value}, MsgBuf1} = queue:out(MsgBuf), - {Msg, IsDelivered, AckTag, MsgBuf2} = - case Value of - {Msg1 = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered1} -> - AckTag1 = - case IsDurable andalso IsPersistent of - true -> - {MsgId, IsPersistent, IsDelivered1, AckTag2, _PRem} - = rabbit_disk_queue:phantom_deliver(Q), - AckTag2; - false -> - noack - end, - ok = maybe_prefetch(Mode, MsgBuf1), - {Msg1, IsDelivered1, AckTag1, MsgBuf1}; - _ -> - {Q, MsgBuf3} = dec_queue_length(Mode, MsgBuf), - {Msg1 = #basic_message { is_persistent = IsPersistent }, - _Size, IsDelivered1, AckTag1, _PersistRem} - = rabbit_disk_queue:deliver(Q), - AckTag2 = - case IsDurable andalso IsPersistent of - true -> - AckTag1; - false -> - ok = rabbit_disk_queue:ack(Q, [AckTag1]), - noack - end, - {Msg1, IsDelivered1, AckTag2, MsgBuf3} - end, Rem = Length - 1, - {{Msg, IsDelivered, AckTag, Rem}, - State #mqstate { msg_buf = MsgBuf2, length = Rem }}. - -maybe_prefetch(_, _) -> - %% disable just for the time being - ok. + State1 = State #mqstate { length = Rem }, + case Value of + {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, + IsDelivered} -> + AckTag = + case IsDurable andalso IsPersistent of + true -> + {MsgId, IsPersistent, IsDelivered, AckTag1, _PRem} + = rabbit_disk_queue:phantom_deliver(Q), + AckTag1; + false -> + noack + end, + State2 = maybe_prefetch(State1 #mqstate { msg_buf = MsgBuf1 }), + {{Msg, IsDelivered, AckTag, Rem}, State2}; + {Msg = #basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTag} -> + %% message has come via the prefetcher, thus it's been + %% delivered. 
If it's not persistent+durable, we should + %% ack it now + AckTag1 = + case IsDurable andalso IsPersistent of + true -> + AckTag; + false -> + ok = rabbit_disk_queue:ack(Q, [AckTag]), + noack + end, + {{Msg, IsDelivered, AckTag1, Rem}, + State1 #mqstate { msg_buf = MsgBuf1 }}; + _ when Prefetcher == undefined -> + State2 = dec_queue_length(1, State1), + {Msg = #basic_message { is_persistent = IsPersistent }, + _Size, IsDelivered, AckTag, _PersistRem} + = rabbit_disk_queue:deliver(Q), + AckTag1 = + case IsDurable andalso IsPersistent of + true -> + AckTag; + false -> + ok = rabbit_disk_queue:ack(Q, [AckTag]), + noack + end, + {{Msg, IsDelivered, AckTag1, Rem}, State2}; + _ -> + case rabbit_queue_prefetcher:drain(Prefetcher) of + empty -> deliver(State #mqstate { prefetcher = undefined }); + {Fetched, Len, Status} -> + State2 = #mqstate { msg_buf = MsgBuf2 } = + dec_queue_length(Len, State), + deliver(State2 #mqstate + { msg_buf = queue:join(Fetched, MsgBuf2), + prefetcher = case Status of + finished -> undefined; + _ -> Prefetcher + end }) + end + end. remove_noacks(MsgsWithAcks) -> lists:foldl( @@ -489,8 +561,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, - length = Length - }) -> + length = Length }) -> {PersistentPubs, MsgBuf1} = lists:foldl( fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, @@ -515,14 +586,24 @@ purge(State = #mqstate { queue = Q, mode = disk, length = Count, {Count, State #mqstate { length = 0, memory_size = 0, memory_loss = Loss + QSize }}; purge(State = #mqstate { queue = Q, mode = mixed, length = Length, - memory_loss = Loss, memory_size = QSize }) -> + memory_loss = Loss, memory_size = QSize, + prefetcher = Prefetcher }) -> + case Prefetcher of + undefined -> ok; + _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) + end, rabbit_disk_queue:purge(Q), {Length, State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, memory_loss = Loss + QSize }}. delete_queue(State = #mqstate { queue = Q, memory_size = QSize, - memory_loss = Loss }) -> + memory_loss = Loss, prefetcher = Prefetcher + }) -> + case Prefetcher of + undefined -> ok; + _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) + end, ok = rabbit_disk_queue:delete_queue(Q), {ok, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), memory_loss = Loss + QSize }}. diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index 6cae5404..dfd444b2 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -38,6 +38,10 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). +-export([publish/2, drain/1, drain_and_stop/1]). + +-include("rabbit.hrl"). + -define(HIBERNATE_AFTER_MIN, 1000). -record(pstate, @@ -45,7 +49,8 @@ buf_length, target_count, fetched_count, - queue + queue, + queue_mref }). %% The design of the prefetcher is based on the following: @@ -178,25 +183,77 @@ %% mixed_queue when it wants to drain the prefetcher. start_link(Queue, Count) -> - gen_server2:start_link(?MODULE, [Queue, Count], []). + gen_server2:start_link(?MODULE, [Queue, Count, self()], []). + +publish(Prefetcher, Obj = { #basic_message {}, _Size, _IsDelivered, + _AckTag, _Remaining }) -> + gen_server2:cast(Prefetcher, {publish, Obj}); +publish(Prefetcher, empty) -> + gen_server2:cast(Prefetcher, publish_empty). 
+ +drain(Prefetcher) -> + gen_server2:call(Prefetcher, drain, infinity). + +drain_and_stop(Prefetcher) -> + gen_server2:call(Prefetcher, drain_and_stop, infinity). -init([Q, Count]) -> +init([Q, Count, QPid]) -> + %% link isn't enough because the signal will not appear if the + %% queue exits normally. Thus have to use monitor. + MRef = erlang:monitor(process, QPid), State = #pstate { msg_buf = queue:new(), buf_length = 0, target_count = Count, fetched_count = 0, - queue = Q + queue = Q, + queue_mref = MRef }, + ok = rabbit_disk_queue:prefetch(Q), {ok, State, {binary, ?HIBERNATE_AFTER_MIN}}. -handle_call(_Msg, _From, State) -> - {reply, confused, State}. +handle_call(drain, _From, State = #pstate { buf_length = 0 }) -> + {stop, normal, empty, State}; +handle_call(drain, _From, State = #pstate { fetched_count = Count, + target_count = Count, + msg_buf = MsgBuf, + buf_length = Length }) -> + {stop, normal, {MsgBuf, Length, finished}, State}; +handle_call(drain, _From, State = #pstate { msg_buf = MsgBuf, + buf_length = Length }) -> + {reply, {MsgBuf, Length, continuing}, + State #pstate { msg_buf = queue:new(), buf_length = 0 }}; +handle_call(drain_and_stop, _From, State = #pstate { buf_length = 0 }) -> + {stop, normal, empty, State}; +handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf, + buf_length = Length }) -> + {stop, normal, {MsgBuf, Length}, State}. -handle_cast(_Msg, State) -> - {noreply, State}. +handle_cast(publish_empty, State) -> + %% Very odd. This could happen if the queue is deleted or purged + %% and the mixed queue fails to shut us down. + {noreply, State}; +handle_cast({publish, { Msg = #basic_message {}, + _Size, IsDelivered, AckTag, _Remaining }}, + State = #pstate { fetched_count = Fetched, target_count = Target, + msg_buf = MsgBuf, buf_length = Length, queue = Q + }) -> + ok = rabbit_disk_queue:set_delivered_and_advance(Q, AckTag), + ok = case Fetched + 1 == Target of + true -> ok; + false -> rabbit_disk_queue:prefetch(Q) + end, + MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), + {noreply, State #pstate { fetched_count = Fetched + 1, + buf_length = Length + 1, + msg_buf = MsgBuf1 }}. handle_info(timeout, State) -> - {noreply, State, hibernate}. + {noreply, State, hibernate}; +handle_info({'DOWN', MRef, process, _Pid, _Reason}, + State = #pstate { queue_mref = MRef }) -> + %% this is the amqqueue_process going down, so we should go down + %% too + {stop, normal, State}. terminate(_Reason, _State) -> ok. -- cgit v1.2.1 From 556a75a66d40e50d2c977711d9ac534a2eca8c04 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 1 Aug 2009 21:30:40 +0100 Subject: just making set_mode as a pcast in the disk_queue just like it is in the amqqueue --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3a7d2f29..154e8a90 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -370,7 +370,7 @@ report_memory() -> gen_server2:cast(?SERVER, report_memory). set_mode(Mode) -> - gen_server2:cast(?SERVER, {set_mode, Mode}). + gen_server2:pcast(?SERVER, 10, {set_mode, Mode}). %% ---- GEN-SERVER INTERNAL API ---- -- cgit v1.2.1 From ff22f91a975741e87d8d1469a9dc3d6f9cfeb631 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 Aug 2009 15:18:45 +0100 Subject: Removed some transactions and made all transaction bodies idempotent. 
They were actually fine before: a) the rabbit_disk_queue table is local_content and b) only one process ever accesses that table - thus there is no reason why any transaction will ever retry. However, this change is probably still beneficial. The only slight loss is that tx-commit is no longer atomic (ref counting of messages in ets, not mnesia, was resulting in non idempotency, so moved outside the transaction). This means that you could have msgs in a tx committed, but the acks not enforced, in the event of power failure or other catastrophic event. All tests pass. --- src/rabbit_disk_queue.erl | 130 ++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 74 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index fe8c433c..75892f68 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -980,11 +980,6 @@ internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, true, State). %% Q is only needed if MnesiaDelete /= false -%% called from ack with MnesiaDelete = true -%% called from tx_commit with MnesiaDelete = txn -%% called from tx_cancel with MnesiaDelete = false -%% called from purge with MnesiaDelete = txn -%% called from delete_queue with MnesiaDelete = txn remove_messages(Q, MsgSeqIds, MnesiaDelete, State = #dqstate { file_summary = FileSummary, current_file_name = CurName @@ -1092,8 +1087,8 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, State = #dqstate { sequences = Sequences }) -> {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - {atomic, {WriteSeqId, State1}} = - mnesia:transaction( + WriteSeqId = + rabbit_misc:execute_mnesia_transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), {ok, WriteSeqId1} = @@ -1107,9 +1102,9 @@ internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, }, write), SeqId + 1} end, {ok, InitWriteSeqId}, PubMsgIds), - {ok, State2} = remove_messages(Q, AckSeqIds, txn, State), - {WriteSeqId1, State2} + WriteSeqId1 end), + {ok, State1} = remove_messages(Q, AckSeqIds, true, State), true = case PubMsgIds of [] -> true; _ -> ets:insert(Sequences, {Q, InitReadSeqId, WriteSeqId}) @@ -1162,17 +1157,18 @@ internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> %% as they have no concept of sequence id anyway). {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - {atomic, {WriteSeqId1, Q, State}} = - mnesia:transaction( + {WriteSeqId1, Q, MsgIds} = + rabbit_misc:execute_mnesia_transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl(fun requeue_message/2, {WriteSeqId, Q, State}, + lists:foldl(fun requeue_message/2, {WriteSeqId, Q, []}, MsgSeqIds) end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId1}), + lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), {ok, State}. -requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, State}) -> +requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, Acc}) -> [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), ok = mnesia:write(rabbit_disk_queue, @@ -1181,57 +1177,50 @@ requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, State}) -> }, write), ok = mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), - decrement_cache(MsgId, State), - {WriteSeqId + 1, Q, State}. + {WriteSeqId + 1, Q, [MsgId | Acc]}. %% move the next N messages from the front of the queue to the back. 
internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), if N >= (WriteSeqId - ReadSeqId) -> {ok, State}; true -> - {atomic, {ReadSeqIdN, WriteSeqIdN}} = - mnesia:transaction( + {ReadSeqIdN, WriteSeqIdN, MsgIds} = + rabbit_misc:execute_mnesia_transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - requeue_next_messages(Q, State, N, ReadSeqId, WriteSeqId) + requeue_next_messages(Q, N, ReadSeqId, WriteSeqId, []) end ), true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN}), + lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), {ok, State} end. -requeue_next_messages(_Q, _State, 0, ReadSeq, WriteSeq) -> - {ReadSeq, WriteSeq}; -requeue_next_messages(Q, State, N, ReadSeq, WriteSeq) -> +requeue_next_messages(_Q, 0, ReadSeq, WriteSeq, Acc) -> + {ReadSeq, WriteSeq, Acc}; +requeue_next_messages(Q, N, ReadSeq, WriteSeq, Acc) -> [Obj = #dq_msg_loc { msg_id = MsgId }] = mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), ok = mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeq}}, write), ok = mnesia:delete(rabbit_disk_queue, {Q, ReadSeq}, write), - decrement_cache(MsgId, State), - requeue_next_messages(Q, State, N - 1, ReadSeq + 1, WriteSeq + 1). + requeue_next_messages(Q, N - 1, ReadSeq + 1, WriteSeq + 1, [MsgId | Acc]). internal_purge(Q, State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of {SeqId, SeqId} -> {ok, 0, State}; {ReadSeqId, WriteSeqId} -> - {atomic, {ok, State1}} = - mnesia:transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - {MsgSeqIds, WriteSeqId} = - rabbit_misc:unfold( - fun (SeqId) when SeqId == WriteSeqId -> false; - (SeqId) -> - [#dq_msg_loc { msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, - {Q, SeqId}, write), - {true, {MsgId, SeqId}, SeqId + 1} - end, ReadSeqId), - remove_messages(Q, MsgSeqIds, txn, State) - end), + {MsgSeqIds, WriteSeqId} = + rabbit_misc:unfold( + fun (SeqId) when SeqId == WriteSeqId -> false; + (SeqId) -> + [#dq_msg_loc { msg_id = MsgId }] = + mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), + {true, {MsgId, SeqId}, SeqId + 1} + end, ReadSeqId), true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), + {ok, State1} = remove_messages(Q, MsgSeqIds, true, State), {ok, WriteSeqId - ReadSeqId, State1} end. @@ -1239,26 +1228,19 @@ internal_delete_queue(Q, State) -> {ok, _Count, State1 = #dqstate { sequences = Sequences }} = internal_purge(Q, State), %% remove everything undelivered true = ets:delete(Sequences, Q), - {atomic, {ok, State2}} = - mnesia:transaction( - fun() -> %% now remove everything already delivered - ok = mnesia:write_lock_table(rabbit_disk_queue), - Objs = - mnesia:match_object( - rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, '_'}, - msg_id = '_', - is_delivered = '_' - }, - write), - MsgSeqIds = - lists:map( - fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, - msg_id = MsgId }) -> - {MsgId, SeqId} end, Objs), - remove_messages(Q, MsgSeqIds, txn, State1) - end), - {ok, State2}. + %% now remove everything already delivered + Objs = mnesia:dirty_match_object( + rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = {Q, '_'}, + msg_id = '_', + is_delivered = '_' + }), + MsgSeqIds = + lists:map( + fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, + msg_id = MsgId }) -> + {MsgId, SeqId} end, Objs), + remove_messages(Q, MsgSeqIds, true, State1). 
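
%% [Editorial sketch, not part of the original patch] mnesia may run a
%% transaction fun more than once (for instance, restarting it after a
%% lock conflict), which is why the transaction bodies above must be
%% idempotent, and why the ets ref-counting mentioned in the commit
%% message was moved outside the transaction. The helper used above,
%% rabbit_misc:execute_mnesia_transaction/1, is assumed to behave
%% roughly like this:
execute_mnesia_transaction_sketch(TxFun) when is_function(TxFun, 0) ->
    case mnesia:transaction(TxFun) of
        {atomic, Result}  -> Result;
        {aborted, Reason} -> throw({error, Reason})
    end.
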
internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> @@ -1563,8 +1545,8 @@ load_from_disk(State) -> State1 = load_messages(undefined, Files, State), %% Finally, check there is nothing in mnesia which we haven't %% loaded - {atomic, State2} = - mnesia:transaction( + State2 = + rabbit_misc:execute_mnesia_transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), {State6, FinalQ, MsgSeqIds2, _Len} = @@ -1605,7 +1587,7 @@ load_from_disk(State) -> {ok, State8}. extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> - {atomic, true} = mnesia:transaction( + true = rabbit_misc:execute_mnesia_transaction( fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), mnesia:foldl( @@ -1624,7 +1606,7 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> end end, true, rabbit_disk_queue) end), - remove_gaps_in_sequences(State), + ok = remove_gaps_in_sequences(State), State. remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> @@ -1637,18 +1619,18 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> %% we could shuffle downwards. However, I think there's greater %% likelihood of gaps being at the bottom rather than the top of %% the queue, so shuffling up should be the better bet. - {atomic, _} = - mnesia:transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foreach( - fun ({Q, ReadSeqId, WriteSeqId}) -> - Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), - ReadSeqId1 = ReadSeqId + Gap, - true = ets:insert(Sequences, - {Q, ReadSeqId1, WriteSeqId}) - end, ets:match_object(Sequences, '_')) - end). + rabbit_misc:execute_mnesia_transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foreach( + fun ({Q, ReadSeqId, WriteSeqId}) -> + Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), + ReadSeqId1 = ReadSeqId + Gap, + true = ets:insert(Sequences, + {Q, ReadSeqId1, WriteSeqId}) + end, ets:match_object(Sequences, '_')) + end), + ok. shuffle_up(_Q, SeqId, SeqId, Gap) -> Gap; -- cgit v1.2.1 From c49a2beca39071770740653422dd4d3ed4312ade Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 10 Aug 2009 18:18:57 +0100 Subject: tidying up specs with dialyzer which was good enough to find one genuine mistake in the disk_queue. --- src/rabbit_amqqueue.erl | 2 +- src/rabbit_disk_queue.erl | 13 +++++++------ src/rabbit_mixed_queue.erl | 4 ++-- src/rabbit_queue_mode_manager.erl | 5 ++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 50ad1023..9b77949d 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -102,7 +102,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(set_mode_pin/3 :: (vhost(), amqqueue(), bool) -> any()). +-spec(set_mode_pin/3 :: (binary(), binary(), binary()) -> any()). -spec(set_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 75892f68..76a901fd 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -258,7 +258,7 @@ ('empty' | {message(), non_neg_integer(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). 
-spec(phantom_deliver/1 :: (queue_name()) -> - ( 'empty' | {msg_id(), bool(), {msg_id(), seq_id()}, + ( 'empty' | {msg_id(), bool(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). -spec(prefetch/1 :: (queue_name()) -> 'ok'). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). @@ -1673,7 +1673,7 @@ load_messages(Left, [File|Files], %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), {ValidMessagesRev, ValidTotalSize} = lists:foldl( - fun ({MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case erlang:length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, @@ -1686,7 +1686,7 @@ load_messages(Left, [File|Files], true = dets_ets_insert_new (State, {MsgId, RefCount, File, Offset, TotalSize, IsPersistent}), - {[{MsgId, TotalSize, Offset}|VMAcc], + {[Obj | VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } end @@ -1790,7 +1790,7 @@ recover_crashed_compactions1(Files, TmpFile) -> %% we could have failed after the extending truncate. %% Remember the head of the list will be the highest entry %% in the file - [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, + [{_, _, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Top + TmpSize, {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), @@ -1821,7 +1821,8 @@ recover_crashed_compactions1(Files, TmpFile) -> %% address is at the head of the list. This matches what %% scan_file_for_valid_messages produces find_contiguous_block_prefix([]) -> {0, []}; -find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail]) -> +find_contiguous_block_prefix([ {MsgId, _IsPersistent, TotalSize, Offset} + | Tail]) -> case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, lists:reverse(Acc)}; @@ -1831,7 +1832,7 @@ find_contiguous_block_prefix([], 0, Acc) -> {ok, Acc}; find_contiguous_block_prefix([], _N, _Acc) -> {0, []}; -find_contiguous_block_prefix([{MsgId, TotalSize, Offset}|Tail], +find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, Offset} | Tail], ExpectedOffset, Acc) when ExpectedOffset =:= Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT -> find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index ac7495fe..dddafaee 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -79,7 +79,7 @@ -spec(deliver/1 :: (mqstate()) -> {('empty' | {message(), bool(), acktag(), non_neg_integer()}), mqstate()}). --spec(ack/2 :: ([acktag()], mqstate()) -> okmqs()). +-spec(ack/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). -spec(tx_publish/2 :: (message(), mqstate()) -> okmqs()). -spec(tx_commit/3 :: ([message()], [acktag()], mqstate()) -> okmqs()). -spec(tx_cancel/2 :: ([message()], mqstate()) -> okmqs()). @@ -95,7 +95,7 @@ -spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). -spec(estimate_queue_memory/1 :: (mqstate()) -> - {non_neg_integer, non_neg_integer, non_neg_integer}). + {non_neg_integer(), non_neg_integer(), non_neg_integer()}). -spec(reset_counters/1 :: (mqstate()) -> (mqstate())). -spec(info/1 :: (mqstate()) -> mode()). 
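
%% [Editorial sketch, not part of the original patch] Why the added
%% parentheses in the estimate_queue_memory/1 spec matter: in a spec,
%% a bare non_neg_integer denotes the *atom* 'non_neg_integer',
%% whereas non_neg_integer() denotes the built-in type. The old spec
%% therefore described a tuple of three fixed atoms, which dialyzer
%% flags as soon as real integers flow through it. A minimal
%% illustration in the same old-style spec syntax:
-spec(sizes_sketch/0 :: () -> {non_neg_integer(), non_neg_integer(),
                               non_neg_integer()}).
sizes_sketch() -> {0, 1, 2}.
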
diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index d4bc21d4..ceb09d92 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -48,14 +48,13 @@ -ifdef(use_specs). --type(queue_mode() :: ( 'mixed' | 'disk' )). - -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/4 :: (pid(), atom(), atom(), list()) -> 'ok'). -spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). -spec(report_memory/5 :: (pid(), non_neg_integer(), - non_neg_integer(), non_neg_integer(), bool()) -> + (non_neg_integer() | 'undefined'), + (non_neg_integer() | 'undefined'), bool()) -> 'ok'). -spec(pin_to_disk/1 :: (pid()) -> 'ok'). -spec(unpin_to_disk/1 :: (pid()) -> 'ok'). -- cgit v1.2.1 From e9e3adff20cbf1e4292d275beeaaebcf56b6650f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 14 Aug 2009 12:38:16 +0100 Subject: Adjust the prefetcher in light of the bug matthias discovered. Documentation updated too. --- src/rabbit_disk_queue.erl | 36 +++++----- src/rabbit_queue_prefetcher.erl | 144 +++++++++++++++++++--------------------- 2 files changed, 89 insertions(+), 91 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 76a901fd..2f831058 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -43,8 +43,7 @@ tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, - requeue_next_n/2, length/1, foldl/3, prefetch/1, - set_delivered_and_advance/2 + requeue_next_n/2, length/1, foldl/3, prefetch/1 ]). -export([filesync/0, cache_info/0]). @@ -267,8 +266,6 @@ -spec(tx_commit/3 :: (queue_name(), [{msg_id(), bool()}], [{msg_id(), seq_id()}]) -> 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). --spec(set_delivered_and_advance/2 :: - (queue_name(), {msg_id(), seq_id()}) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{{msg_id(), seq_id()}, bool()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). @@ -323,9 +320,6 @@ tx_commit(Q, PubMsgIds, AckSeqIds) tx_cancel(MsgIds) when is_list(MsgIds) -> gen_server2:cast(?SERVER, {tx_cancel, MsgIds}). -set_delivered_and_advance(Q, MsgSeqId) -> - gen_server2:cast(?SERVER, {set_delivered_and_advance, Q, MsgSeqId}). - requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {requeue, Q, MsgSeqIds}). @@ -547,16 +541,24 @@ handle_cast(report_memory, State) -> noreply1(State #dqstate { memory_report_timer = undefined }); handle_cast({prefetch, Q, From}, State) -> {ok, Result, State1} = internal_deliver(Q, true, true, false, State), - ok = rabbit_queue_prefetcher:publish(From, Result), - noreply(State1); -handle_cast({set_delivered_and_advance, Q, MsgSeqId}, State) -> - State2 = - case internal_deliver(Q, false, false, true, State) of - {ok, empty, State1} -> State1; - {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Rem}, State1} -> - State1 - end, - noreply(State2). + Cont = + try + ok = rabbit_queue_prefetcher:publish(From, Result), + true + catch exit:{noproc, _} -> + false + end, + State3 = + case Cont of + true -> + case internal_deliver(Q, false, false, true, State1) of + {ok, empty, State2} -> State2; + {ok, {_MsgId, _IsPersistent, _Delivered, _MsgSeqId, _Rem}, + State2} -> State2 + end; + false -> State1 + end, + noreply(State3). 
handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index 0265ba2b..bab5396e 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -89,20 +89,22 @@ %% queue would have lost a msg that the mixed_queue would not pick %% up. %% -%% 3) The prefetcher hopefully receives the cast from -%% prefetcher:publish(Msg). It then adds to its internal queue and -%% calls disk_queue:set_delivered_and_advance(Q) which is a normal -%% priority cast. This cannot be low-priority because if it was, -%% the mixed_queue could come along, drain the prefetcher, thus +%% 3) The prefetcher hopefully receives the call from +%% prefetcher:publish(Msg). It replies immediately, and then adds +%% to its internal queue. A cast is not sufficient here because the +%% mixed_queue could come along, drain the prefetcher, thus %% catching the msg just sent by the disk_queue and then call %% disk_queue:deliver(Q) which is normal priority call, which could -%% overtake the low-priority -%% disk_queue:set_delivered_and_advance(Q) cast and thus result in -%% the same msg being delivered by the queue twice. +%% overtake a reply cast from the prefetcher to the disk queue, +%% which would result in the same message being delivered +%% twice. Thus when the disk_queue calls prefetcher:publish(Msg), +%% it is briefly blocked. However, a) the prefetcher replies +%% immediately, and b) the prefetcher should never have more than +%% one item in its mailbox anyway, so this should not cause a +%% problem to the disk_queue. %% -%% 4) The disk_queue receives the set_delivered_and_advance(Q) cast, -%% marks the msg at the head of the queue Q as delivered, and -%% advances the Q to the next msg. +%% 4) The disk_queue receives the reply, marks the msg at the head of +%% the queue Q as delivered, and advances the Q to the next msg. %% %% 5) If the prefetcher has not met its target then it goes back to %% 1). Otherwise it just sits and waits for the mixed_queue to @@ -123,45 +125,37 @@ %% on talk directly with the disk_queue and not via the %% prefetcher. This is more efficient and the mixed_queue will use %% normal priority blocking calls to the disk_queue and thus get -%% better service that way. When exiting in this way, two situations -%% could occur: +%% better service that way. %% -%% 1) The prefetcher has issued a disk_queue:prefetch(Q) which has not -%% yet been picked up by the disk_queue. This msg won't go away and -%% the disk_queue will eventually find it. However, when it does, -%% it'll simply read the next message from the queue (which could now -%% be empty), possibly populate the cache (no harm done) and try and -%% call prefetcher:publish(Msg) which will go no where. However, the -%% state of the queue and the state of the message has not been -%% altered so the mixed_queue will be able to fetch this message as if -%% it had never been prefetched. -%% -%% 2) The disk_queue has already picked up the disk_queue:prefetch(Q) -%% low priority message and has read the message and replied, by -%% calling prefetcher:publish(Msg). In fact, it's possible that -%% message is directly behind the call from mixed_queue to -%% prefetcher:drain(). Same reasoning as in 1) applies - neither the -%% queue's nor the message's state have been altered, so the -%% mixed_queue can absolutely go and fetch the message again. 
+%% The prefetcher may at this point have issued a
+%% disk_queue:prefetch(Q) cast which has not yet been picked up by the
+%% disk_queue. This msg won't go away and the disk_queue will
+%% eventually find it. However, when it does, it'll simply read the
+%% next message from the queue (which could now be empty), possibly
+%% populate the cache (no harm done) and try and call
+%% prefetcher:publish(Msg) which will result in an error, which the
+%% disk_queue catches, as the publish call is to a non-existent
+%% process. However, the state of the queue and the state of the
+%% message have not been altered so the mixed_queue will be able to
+%% fetch this message as if it had never been prefetched.
 %%
 %% The only point at which the queue is advanced and the message
-%% marked as delivered is when the prefetcher calls
-%% disk_queue:set_delivered_and_advance(Q). At this point the message
-%% has been received by the prefetcher and so we guarantee it will be
-%% passed to the mixed_queue when the mixed_queue tries to drain the
-%% prefetcher. We must therefore ensure that this msg can't also be
-%% delivered to the mixed_queue directly by the disk_queue through the
-%% mixed_queue calling disk_queue:deliver(Q) which is why the
-%% disk_queue:set_delivered_and_advance(Q) cast must be normal
-%% priority (or at least match the priority of disk_queue:deliver(Q)).
+%% marked as delivered is when the prefetcher replies to the publish
+%% call. At this point the message has been received by the prefetcher
+%% and so we guarantee it will be passed to the mixed_queue when the
+%% mixed_queue tries to drain the prefetcher. We must therefore ensure
+%% that this msg can't also be delivered to the mixed_queue directly
+%% by the disk_queue through the mixed_queue calling
+%% disk_queue:deliver(Q) which is why the prefetcher:publish function
+%% is a call and not a cast, thus blocking the disk_queue.
 %%
 %% Finally, the prefetcher is only created when the mixed_queue is
 %% operating in mixed mode and it sees that the next N messages are
-%% all on disk. During this phase, the mixed_queue can be asked to go
-%% back to disk_only mode. When this happens, it calls
-%% prefetcher:drain_and_stop() which behaves like two consecutive
-%% calls to drain() - i.e. replies with all prefetched messages and
-%% causes the prefetcher to exit.
+%% all on disk, and the queue process is about to hibernate. During
+%% this phase, the mixed_queue can be asked to go back to disk_only
+%% mode. When this happens, it calls prefetcher:drain_and_stop() which
+%% behaves like two consecutive calls to drain() - i.e. replies with
+%% all prefetched messages and causes the prefetcher to exit.
 %%
 %% Note there is a flaw here in that we end up marking messages which
 %% have come through the prefetcher as delivered even if they don't
@@ -172,25 +166,24 @@
 %% we have no guarantee that the message will really go out of the
 %% socket. What we do still have is that messages which have the
 %% redelivered bit set false really are guaranteed to have not been
-%% delivered already. Well, almost: if the disk_queue has a large back
-%% log of messages then the prefetcher invocation of
-%% disk_queue:set_delivered_and_advance(Q) may not be acted upon
-%% before a crash. However, given that the prefetching is operating in
-%% lock-step with the disk_queue, this means that at most, 1 (one)
-%% message can fail to have its delivered flag raised. The alternative
-%% is that disk_queue:set_delivered_and_advance(Q) could be made into
-%% a call.
However, if the disk_queue is heavily loaded, this can -%% block the prefetcher for some time, which in turn can block the -%% mixed_queue when it wants to drain the prefetcher. +%% delivered already. In theory, it's possible that the disk_queue +%% calls prefetcher:publish, blocks waiting for the reply. The +%% prefetcher grabs the message, is drained, the message goes out of +%% the socket and is delivered. The broker then crashes before the +%% disk_queue processes the reply from the prefetcher, thus the fact +%% the message has been delivered is not recorded. However, this can +%% only affect a single message at a time. I.e. there is a tiny chance +%% that the first message delivered on queue recovery that has the +%% redelivery bit set false, has in fact been delivered before. start_link(Queue, Count) -> gen_server2:start_link(?MODULE, [Queue, Count, self()], []). publish(Prefetcher, Obj = { #basic_message {}, _Size, _IsDelivered, _AckTag, _Remaining }) -> - gen_server2:cast(Prefetcher, {publish, Obj}); + gen_server2:call(Prefetcher, {publish, Obj}, infinity); publish(Prefetcher, empty) -> - gen_server2:cast(Prefetcher, publish_empty). + gen_server2:call(Prefetcher, publish_empty, infinity). drain(Prefetcher) -> gen_server2:call(Prefetcher, drain, infinity). @@ -213,6 +206,25 @@ init([Q, Count, QPid]) -> {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. +handle_call({publish, { Msg = #basic_message {}, + _Size, IsDelivered, AckTag, _Remaining }}, + DiskQueue, State = + #pstate { fetched_count = Fetched, target_count = Target, + msg_buf = MsgBuf, buf_length = Length, queue = Q + }) -> + gen_server2:reply(DiskQueue, ok), + ok = case Fetched + 1 == Target of + true -> ok; + false -> rabbit_disk_queue:prefetch(Q) + end, + MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), + {noreply, State #pstate { fetched_count = Fetched + 1, + buf_length = Length + 1, + msg_buf = MsgBuf1 }, hibernate}; +handle_call(publish_empty, _From, State) -> + %% Very odd. This could happen if the queue is deleted or purged + %% and the mixed queue fails to shut us down. + {reply, ok, State, hibernate}; handle_call(drain, _From, State = #pstate { buf_length = 0 }) -> {stop, normal, empty, State}; handle_call(drain, _From, State = #pstate { fetched_count = Count, @@ -230,24 +242,8 @@ handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf, buf_length = Length }) -> {stop, normal, {MsgBuf, Length}, State}. -handle_cast(publish_empty, State) -> - %% Very odd. This could happen if the queue is deleted or purged - %% and the mixed queue fails to shut us down. - {noreply, State, hibernate}; -handle_cast({publish, { Msg = #basic_message {}, - _Size, IsDelivered, AckTag, _Remaining }}, - State = #pstate { fetched_count = Fetched, target_count = Target, - msg_buf = MsgBuf, buf_length = Length, queue = Q - }) -> - ok = rabbit_disk_queue:set_delivered_and_advance(Q, AckTag), - ok = case Fetched + 1 == Target of - true -> ok; - false -> rabbit_disk_queue:prefetch(Q) - end, - MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), - {noreply, State #pstate { fetched_count = Fetched + 1, - buf_length = Length + 1, - msg_buf = MsgBuf1 }, hibernate}. +handle_cast(Msg, State) -> + exit({unexpected_message_cast_to_prefetcher, Msg, State}). 
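
%% [Editorial sketch, not part of the original patch] The essence of
%% the fix documented above: publish is now a synchronous call, so the
%% disk_queue cannot mark a message as delivered and advance the queue
%% until the prefetcher definitely holds that message. With the old
%% cast there was no ordering guarantee relative to a later
%% disk_queue:deliver(Q) from the mixed_queue, so the same message
%% could be delivered twice. Roughly (MarkDeliveredFun stands in for
%% the disk_queue's own bookkeeping):
publish_and_advance_sketch(Prefetcher, Msg, MarkDeliveredFun) ->
    %% blocks only briefly: the prefetcher replies before buffering
    ok = gen_server2:call(Prefetcher, {publish, Msg}, infinity),
    %% only now is it safe to mark Msg delivered and advance the queue
    MarkDeliveredFun(Msg).
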
handle_info({'DOWN', MRef, process, _Pid, _Reason}, State = #pstate { queue_mref = MRef }) -> -- cgit v1.2.1 From cff8f1b0dbd3c4d44842cd15feb1142dde312b0f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 14 Aug 2009 12:46:40 +0100 Subject: Made the prefetcher only consider hibernation when it's fully drained the queue. --- src/rabbit_queue_prefetcher.erl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index bab5396e..ec969bfc 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -203,8 +203,8 @@ init([Q, Count, QPid]) -> queue_mref = MRef }, ok = rabbit_disk_queue:prefetch(Q), - {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, - ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + {ok, State, infinity, {backoff, ?HIBERNATE_AFTER_MIN, + ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({publish, { Msg = #basic_message {}, _Size, IsDelivered, AckTag, _Remaining }}, @@ -213,14 +213,15 @@ handle_call({publish, { Msg = #basic_message {}, msg_buf = MsgBuf, buf_length = Length, queue = Q }) -> gen_server2:reply(DiskQueue, ok), - ok = case Fetched + 1 == Target of - true -> ok; - false -> rabbit_disk_queue:prefetch(Q) - end, + Timeout = case Fetched + 1 == Target of + true -> hibernate; + false -> ok = rabbit_disk_queue:prefetch(Q), + infinity + end, MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), {noreply, State #pstate { fetched_count = Fetched + 1, buf_length = Length + 1, - msg_buf = MsgBuf1 }, hibernate}; + msg_buf = MsgBuf1 }, Timeout}; handle_call(publish_empty, _From, State) -> %% Very odd. This could happen if the queue is deleted or purged %% and the mixed queue fails to shut us down. @@ -235,7 +236,7 @@ handle_call(drain, _From, State = #pstate { fetched_count = Count, handle_call(drain, _From, State = #pstate { msg_buf = MsgBuf, buf_length = Length }) -> {reply, {MsgBuf, Length, continuing}, - State #pstate { msg_buf = queue:new(), buf_length = 0 }, hibernate}; + State #pstate { msg_buf = queue:new(), buf_length = 0 }, infinity}; handle_call(drain_and_stop, _From, State = #pstate { buf_length = 0 }) -> {stop, normal, empty, State}; handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf, -- cgit v1.2.1 From 677e88ac5f7698f3ae6e3a9d5497fbede2ba9a43 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 14 Aug 2009 12:51:41 +0100 Subject: case => if. This is the last of the corrections for the prefetcher from comment #8 --- src/rabbit_queue_prefetcher.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index ec969bfc..c847848d 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -213,10 +213,9 @@ handle_call({publish, { Msg = #basic_message {}, msg_buf = MsgBuf, buf_length = Length, queue = Q }) -> gen_server2:reply(DiskQueue, ok), - Timeout = case Fetched + 1 == Target of - true -> hibernate; - false -> ok = rabbit_disk_queue:prefetch(Q), - infinity + Timeout = if Fetched + 1 == Target -> hibernate; + true -> ok = rabbit_disk_queue:prefetch(Q), + infinity end, MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), {noreply, State #pstate { fetched_count = Fetched + 1, -- cgit v1.2.1 From 53812940a915ae2c206445d56a92d415aba0f127 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 14 Aug 2009 15:20:35 +0100 Subject: Bumped requirement for checking specs to R13B. 
--- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e8ac7276..e3f40beb 100644 --- a/Makefile +++ b/Makefile @@ -20,10 +20,10 @@ PYTHON=python ifndef USE_SPECS # our type specs rely on features / bug fixes in dialyzer that are -# only available in R12B-3 upwards +# only available in R13B upwards (R13B is eshell 5.7.1) # # NB: the test assumes that version number will only contain single digits -USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.6.2" ]; then echo "true"; else echo "false"; fi) +USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.0" ]; then echo "true"; else echo "false"; fi) endif #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests -- cgit v1.2.1 From 4ccb63794e574fb57aa78675db5c9257d0412d65 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 14 Aug 2009 19:47:19 +0100 Subject: removed pout and friends from priority queue as requested. Large refactoring of queue_mode_manager. Appears to still work but needs a rereading in the cold light of day! --- src/priority_queue.erl | 32 +------ src/rabbit_queue_mode_manager.erl | 176 ++++++++++++++++++++------------------ src/rabbit_tests.erl | 56 +++++------- 3 files changed, 112 insertions(+), 152 deletions(-) diff --git a/src/priority_queue.erl b/src/priority_queue.erl index 9421f281..0c777471 100644 --- a/src/priority_queue.erl +++ b/src/priority_queue.erl @@ -56,7 +56,7 @@ -module(priority_queue). -export([new/0, is_queue/1, is_empty/1, len/1, to_list/1, in/2, in/3, - out/1, out/2, pout/1, join/2]). + out/1, join/2]). %%---------------------------------------------------------------------------- @@ -74,8 +74,6 @@ -spec(in/2 :: (any(), pqueue()) -> pqueue()). -spec(in/3 :: (any(), priority(), pqueue()) -> pqueue()). -spec(out/1 :: (pqueue()) -> {(empty | {value, any()}), pqueue()}). --spec(out/2 :: (priority(), pqueue()) -> {(empty | {value, any()}), pqueue()}). --spec(pout/1 :: (pqueue()) -> {(empty | {value, any(), priority()}), pqueue()}). -spec(join/2 :: (pqueue(), pqueue()) -> pqueue()). -endif. @@ -151,34 +149,6 @@ out({pqueue, [{P, Q} | Queues]}) -> end, {R, NewQ}. -out(_Priority, {queue, [], []} = Q) -> - {empty, Q}; -out(Priority, {queue, _, _} = Q) when Priority =< 0 -> - out(Q); -out(_Priority, {queue, _, _} = Q) -> - {empty, Q}; -out(Priority, {pqueue, [{P, _Q} | _Queues]} = Q) when Priority =< (-P) -> - out(Q); -out(_Priority, {pqueue, [_|_]} = Q) -> - {empty, Q}. - -pout({queue, [], []} = Q) -> - {empty, Q}; -pout({queue, _, _} = Q) -> - {{value, V}, Q1} = out(Q), - {{value, V, 0}, Q1}; -pout({pqueue, [{P, Q} | Queues]}) -> - {{value, V}, Q1} = out(Q), - NewQ = case is_empty(Q1) of - true -> case Queues of - [] -> {queue, [], []}; - [{0, OnlyQ}] -> OnlyQ; - [_|_] -> {pqueue, Queues} - end; - false -> {pqueue, [{P, Q1} | Queues]} - end, - {{value, V, -P}, NewQ}. 
- join(A, {queue, [], []}) -> A; join({queue, [], []}, B) -> diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index ceb09d92..194ddf95 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -289,10 +289,10 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, case ActivityNew of active -> StateN; disk -> StateN; - lowrate -> StateN #state { lowrate = - priority_queue:in(Pid, Req, Lazy) }; - hibernate -> StateN #state { hibernate = - queue:in(Pid, Sleepy) } + lowrate -> + StateN #state { lowrate = add_to_lowrate(Pid, Req, Lazy) }; + hibernate -> + StateN #state { hibernate = queue:in(Pid, Sleepy) } end, {noreply, StateN1}; @@ -324,6 +324,10 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +add_to_lowrate(Pid, Alloc, Lazy) -> + Bucket = trunc(math:log(Alloc)), + priority_queue:in({Pid, Bucket, Alloc}, Bucket, Lazy). + find_queue(Pid, Mixed) -> case dict:find(Pid, Mixed) of {ok, Value} -> {mixed, Value}; @@ -331,96 +335,98 @@ find_queue(Pid, Mixed) -> end. tidy_and_sum_lazy(IgnorePid, Lazy, Mixed) -> - tidy_and_sum_lazy(sets:add_element(IgnorePid, sets:new()), - Lazy, Mixed, 0, priority_queue:new()). - -tidy_and_sum_lazy(DupCheckSet, Lazy, Mixed, FreeAcc, LazyAcc) -> - case priority_queue:pout(Lazy) of - {empty, Lazy} -> {FreeAcc, LazyAcc}; - {{value, Pid, Alloc}, Lazy1} -> - case sets:is_element(Pid, DupCheckSet) of - true -> - tidy_and_sum_lazy(DupCheckSet, Lazy1, Mixed, FreeAcc, - LazyAcc); - false -> - DupCheckSet1 = sets:add_element(Pid, DupCheckSet), - case find_queue(Pid, Mixed) of - {mixed, {Alloc, lowrate}} -> - tidy_and_sum_lazy(DupCheckSet1, Lazy1, Mixed, - FreeAcc + Alloc, priority_queue:in - (Pid, Alloc, LazyAcc)); - _ -> - tidy_and_sum_lazy(DupCheckSet1, Lazy1, Mixed, - FreeAcc, LazyAcc) - end - end - end. + tidy_and_sum(IgnorePid, Mixed, + fun (Lazy1) -> + case priority_queue:out(Lazy1) of + {empty, Lazy1} -> + {empty, Lazy1}; + {{value, {Pid, _Bucket, _Alloc}}, Lazy2} -> + {{value, Pid}, Lazy2} + end + end, fun add_to_lowrate/3, Lazy, priority_queue:new(), + lowrate). tidy_and_sum_sleepy(IgnorePid, Sleepy, Mixed) -> - tidy_and_sum_sleepy(sets:add_element(IgnorePid, sets:new()), - Sleepy, Mixed, 0, queue:new()). - -tidy_and_sum_sleepy(DupCheckSet, Sleepy, Mixed, FreeAcc, SleepyAcc) -> - case queue:out(Sleepy) of - {empty, Sleepy} -> {FreeAcc, SleepyAcc}; - {{value, Pid}, Sleepy1} -> - case sets:is_element(Pid, DupCheckSet) of - true -> - tidy_and_sum_sleepy(DupCheckSet, Sleepy1, Mixed, FreeAcc, - SleepyAcc); - false -> - DupCheckSet1 = sets:add_element(Pid, DupCheckSet), - case find_queue(Pid, Mixed) of - {mixed, {Alloc, hibernate}} -> - tidy_and_sum_sleepy(DupCheckSet1, Sleepy1, Mixed, - FreeAcc + Alloc, queue:in - (Pid, SleepyAcc)); - _ -> tidy_and_sum_sleepy(DupCheckSet1, Sleepy1, Mixed, - FreeAcc, SleepyAcc) - end - end + tidy_and_sum(IgnorePid, Mixed, fun queue:out/1, + fun (Pid, _Alloc, Queue) -> + queue:in(Pid, Queue) + end, Sleepy, queue:new(), hibernate). + +tidy_and_sum(IgnorePid, Mixed, Catamorphism, Anamorphism, CataInit, AnaInit, + AtomExpected) -> + tidy_and_sum(sets:add_element(IgnorePid, sets:new()), + Mixed, Catamorphism, Anamorphism, CataInit, AnaInit, 0, + AtomExpected). 
+ +tidy_and_sum(DupCheckSet, Mixed, Catamorphism, Anamorphism, CataInit, AnaInit, + AllocAcc, AtomExpected) -> + case Catamorphism(CataInit) of + {empty, CataInit} -> {AnaInit, AllocAcc}; + {{value, Pid}, CataInit1} -> + {DupCheckSet2, AnaInit2, AllocAcc2} = + case sets:is_element(Pid, DupCheckSet) of + true -> + {DupCheckSet, AnaInit, AllocAcc}; + false -> + {AnaInit1, AllocAcc1} = + case find_queue(Pid, Mixed) of + {mixed, {Alloc, AtomExpected}} -> + {Anamorphism(Pid, Alloc, AnaInit), + Alloc + AllocAcc}; + _ -> + {AnaInit, AllocAcc} + end, + {sets:add_element(Pid, DupCheckSet), AnaInit1, + AllocAcc1} + end, + tidy_and_sum(DupCheckSet2, Mixed, Catamorphism, Anamorphism, + CataInit1, AnaInit2, AllocAcc2, AtomExpected) end. free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req) -> - free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req, - priority_queue:new()). - -free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req, LazyAcc) -> - case priority_queue:pout(Lazy) of - {empty, Lazy} -> {priority_queue:join(Lazy, LazyAcc), Mixed, Req}; - {{value, IgnorePid, Alloc}, Lazy1} -> - free_upto_lazy(IgnorePid, Callbacks, Lazy1, Mixed, Req, - priority_queue:in(IgnorePid, Alloc, LazyAcc)); - {{value, Pid, Alloc}, Lazy1} -> - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [disk]), - Mixed1 = dict:erase(Pid, Mixed), - case Req > Alloc of - true -> free_upto_lazy(IgnorePid, Callbacks, Lazy1, Mixed1, - Req - Alloc, LazyAcc); - false -> {priority_queue:join(Lazy1, LazyAcc), Mixed1, - Req - Alloc} - end - end. + free_from(Callbacks, Mixed, + fun(Lazy1, LazyAcc) -> + case priority_queue:out(Lazy1) of + {empty, Lazy1} -> + empty; + {{value, {IgnorePid, Bucket, Alloc}}, Lazy2} -> + {skip, Lazy2, + priority_queue:in({IgnorePid, Bucket, Alloc}, + Bucket, LazyAcc)}; + {{value, {Pid, _Bucket, Alloc}}, Lazy3} -> + {value, Lazy3, Pid, Alloc} + end + end, fun priority_queue:join/2, Lazy, priority_queue:new(), Req). free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req) -> - free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req, queue:new()). - -free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req, SleepyAcc) -> - case queue:out(Sleepy) of - {empty, Sleepy} -> {queue:join(Sleepy, SleepyAcc), Mixed, Req}; - {{value, IgnorePid}, Sleepy1} -> - free_upto_sleepy(IgnorePid, Callbacks, Sleepy1, Mixed, Req, - queue:in(IgnorePid, SleepyAcc)); - {{value, Pid}, Sleepy1} -> - {Alloc, hibernate} = dict:fetch(Pid, Mixed), + free_from(Callbacks, Mixed, + fun(Sleepy1, SleepyAcc) -> + case queue:out(Sleepy1) of + {empty, Sleepy1} -> + empty; + {{value, IgnorePid}, Sleepy2} -> + {skip, Sleepy2, queue:in(IgnorePid, SleepyAcc)}; + {{value, Pid}, Sleepy3} -> + {Alloc, hibernate} = dict:fetch(Pid, Mixed), + {value, Sleepy3, Pid, Alloc} + end + end, fun queue:join/2, Sleepy, queue:new(), Req). 
+ +free_from(Callbacks, Mixed, Hylomorphism, BaseCase, CataInit, AnaInit, Req) -> + case Hylomorphism(CataInit, AnaInit) of + empty -> + {BaseCase(CataInit, AnaInit), Mixed, Req}; + {skip, CataInit1, AnaInit1} -> + free_from(Callbacks, Mixed, Hylomorphism, BaseCase, CataInit1, + AnaInit1, Req); + {value, CataInit1, Pid, Alloc} -> {Module, Function, Args} = dict:fetch(Pid, Callbacks), ok = erlang:apply(Module, Function, Args ++ [disk]), Mixed1 = dict:erase(Pid, Mixed), case Req > Alloc of - true -> free_upto_sleepy(IgnorePid, Callbacks, Sleepy1, Mixed1, - Req - Alloc, SleepyAcc); - false -> {queue:join(Sleepy1, SleepyAcc), Mixed1, Req - Alloc} + true -> free_from(Callbacks, Mixed1, Hylomorphism, BaseCase, + CataInit1, AnaInit, Req - Alloc); + false -> {BaseCase(CataInit1, AnaInit), Mixed1, Req - Alloc} end end. @@ -431,10 +437,10 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, hibernate = Sleepy }) -> case Req > Avail of true -> - {SleepySum, Sleepy1} = tidy_and_sum_sleepy(Pid, Sleepy, Mixed), + {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Pid, Sleepy, Mixed), case Req > Avail + SleepySum of true -> %% not enough in sleepy, have a look in lazy too - {LazySum, Lazy1} = tidy_and_sum_lazy(Pid, Lazy, Mixed), + {Lazy1, LazySum} = tidy_and_sum_lazy(Pid, Lazy, Mixed), case Req > Avail + SleepySum + LazySum of true -> %% can't free enough, just return tidied state State #state { lowrate = Lazy1, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 6e3a92d0..476fff41 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -69,7 +69,7 @@ test_priority_queue() -> %% empty Q Q = priority_queue:new(), - {true, true, 0, [], [], [], []} = test_priority_queue(Q), + {true, true, 0, [], []} = test_priority_queue(Q), %% 1-4 element no-priority Q true = lists:all(fun (X) -> X =:= passed end, @@ -78,59 +78,58 @@ test_priority_queue() -> %% 1-element priority Q Q1 = priority_queue:in(foo, 1, priority_queue:new()), - {true, false, 1, [{1, foo}], [foo], [], [{foo, 1}]} = + {true, false, 1, [{1, foo}], [foo]} = test_priority_queue(Q1), %% 2-element same-priority Q Q2 = priority_queue:in(bar, 1, Q1), - {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [], [{foo, 1}, {bar, 1}]} - = test_priority_queue(Q2), + {true, false, 2, [{1, foo}, {1, bar}], [foo, bar]} = + test_priority_queue(Q2), %% 2-element different-priority Q Q3 = priority_queue:in(bar, 2, Q1), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [bar], [{bar, 2}, {foo, 1}]} = + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo]} = test_priority_queue(Q3), %% 1-element negative priority Q Q4 = priority_queue:in(foo, -1, priority_queue:new()), - {true, false, 1, [{-1, foo}], [foo], [], [{foo, -1}]} = - test_priority_queue(Q4), + {true, false, 1, [{-1, foo}], [foo]} = test_priority_queue(Q4), %% merge 2 * 1-element no-priority Qs Q5 = priority_queue:join(priority_queue:in(foo, Q), priority_queue:in(bar, Q)), - {true, false, 2, [{0, foo}, {0, bar}], [foo, bar], [], [{foo, 0}, {bar, 0}]} - = test_priority_queue(Q5), + {true, false, 2, [{0, foo}, {0, bar}], [foo, bar]} = + test_priority_queue(Q5), %% merge 1-element no-priority Q with 1-element priority Q Q6 = priority_queue:join(priority_queue:in(foo, Q), priority_queue:in(bar, 1, Q)), - {true, false, 2, [{1, bar}, {0, foo}], [bar, foo], [], [{bar, 1}, {foo, 0}]} - = test_priority_queue(Q6), + {true, false, 2, [{1, bar}, {0, foo}], [bar, foo]} = + test_priority_queue(Q6), %% merge 1-element priority Q with 1-element no-priority Q Q7 = priority_queue:join(priority_queue:in(foo, 
1, Q), priority_queue:in(bar, Q)), - {true, false, 2, [{1, foo}, {0, bar}], [foo, bar], [], [{foo, 1}, {bar, 0}]} - = test_priority_queue(Q7), + {true, false, 2, [{1, foo}, {0, bar}], [foo, bar]} = + test_priority_queue(Q7), %% merge 2 * 1-element same-priority Qs Q8 = priority_queue:join(priority_queue:in(foo, 1, Q), priority_queue:in(bar, 1, Q)), - {true, false, 2, [{1, foo}, {1, bar}], [foo, bar], [], [{foo, 1}, {bar, 1}]} - = test_priority_queue(Q8), + {true, false, 2, [{1, foo}, {1, bar}], [foo, bar]} = + test_priority_queue(Q8), %% merge 2 * 1-element different-priority Qs Q9 = priority_queue:join(priority_queue:in(foo, 1, Q), priority_queue:in(bar, 2, Q)), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [bar], - [{bar, 2}, {foo, 1}]} = test_priority_queue(Q9), + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo]} = + test_priority_queue(Q9), %% merge 2 * 1-element different-priority Qs (other way around) Q10 = priority_queue:join(priority_queue:in(bar, 2, Q), priority_queue:in(foo, 1, Q)), - {true, false, 2, [{2, bar}, {1, foo}], [bar, foo], [bar], - [{bar, 2}, {foo, 1}]} = test_priority_queue(Q10), + {true, false, 2, [{2, bar}, {1, foo}], [bar, foo]} = + test_priority_queue(Q10), passed. @@ -142,33 +141,18 @@ priority_queue_out_all(Q) -> {empty, _} -> []; {{value, V}, Q1} -> [V | priority_queue_out_all(Q1)] end. -priority_queue_out_2_all(Q) -> - case priority_queue:out(2, Q) of - {empty, _} -> []; - {{value, V}, Q1} -> [V | priority_queue_out_2_all(Q1)] - end. - -priority_queue_pout_all(Q) -> - case priority_queue:pout(Q) of - {empty, _} -> []; - {{value, V, P}, Q1} -> [{V, P} | priority_queue_pout_all(Q1)] - end. - test_priority_queue(Q) -> {priority_queue:is_queue(Q), priority_queue:is_empty(Q), priority_queue:len(Q), priority_queue:to_list(Q), - priority_queue_out_all(Q), - priority_queue_out_2_all(Q), - priority_queue_pout_all(Q)}. + priority_queue_out_all(Q)}. test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), ToListRes = [{0, X} || X <- Items], - POutAllRes = [{X, 0} || X <- Items], - {true, false, N, ToListRes, Items, [], POutAllRes} = test_priority_queue(Q), + {true, false, N, ToListRes, Items} = test_priority_queue(Q), passed. test_parsing() -> -- cgit v1.2.1 From 36da66ff63e3e3dd19e1e05924104a2e092ede4d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 16 Aug 2009 21:08:30 +0100 Subject: More fixes to the queue_mode_manager. --- src/rabbit_amqqueue.erl | 2 +- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_disk_queue.erl | 2 +- src/rabbit_queue_mode_manager.erl | 326 ++++++++++++++++++++------------------ 4 files changed, 172 insertions(+), 160 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 9b77949d..f0e8d4c2 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -232,7 +232,7 @@ set_mode_pin(VHostPath, Queue, DiskBin) fun(Q) -> case Disk of true -> rabbit_queue_mode_manager:pin_to_disk (Q #amqqueue.pid); - false -> rabbit_queue_mode_manager:unpin_to_disk + false -> rabbit_queue_mode_manager:unpin_from_disk (Q #amqqueue.pid) end end). 
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index dba7ec24..8ee576f7 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -102,7 +102,7 @@ start_link(Q) -> init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), ok = rabbit_queue_mode_manager:register - (self(), rabbit_amqqueue, set_mode, [self()]), + (self(), false, rabbit_amqqueue, set_mode, [self()]), {ok, MS} = rabbit_mixed_queue:init(QName, Durable), State = #q{q = Q, owner = none, diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2f831058..76399022 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -379,7 +379,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% Otherwise, the gen_server will be immediately terminated. process_flag(trap_exit, true), ok = rabbit_queue_mode_manager:register - (self(), rabbit_disk_queue, set_mode, []), + (self(), true, rabbit_disk_queue, set_mode, []), Node = node(), ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node, diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 194ddf95..37afdc6c 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -38,8 +38,8 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/4, report_memory/3, report_memory/5, info/0, - pin_to_disk/1, unpin_to_disk/1]). +-export([register/5, report_memory/3, report_memory/5, info/0, + pin_to_disk/1, unpin_from_disk/1]). -define(TOTAL_TOKENS, 10000000). -define(ACTIVITY_THRESHOLD, 25). @@ -50,14 +50,14 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(register/4 :: (pid(), atom(), atom(), list()) -> 'ok'). +-spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). -spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). -spec(report_memory/5 :: (pid(), non_neg_integer(), (non_neg_integer() | 'undefined'), (non_neg_integer() | 'undefined'), bool()) -> 'ok'). -spec(pin_to_disk/1 :: (pid()) -> 'ok'). --spec(unpin_to_disk/1 :: (pid()) -> 'ok'). +-spec(unpin_from_disk/1 :: (pid()) -> 'ok'). -endif. @@ -67,7 +67,8 @@ tokens_per_byte, lowrate, hibernate, - disk_mode_pins + disk_mode_pins, + unevictable }). %% Token-credit based memory management @@ -91,24 +92,36 @@ %% queue, and so is implicitly sorted by the order in which processes %% were added to the queue. This means that when removing from the %% queue, we hibernate the sleepiest pid first. The lowrate group is a -%% priority queue, where the priority is the amount of memory -%% allocated. Thus when we remove from the queue, we first remove the -%% queue with the most amount of memory. +%% priority queue, where the priority is the truncated log (base e) of +%% the amount of memory allocated. Thus when we remove from the queue, +%% we first remove the queue from the highest bucket. %% %% If the request still can't be satisfied after evicting to disk %% everyone from those two groups (and note that we check first %% whether or not freeing them would make available enough tokens to %% satisfy the request rather than just sending all those queues to -%% disk and then going "whoops, didn't help afterall"), then we send -%% the requesting process to disk. +%% disk and then going "whoops, didn't help after all"), then we send +%% the requesting process to disk. When a queue registers, it can +%% declare itself "unevictable". 
If a queue is unevictable then it +%% will not be sent to disk as a result of other processes requesting +%% more memory. However, if it itself is requesting more memory and +%% that request can't be satisfied then it is still sent to disk as +%% before. This feature is only used by the disk_queue, because if the +%% disk queue is not being used, and hibernates, and then memory +%% pressure gets tight, the disk_queue would typically be one of the +%% first processes to get sent to disk, which cripples +%% performance. Thus by setting it unevictable, it is only possible +%% for the disk_queue to be sent to disk when it is active and +%% attempting to increase its memory allocation. %% %% If a process has been sent to disk, it continues making %% requests. As soon as a request can be satisfied (and this can %% include sending other processes to disk in the way described -%% above), it will be told to come back into mixed mode. +%% above), it will be told to come back into mixed mode. We do not +%% keep any information about queues in disk mode. %% %% Note that the lowrate and hibernate groups can get very out of -%% date. This is fine, and kinda unavoidable given the absence of +%% date. This is fine, and somewhat unavoidable given the absence of %% useful APIs for queues. Thus we allow them to get out of date %% (processes will be left in there when they change groups, %% duplicates can appear, dead processes are not pruned etc etc etc), @@ -116,17 +129,17 @@ %% memory, we tidy up at that point. %% %% A process which is not evicted to disk, and is requesting a smaller -%% amount of ram than its last request will always be satisfied. A +%% amount of RAM than its last request will always be satisfied. A %% mixed-mode process that is busy but consuming an unchanging amount %% of RAM will never be sent to disk. The disk_queue is also managed %% in the same way. This means that a queue that has gone back to %% being mixed after being in disk mode now has its messages counted %% twice as they are counted both in the request made by the queue -%% (even though they may not yet be in RAM) and also by the -%% disk_queue. This means that the threshold for going mixed -> disk -%% is above the threshold for going disk -> mixed. This is actually -%% fairly sensible as it reduces the risk of any oscillations -%% occurring. +%% (even though they may not yet be in RAM (though see the +%% prefetcher)) and also by the disk_queue. Thus the amount of +%% available RAM must be higher when going disk -> mixed than when +%% going mixed -> disk. This is fairly sensible as it reduces the risk +%% of any oscillations occurring. %% %% The queue process deliberately reports 4 times its estimated RAM %% usage, and the disk_queue 2.5 times. In practise, this seems to @@ -138,14 +151,15 @@ start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). -register(Pid, Module, Function, Args) -> - gen_server2:cast(?SERVER, {register, Pid, Module, Function, Args}). +register(Pid, Unevictable, Module, Function, Args) -> + gen_server2:cast(?SERVER, {register, Pid, Unevictable, + Module, Function, Args}). pin_to_disk(Pid) -> gen_server2:call(?SERVER, {pin_to_disk, Pid}). -unpin_to_disk(Pid) -> - gen_server2:call(?SERVER, {unpin_to_disk, Pid}). +unpin_from_disk(Pid) -> + gen_server2:call(?SERVER, {unpin_from_disk, Pid}). report_memory(Pid, Memory, Hibernating) -> report_memory(Pid, Memory, undefined, undefined, Hibernating). 
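
%% [Editorial sketch, not part of the original patch] The log-bucketing
%% described above, in isolation: allocations in the lowrate group are
%% keyed by truncated natural log, so eviction drains a coarse size
%% class (largest bucket first) rather than sorting queues by exact
%% byte count. For example, bucket_sketch(1000) =:= 6 and
%% bucket_sketch(1000000) =:= 13.
bucket_sketch(Alloc) when Alloc >= 1 ->
    trunc(math:log(Alloc)).
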
@@ -168,7 +182,8 @@ init([]) -> tokens_per_byte = ?TOTAL_TOKENS / MemAvail, lowrate = priority_queue:new(), hibernate = queue:new(), - disk_mode_pins = sets:new() + disk_mode_pins = sets:new(), + unevictable = sets:new() }}. handle_call({pin_to_disk, Pid}, _From, @@ -182,23 +197,20 @@ handle_call({pin_to_disk, Pid}, _From, false -> case find_queue(Pid, Mixed) of {mixed, {OAlloc, _OActivity}} -> - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [disk]), - {ok, - State #state { mixed_queues = dict:erase(Pid, Mixed), - available_tokens = Avail + OAlloc, - disk_mode_pins = - sets:add_element(Pid, Pins) - }}; + Mixed1 = send_to_disk(Callbacks, Mixed, Pid), + {ok, State #state { mixed_queues = Mixed1, + available_tokens = Avail + OAlloc, + disk_mode_pins = + sets:add_element(Pid, Pins) + }}; disk -> - {ok, - State #state { disk_mode_pins = - sets:add_element(Pid, Pins) }} + {ok, State #state { disk_mode_pins = + sets:add_element(Pid, Pins) }} end end, {reply, Res, State1}; -handle_call({unpin_to_disk, Pid}, _From, +handle_call({unpin_from_disk, Pid}, _From, State = #state { disk_mode_pins = Pins }) -> {reply, ok, State #state { disk_mode_pins = sets:del_element(Pid, Pins) }}; @@ -207,13 +219,15 @@ handle_call(info, _From, State) -> mixed_queues = Mixed, lowrate = Lazy, hibernate = Sleepy, - disk_mode_pins = Pins } = + disk_mode_pins = Pins, + unevictable = Unevictable } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying {reply, [{ available_tokens, Avail }, { mixed_queues, dict:to_list(Mixed) }, { lowrate_queues, priority_queue:to_list(Lazy) }, { hibernated_queues, queue:to_list(Sleepy) }, - { queues_pinned_to_disk, sets:to_list(Pins) }], State1}. + { queues_pinned_to_disk, sets:to_list(Pins) }, + { unevictable_queues, sets:to_list(Unevictable) }], State1}. 
handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, @@ -229,31 +243,28 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, {G, L} -> G < ?ACTIVITY_THRESHOLD andalso L < ?ACTIVITY_THRESHOLD end, + MixedActivity = if Hibernating -> hibernate; + LowRate -> lowrate; + true -> active + end, {StateN = #state { lowrate = Lazy, hibernate = Sleepy }, ActivityNew} = case find_queue(Pid, Mixed) of {mixed, {OAlloc, _OActivity}} -> Avail1 = Avail + OAlloc, - State1 = #state { available_tokens = Avail2, - mixed_queues = Mixed1 } = - free_upto(Pid, Req, - State #state { available_tokens = Avail1 }), + State1 = + #state { available_tokens = Avail2, mixed_queues = Mixed1 } + = free_upto(Pid, Req, + State #state { available_tokens = Avail1 }), case Req > Avail2 of true -> %% nowt we can do, send to disk - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [disk]), - {State1 #state { mixed_queues = - dict:erase(Pid, Mixed1) }, - disk}; + Mixed2 = send_to_disk(Callbacks, Mixed1, Pid), + {State1 #state { mixed_queues = Mixed2 }, disk}; false -> %% keep mixed - Activity = if Hibernating -> hibernate; - LowRate -> lowrate; - true -> active - end, {State1 #state { mixed_queues = - dict:store(Pid, {Req, Activity}, Mixed1), + dict:store(Pid, {Req, MixedActivity}, Mixed1), available_tokens = Avail2 - Req }, - Activity} + MixedActivity} end; disk -> case sets:is_element(Pid, Pins) of @@ -273,15 +284,11 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, dict:fetch(Pid, Callbacks), ok = erlang:apply(Module, Function, Args ++ [mixed]), - Activity = if Hibernating -> hibernate; - LowRate -> lowrate; - true -> active - end, {State1 #state { mixed_queues = - dict:store(Pid, {Req, Activity}, Mixed1), + dict:store(Pid, {Req, MixedActivity}, Mixed1), available_tokens = Avail1 - Req }, - disk} + MixedActivity} end end end, @@ -296,11 +303,18 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, end, {noreply, StateN1}; -handle_cast({register, Pid, Module, Function, Args}, - State = #state { callbacks = Callbacks }) -> +handle_cast({register, Pid, IsUnevictable, Module, Function, Args}, + State = #state { callbacks = Callbacks, + unevictable = Unevictable }) -> _MRef = erlang:monitor(process, Pid), + Unevictable1 = case IsUnevictable of + true -> sets:add_element(Pid, Unevictable); + false -> Unevictable + end, {noreply, State #state { callbacks = dict:store - (Pid, {Module, Function, Args}, Callbacks) }}. + (Pid, {Module, Function, Args}, Callbacks), + unevictable = Unevictable1 + }}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state { available_tokens = Avail, @@ -325,7 +339,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. add_to_lowrate(Pid, Alloc, Lazy) -> - Bucket = trunc(math:log(Alloc)), + Bucket = trunc(math:log(Alloc)), %% log base e priority_queue:in({Pid, Bucket, Alloc}, Bucket, Lazy). find_queue(Pid, Mixed) -> @@ -334,97 +348,95 @@ find_queue(Pid, Mixed) -> error -> disk end. -tidy_and_sum_lazy(IgnorePid, Lazy, Mixed) -> - tidy_and_sum(IgnorePid, Mixed, +send_to_disk(Callbacks, Mixed, Pid) -> + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(Module, Function, Args ++ [disk]), + dict:erase(Pid, Mixed). 
+ +tidy_and_sum_lazy(IgnorePids, Lazy, Mixed) -> + tidy_and_sum(lowrate, Mixed, fun (Lazy1) -> case priority_queue:out(Lazy1) of - {empty, Lazy1} -> - {empty, Lazy1}; + {empty, Lazy2} -> + {empty, Lazy2}; {{value, {Pid, _Bucket, _Alloc}}, Lazy2} -> {{value, Pid}, Lazy2} end - end, fun add_to_lowrate/3, Lazy, priority_queue:new(), - lowrate). + end, fun add_to_lowrate/3, IgnorePids, Lazy, + priority_queue:new(), 0). -tidy_and_sum_sleepy(IgnorePid, Sleepy, Mixed) -> - tidy_and_sum(IgnorePid, Mixed, fun queue:out/1, - fun (Pid, _Alloc, Queue) -> - queue:in(Pid, Queue) - end, Sleepy, queue:new(), hibernate). - -tidy_and_sum(IgnorePid, Mixed, Catamorphism, Anamorphism, CataInit, AnaInit, - AtomExpected) -> - tidy_and_sum(sets:add_element(IgnorePid, sets:new()), - Mixed, Catamorphism, Anamorphism, CataInit, AnaInit, 0, - AtomExpected). - -tidy_and_sum(DupCheckSet, Mixed, Catamorphism, Anamorphism, CataInit, AnaInit, - AllocAcc, AtomExpected) -> +tidy_and_sum_sleepy(IgnorePids, Sleepy, Mixed) -> + tidy_and_sum(hibernate, Mixed, fun queue:out/1, + fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, + IgnorePids, Sleepy, queue:new(), 0). + +tidy_and_sum(AtomExpected, Mixed, Catamorphism, Anamorphism, DupCheckSet, + CataInit, AnaInit, AllocAcc) -> case Catamorphism(CataInit) of - {empty, CataInit} -> {AnaInit, AllocAcc}; + {empty, _CataInit} -> {AnaInit, AllocAcc}; {{value, Pid}, CataInit1} -> - {DupCheckSet2, AnaInit2, AllocAcc2} = + {DupCheckSet1, AnaInit1, AllocAcc1} = case sets:is_element(Pid, DupCheckSet) of true -> {DupCheckSet, AnaInit, AllocAcc}; false -> - {AnaInit1, AllocAcc1} = - case find_queue(Pid, Mixed) of - {mixed, {Alloc, AtomExpected}} -> - {Anamorphism(Pid, Alloc, AnaInit), - Alloc + AllocAcc}; - _ -> - {AnaInit, AllocAcc} - end, - {sets:add_element(Pid, DupCheckSet), AnaInit1, - AllocAcc1} + case find_queue(Pid, Mixed) of + {mixed, {Alloc, AtomExpected}} -> + {sets:add_element(Pid, DupCheckSet), + Anamorphism(Pid, Alloc, AnaInit), + Alloc + AllocAcc}; + _ -> + {DupCheckSet, AnaInit, AllocAcc} + end end, - tidy_and_sum(DupCheckSet2, Mixed, Catamorphism, Anamorphism, - CataInit1, AnaInit2, AllocAcc2, AtomExpected) + tidy_and_sum(AtomExpected, Mixed, Catamorphism, Anamorphism, + DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) end. -free_upto_lazy(IgnorePid, Callbacks, Lazy, Mixed, Req) -> - free_from(Callbacks, Mixed, - fun(Lazy1, LazyAcc) -> - case priority_queue:out(Lazy1) of - {empty, Lazy1} -> - empty; - {{value, {IgnorePid, Bucket, Alloc}}, Lazy2} -> - {skip, Lazy2, - priority_queue:in({IgnorePid, Bucket, Alloc}, - Bucket, LazyAcc)}; - {{value, {Pid, _Bucket, Alloc}}, Lazy3} -> - {value, Lazy3, Pid, Alloc} +free_upto_lazy(IgnorePids, Callbacks, Lazy, Mixed, Req) -> + free_from( + Callbacks, + fun(_Mixed, Lazy1, LazyAcc) -> + case priority_queue:out(Lazy1) of + {empty, _Lazy2} -> + empty; + {{value, V = {Pid, Bucket, Alloc}}, Lazy2} -> + case sets:is_element(Pid, IgnorePids) of + true -> {skip, Lazy2, + priority_queue:in(V, Bucket, LazyAcc)}; + false -> {value, Lazy2, Pid, Alloc} end - end, fun priority_queue:join/2, Lazy, priority_queue:new(), Req). + end + end, fun priority_queue:join/2, Mixed, Lazy, priority_queue:new(), Req). 
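
%% [Editorial sketch, not part of the original patch] The shape shared
%% by tidy_and_sum and free_from above: a consumer ("catamorphism")
%% drains one queue while a builder ("anamorphism") accumulates the
%% tidied replacement. Stripped of the memory accounting, the skeleton
%% is just a queue transfer:
transfer_sketch(Out, In, Src, Dst) ->
    case Out(Src) of
        {empty, _Src1}     -> Dst;
        {{value, V}, Src1} -> transfer_sketch(Out, In, Src1, In(V, Dst))
    end.
%% e.g. transfer_sketch(fun queue:out/1, fun queue:in/2, Q, queue:new())
%% copies Q front-to-back.
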
-free_upto_sleepy(IgnorePid, Callbacks, Sleepy, Mixed, Req) -> - free_from(Callbacks, Mixed, - fun(Sleepy1, SleepyAcc) -> +free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Mixed, Req) -> + free_from(Callbacks, + fun(Mixed1, Sleepy1, SleepyAcc) -> case queue:out(Sleepy1) of - {empty, Sleepy1} -> + {empty, _Sleepy2} -> empty; - {{value, IgnorePid}, Sleepy2} -> - {skip, Sleepy2, queue:in(IgnorePid, SleepyAcc)}; - {{value, Pid}, Sleepy3} -> - {Alloc, hibernate} = dict:fetch(Pid, Mixed), - {value, Sleepy3, Pid, Alloc} + {{value, Pid}, Sleepy2} -> + case sets:is_element(Pid, IgnorePids) of + true -> {skip, Sleepy2, + queue:in(Pid, SleepyAcc)}; + false -> {Alloc, hibernate} = + dict:fetch(Pid, Mixed1), + {value, Sleepy2, Pid, Alloc} + end end - end, fun queue:join/2, Sleepy, queue:new(), Req). + end, fun queue:join/2, Mixed, Sleepy, queue:new(), Req). -free_from(Callbacks, Mixed, Hylomorphism, BaseCase, CataInit, AnaInit, Req) -> - case Hylomorphism(CataInit, AnaInit) of +free_from(Callbacks, Hylomorphism, BaseCase, Mixed, CataInit, AnaInit, Req) -> + case Hylomorphism(Mixed, CataInit, AnaInit) of empty -> - {BaseCase(CataInit, AnaInit), Mixed, Req}; + {AnaInit, Mixed, Req}; {skip, CataInit1, AnaInit1} -> - free_from(Callbacks, Mixed, Hylomorphism, BaseCase, CataInit1, + free_from(Callbacks, Hylomorphism, BaseCase, Mixed, CataInit1, AnaInit1, Req); {value, CataInit1, Pid, Alloc} -> - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [disk]), - Mixed1 = dict:erase(Pid, Mixed), + Mixed1 = send_to_disk(Callbacks, Mixed, Pid), case Req > Alloc of - true -> free_from(Callbacks, Mixed1, Hylomorphism, BaseCase, + true -> free_from(Callbacks, Hylomorphism, BaseCase, Mixed1, CataInit1, AnaInit, Req - Alloc); false -> {BaseCase(CataInit1, AnaInit), Mixed1, Req - Alloc} end @@ -434,36 +446,36 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, mixed_queues = Mixed, callbacks = Callbacks, lowrate = Lazy, - hibernate = Sleepy }) -> - case Req > Avail of - true -> - {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Pid, Sleepy, Mixed), - case Req > Avail + SleepySum of - true -> %% not enough in sleepy, have a look in lazy too - {Lazy1, LazySum} = tidy_and_sum_lazy(Pid, Lazy, Mixed), - case Req > Avail + SleepySum + LazySum of - true -> %% can't free enough, just return tidied state - State #state { lowrate = Lazy1, - hibernate = Sleepy1 }; - false -> %% need to free all of sleepy, and some of lazy - {Sleepy2, Mixed1, ReqRem} = - free_upto_sleepy - (Pid, Callbacks, Sleepy1, Mixed, Req), - {Lazy2, Mixed2, ReqRem1} = - free_upto_lazy(Pid, Callbacks, Lazy1, Mixed1, - ReqRem), - State #state { available_tokens = - Avail + (Req - ReqRem1), - mixed_queues = Mixed2, - lowrate = Lazy2, - hibernate = Sleepy2 } - end; - false -> %% enough available in sleepy, don't touch lazy + hibernate = Sleepy, + unevictable = Unevictable }) + when Req > Avail -> + Unevictable1 = sets:add_element(Pid, Unevictable), + {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unevictable1, Sleepy, Mixed), + case Req > Avail + SleepySum of + true -> %% not enough in sleepy, have a look in lazy too + {Lazy1, LazySum} = tidy_and_sum_lazy(Unevictable1, Lazy, Mixed), + case Req > Avail + SleepySum + LazySum of + true -> %% can't free enough, just return tidied state + State #state { lowrate = Lazy1, hibernate = Sleepy1 }; + false -> %% need to free all of sleepy, and some of lazy {Sleepy2, Mixed1, ReqRem} = - free_upto_sleepy(Pid, Callbacks, Sleepy1, Mixed, Req), - State #state { available_tokens = 
Avail + (Req - ReqRem),
-                                 mixed_queues = Mixed1,
+                          free_upto_sleepy(Unevictable1, Callbacks,
+                                           Sleepy1, Mixed, Req),
+                      {Lazy2, Mixed2, ReqRem1} =
+                          free_upto_lazy(Unevictable1, Callbacks,
+                                         Lazy1, Mixed1, ReqRem),
+                      %% ReqRem1 will be <= 0 because it's
+                      %% likely we'll have freed more than we
+                      %% need, thus Req - ReqRem1 is total freed
+                      State #state { available_tokens = Avail + (Req - ReqRem1),
+                                     mixed_queues = Mixed2,
                                      lowrate = Lazy2,
                                      hibernate = Sleepy2 }
              end;
-          false -> %% enough available in sleepy, don't touch lazy
-              {Sleepy2, Mixed1, ReqRem} =
-                  free_upto_sleepy(Pid, Callbacks, Sleepy1, Mixed, Req),
-              State #state { available_tokens = Avail + (Req - ReqRem),
-                             mixed_queues = Mixed1,
-                             hibernate = Sleepy2 }
-          end;
-      false -> State
-    end.
+        false -> %% enough available in sleepy, don't touch lazy
+            {Sleepy2, Mixed1, ReqRem} =
+                free_upto_sleepy(Unevictable1, Callbacks, Sleepy1, Mixed, Req),
+            State #state { available_tokens = Avail + (Req - ReqRem),
+                           mixed_queues = Mixed1, hibernate = Sleepy2 }
+    end;
+free_upto(_Pid, _Req, State) ->
+    State.
--
cgit v1.2.1


From d48e2ad807648200fb5613b60e74da66281bd8a5 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sun, 16 Aug 2009 23:19:53 +0100
Subject: A Matthias-stipulated refactoring. Also, in testing, found some
 issues with the prefetcher: found a couple of places where its being stopped
 wasn't being recorded properly, but that wasn't the cause of the problem.
 Eventually found the cause to be the disk_queue attempting to publish to the
 prefetcher and getting back exit:{normal,_} instead of exit:{noproc,_}, which
 I didn't know could happen.

To test:
1) create 100 queues
2) fire 100 msgs to each queue ((1 msg to every queue)*100)
3) pin all queues to disk
4) unpin all queues
5) do step 2 again
6) wait for the prefetchers to start up (watch CPU load and disk activity)
7) pin all queues to disk

In step 7, the prefetchers are being stopped whilst the disk_queue is
feeding them messages. I believe that exit:{noproc,_} comes back when
sending a msg to a non-existent process, and exit:{normal,_} comes back
if the process existed when we sent the message as part of the call but
the process exited (normally) before our message was replied to.
---
 src/rabbit_disk_queue.erl         | 10 ++++++++--
 src/rabbit_mixed_queue.erl        |  6 +++---
 src/rabbit_queue_mode_manager.erl | 23 +++++++++++------------
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 76399022..9cb233f8 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -545,8 +545,14 @@ handle_cast({prefetch, Q, From}, State) ->
         try
             ok = rabbit_queue_prefetcher:publish(From, Result),
             true
-        catch exit:{noproc, _} ->
-                false
+        catch
+            exit:{noproc, _} ->
+                %% prefetcher was stopped *before* we sent message
+                false;
+            exit:{normal, _} ->
+                %% prefetcher was stopped *after* our message was
+                %% sent, but before it was processed
+                false
         end,
     State3 =
         case Cont of
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index dddafaee..7bce3a04 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -420,7 +420,7 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q,
                         { msg_buf = queue:join(Fetched, MsgBuf2),
                           prefetcher = case Status of
                                            finished -> undefined;
-                                           _ -> Prefetcher
+                                           continuing -> Prefetcher
                                        end })
             end
     end.
@@ -595,7 +595,7 @@ purge(State = #mqstate { queue = Q, mode = mixed, length = Length,
     rabbit_disk_queue:purge(Q),
     {Length, State #mqstate { msg_buf = queue:new(), length = 0,
                               memory_size = 0,
-                              memory_loss = Loss + QSize }}.
+                              memory_loss = Loss + QSize, prefetcher = undefined }}.
delete_queue(State = #mqstate { queue = Q, memory_size = QSize, memory_loss = Loss, prefetcher = Prefetcher @@ -606,7 +606,7 @@ delete_queue(State = #mqstate { queue = Q, memory_size = QSize, end, ok = rabbit_disk_queue:delete_queue(Q), {ok, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), - memory_loss = Loss + QSize }}. + memory_loss = Loss + QSize, prefetcher = undefined }}. length(#mqstate { length = Length }) -> Length. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 37afdc6c..1ab5e7a8 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -197,8 +197,9 @@ handle_call({pin_to_disk, Pid}, _From, false -> case find_queue(Pid, Mixed) of {mixed, {OAlloc, _OActivity}} -> - Mixed1 = send_to_disk(Callbacks, Mixed, Pid), - {ok, State #state { mixed_queues = Mixed1, + ok = set_queue_mode(Callbacks, Pid, disk), + {ok, State #state { mixed_queues = + dict:erase(Pid, Mixed), available_tokens = Avail + OAlloc, disk_mode_pins = sets:add_element(Pid, Pins) @@ -257,8 +258,9 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State #state { available_tokens = Avail1 }), case Req > Avail2 of true -> %% nowt we can do, send to disk - Mixed2 = send_to_disk(Callbacks, Mixed1, Pid), - {State1 #state { mixed_queues = Mixed2 }, disk}; + ok = set_queue_mode(Callbacks, Pid, disk), + {State1 #state { mixed_queues = + dict:erase(Pid, Mixed1) }, disk}; false -> %% keep mixed {State1 #state { mixed_queues = @@ -280,10 +282,7 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, %% reason, so stay as disk {State1, disk}; false -> %% can go to mixed mode - {Module, Function, Args} = - dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, - Args ++ [mixed]), + set_queue_mode(Callbacks, Pid, mixed), {State1 #state { mixed_queues = dict:store(Pid, {Req, MixedActivity}, Mixed1), @@ -348,10 +347,9 @@ find_queue(Pid, Mixed) -> error -> disk end. -send_to_disk(Callbacks, Mixed, Pid) -> +set_queue_mode(Callbacks, Pid, Mode) -> {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [disk]), - dict:erase(Pid, Mixed). + erlang:apply(Module, Function, Args ++ [Mode]). tidy_and_sum_lazy(IgnorePids, Lazy, Mixed) -> tidy_and_sum(lowrate, Mixed, @@ -434,7 +432,8 @@ free_from(Callbacks, Hylomorphism, BaseCase, Mixed, CataInit, AnaInit, Req) -> free_from(Callbacks, Hylomorphism, BaseCase, Mixed, CataInit1, AnaInit1, Req); {value, CataInit1, Pid, Alloc} -> - Mixed1 = send_to_disk(Callbacks, Mixed, Pid), + Mixed1 = dict:erase(Pid, Mixed), + ok = set_queue_mode(Callbacks, Pid, disk), case Req > Alloc of true -> free_from(Callbacks, Hylomorphism, BaseCase, Mixed1, CataInit1, AnaInit, Req - Alloc); -- cgit v1.2.1 From c1bfbc60e548a80b2df131de33cf27f17c4bd2b7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 10:32:16 +0100 Subject: switch to using with_exit_handler. 
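
This folds the two catch clauses introduced by the previous commit
(exit:{noproc, _} and exit:{normal, _}) into a single helper. For
reference, a minimal sketch of the helper's general shape -- the
Handler/Thunk argument order is taken from the call site in the diff
below, but the body is an assumption, not the actual rabbit_misc
source:

    with_exit_handler(Handler, Thunk) ->
        try
            Thunk()
        catch
            %% noproc: callee was already dead when we sent the call;
            %% normal: callee got the call but exited (normally)
            %% before replying
            exit:{R, _} when R =:= noproc; R =:= normal ->
                Handler()
        end.

In the prefetch case this means: the Thunk publishes to the prefetcher
and returns true; the Handler returns false, i.e. "the prefetcher has
gone away, stop feeding it".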
--- src/rabbit_disk_queue.erl | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 9cb233f8..0d5b6a04 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -541,19 +541,12 @@ handle_cast(report_memory, State) -> noreply1(State #dqstate { memory_report_timer = undefined }); handle_cast({prefetch, Q, From}, State) -> {ok, Result, State1} = internal_deliver(Q, true, true, false, State), - Cont = - try - ok = rabbit_queue_prefetcher:publish(From, Result), - true - catch - exit:{noproc, _} -> - %% prefetcher was stopped *before* we sent message - false; - exit:{normal, _} -> - %% prefetcher was stopped *after* our message was - %% sent, but before it was processed - false - end, + Cont = rabbit_misc:with_exit_handler( + fun () -> false end, + fun () -> + ok = rabbit_queue_prefetcher:publish(From, Result), + true + end), State3 = case Cont of true -> -- cgit v1.2.1 From 5d5884408bb659140d3f9339e8fd297a4a83caa0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 10:38:27 +0100 Subject: adding mnesia threshold to rabbitmq-service.bat --- scripts/rabbitmq-service.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat index 6b997a25..82aa4d5c 100755 --- a/scripts/rabbitmq-service.bat +++ b/scripts/rabbitmq-service.bat @@ -177,6 +177,7 @@ set ERLANG_SERVICE_ARGUMENTS= ^ -os_mon memsup_system_only true ^ -os_mon system_memory_high_watermark 0.8 ^ -mnesia dir \""%RABBITMQ_MNESIA_DIR%"\" ^ +-mnesia dump_log_write_threshold 10000 ^ %CLUSTER_CONFIG% ^ %RABBITMQ_SERVER_START_ARGS% ^ %* -- cgit v1.2.1 From 57ccc9d91cc2d7366b1e03e87b001250525da9cd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 11:11:32 +0100 Subject: Changes to avoid having strings going to binaries going to strings going to atoms in the control. Also dialyzer spotted that multiplying by 2.5 can sometimes result in not-an-int. 'Atta boy. --- src/rabbit_amqqueue.erl | 5 ++--- src/rabbit_control.erl | 6 ++++-- src/rabbit_disk_queue.erl | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index f0e8d4c2..62ea465d 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -102,7 +102,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(set_mode_pin/3 :: (binary(), binary(), binary()) -> any()). +-spec(set_mode_pin/3 :: (vhost(), resource_name(), ('disk'|'mixed')) -> any()). -spec(set_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). @@ -225,9 +225,8 @@ list(VHostPath) -> map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). 
-set_mode_pin(VHostPath, Queue, DiskBin) +set_mode_pin(VHostPath, Queue, Disk) when is_binary(VHostPath) andalso is_binary(Queue) -> - Disk = list_to_atom(binary_to_list(DiskBin)), with(rabbit_misc:r(VHostPath, queue, Queue), fun(Q) -> case Disk of true -> rabbit_queue_mode_manager:pin_to_disk diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 79578000..99bbb742 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -289,12 +289,14 @@ action(Command, Node, Args, Inform) -> action(pin_queue_to_disk, Node, VHost, [Queue], Inform) -> Inform("Pinning queue ~p in vhost ~p to disk", [Queue, VHost]), - call(Node, {rabbit_amqqueue, set_mode_pin, [VHost, Queue, "true"]}); + rpc_call(Node, rabbit_amqqueue, set_mode_pin, + [list_to_binary(VHost), list_to_binary(Queue), true]); action(unpin_queue_from_disk, Node, VHost, [Queue], Inform) -> Inform("Unpinning queue ~p in vhost ~p from disk", [Queue, VHost]), - call(Node, {rabbit_amqqueue, set_mode_pin, [VHost, Queue, "false"]}); + rpc_call(Node, rabbit_amqqueue, set_mode_pin, + [list_to_binary(VHost), list_to_binary(Queue), false]); action(set_permissions, Node, VHost, [Username, CPerm, WPerm, RPerm], Inform) -> Inform("Setting permissions for user ~p in vhost ~p", [Username, VHost]), diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 0d5b6a04..5940f5ad 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -622,7 +622,8 @@ start_memory_timer(State) -> report_memory(Hibernating, State) -> Bytes = memory_use(State), - rabbit_queue_mode_manager:report_memory(self(), 2.5 * Bytes, Hibernating). + rabbit_queue_mode_manager:report_memory(self(), trunc(2.5 * Bytes), + Hibernating). memory_use(#dqstate { operation_mode = ram_disk, file_summary = FileSummary, -- cgit v1.2.1 From 25a4547ffe7651f1e717683da593ceaf9579e9d7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 11:39:35 +0100 Subject: corrections to amqqueue_process. --- src/rabbit_amqqueue_process.erl | 21 +++++++++------------ src/rabbit_mixed_queue.erl | 19 ++++++++----------- src/rabbit_tests.erl | 11 ++++++----- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 8ee576f7..14a0370d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -112,11 +112,11 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), - memory_report_timer = start_memory_timer() + memory_report_timer = undefined }, %% first thing we must do is report_memory which will clear out %% the 'undefined' values in gain and loss in mixed_queue state - {ok, report_memory(false, State), hibernate, + {ok, start_memory_timer(State), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(_Reason, State) -> @@ -141,13 +141,10 @@ reply(Reply, NewState) -> noreply(NewState) -> {noreply, start_memory_timer(NewState), hibernate}. -start_memory_timer() -> +start_memory_timer(State = #q { memory_report_timer = undefined }) -> {ok, TRef} = timer:apply_after(?MEMORY_REPORT_TIME_INTERVAL, rabbit_amqqueue, report_memory, [self()]), - TRef. -start_memory_timer(State = #q { memory_report_timer = undefined }) -> - report_memory(false, - State #q { memory_report_timer = start_memory_timer() }); + report_memory(false, State #q { memory_report_timer = TRef }); start_memory_timer(State) -> State. 
@@ -529,8 +526,8 @@ i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete; i(arguments, #q{q = #amqqueue{arguments = Arguments}}) -> Arguments; -i(mode, #q{ mixed_state = MS }) -> - rabbit_mixed_queue:info(MS); +i(mode, #q{ mixed_state = MS }) -> + rabbit_mixed_queue:info(MS); i(pid, _) -> self(); i(messages_ready, #q { mixed_state = MS }) -> @@ -559,10 +556,10 @@ i(Item, _) -> throw({bad_argument, Item}). report_memory(Hib, State = #q { mixed_state = MS }) -> - {MSize, Gain, Loss} = - rabbit_mixed_queue:estimate_queue_memory(MS), + {MS1, MSize, Gain, Loss} = + rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), rabbit_queue_mode_manager:report_memory(self(), MSize, Gain, Loss, Hib), - State #q { mixed_state = rabbit_mixed_queue:reset_counters(MS) }. + State #q { mixed_state = MS1 }. %--------------------------------------------------------------------------- diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 7bce3a04..f798b369 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -39,8 +39,8 @@ tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). --export([to_disk_only_mode/2, to_mixed_mode/2, estimate_queue_memory/1, - reset_counters/1, info/1]). +-export([to_disk_only_mode/2, to_mixed_mode/2, info/1, + estimate_queue_memory_and_reset_counters/1]). -record(mqstate, { mode, msg_buf, @@ -94,9 +94,9 @@ -spec(to_disk_only_mode/2 :: ([message()], mqstate()) -> okmqs()). -spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). --spec(estimate_queue_memory/1 :: (mqstate()) -> - {non_neg_integer(), non_neg_integer(), non_neg_integer()}). --spec(reset_counters/1 :: (mqstate()) -> (mqstate())). +-spec(estimate_queue_memory_and_reset_counters/1 :: (mqstate()) -> + {mqstate(), non_neg_integer(), non_neg_integer(), + non_neg_integer()}). -spec(info/1 :: (mqstate()) -> mode()). -endif. @@ -614,12 +614,9 @@ length(#mqstate { length = Length }) -> is_empty(#mqstate { length = Length }) -> 0 == Length. -estimate_queue_memory(#mqstate { memory_size = Size, memory_gain = Gain, - memory_loss = Loss }) -> - {4 * Size, Gain, Loss}. - -reset_counters(State) -> - State #mqstate { memory_gain = 0, memory_loss = 0 }. +estimate_queue_memory_and_reset_counters(State = + #mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> + {State #mqstate { memory_gain = 0, memory_loss = 0 }, 4 * Size, Gain, Loss}. info(#mqstate { mode = Mode }) -> Mode. 
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 476fff41..f6d42e7c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1039,7 +1039,8 @@ rdq_test_purge() -> rdq_new_mixed_queue(Q, Durable, Disk) -> {ok, MS} = rabbit_mixed_queue:init(Q, Durable), - MS1 = rabbit_mixed_queue:reset_counters(MS), + {MS1, _, _, _} = + rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), case Disk of true -> {ok, MS2} = rabbit_mixed_queue:to_disk_only_mode([], MS1), MS2; @@ -1072,15 +1073,15 @@ rdq_test_mixed_queue_modes() -> end, MS4, lists:seq(1,10)), 30 = rabbit_mixed_queue:length(MS6), io:format("Published a mixture of messages; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory(MS6)]), + [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS6)]), {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode([], MS6), 30 = rabbit_mixed_queue:length(MS7), io:format("Converted to disk only mode; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory(MS7)]), + [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS7)]), {ok, MS8} = rabbit_mixed_queue:to_mixed_mode([], MS7), 30 = rabbit_mixed_queue:length(MS8), io:format("Converted to mixed mode; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory(MS8)]), + [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS8)]), MS10 = lists:foldl( fun (N, MS9) -> @@ -1119,7 +1120,7 @@ rdq_test_mixed_queue_modes() -> rdq_start(), MS17 = rdq_new_mixed_queue(q, true, false), 0 = rabbit_mixed_queue:length(MS17), - {0,0,0} = rabbit_mixed_queue:estimate_queue_memory(MS17), + {MS17,0,0,0} = rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS17), io:format("Recovered queue~n"), rdq_stop(), passed. -- cgit v1.2.1 From afbdb7a17d8053f3108fcbbe05409470ae9e345d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 12:23:10 +0100 Subject: Reworking of mixed queue. 
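
The recurring change in this rework is lifting base cases out of
if-expressions into their own function clauses. A toy before/after of
the idiom (names here are made up for illustration, not taken from
rabbit_mixed_queue):

    %% before: the empty case is buried inside the body
    flush(Q, Commit) ->
        ok = if Commit == [] -> ok;
                true -> tx_commit(Q, lists:reverse(Commit))
             end.

    %% after: the empty case is its own clause
    flush(_Q, []) -> ok;
    flush(Q, Commit) -> tx_commit(Q, lists:reverse(Commit)).

The same shape is applied below to flush_messages_to_disk_queue/2 and
flush_requeue_to_disk_queue/3, and the ack-or-not logic duplicated in
deliver/1 is hoisted out into the new maybe_ack/4.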
--- src/rabbit_mixed_queue.erl | 122 ++++++++++++++++++---------------------------
 1 file changed, 48 insertions(+), 74 deletions(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index f798b369..4b0810a8 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -165,12 +165,11 @@ to_disk_only_mode(TxnMessages, State =
 send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount,
                       Commit, MsgBuf) ->
     case queue:out(Queue) of
-        {empty, Queue} ->
+        {empty, _Queue} ->
            ok = flush_messages_to_disk_queue(Q, Commit),
            [] = flush_requeue_to_disk_queue(Q, RequeueCount, []),
            {ok, MsgBuf};
-        {{value, {Msg = #basic_message { guid = MsgId,
-                                         is_persistent = IsPersistent },
+        {{value, {Msg = #basic_message { is_persistent = IsPersistent },
                  IsDelivered}}, Queue1} ->
            case IsDurable andalso IsPersistent of
                true -> %% it's already in the Q
@@ -178,61 +177,47 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount,
                      IsDurable, Q, Queue1, PublishCount, RequeueCount + 1,
                      Commit, inc_queue_length(Q, MsgBuf, 1));
                false ->
-                    Commit1 = flush_requeue_to_disk_queue
-                                (Q, RequeueCount, Commit),
-                    ok = rabbit_disk_queue:tx_publish(Msg),
-                    case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of
-                        true ->
-                            ok = flush_messages_to_disk_queue(Q, Commit1),
-                            send_messages_to_disk(
-                              IsDurable, Q, Queue1, 1, 0,
-                              [{MsgId, IsDelivered}],
-                              inc_queue_length(Q, MsgBuf, 1));
-                        false ->
-                            send_messages_to_disk(
-                              IsDurable, Q, Queue1, PublishCount + 1, 0,
-                              [{MsgId, IsDelivered} | Commit1],
-                              inc_queue_length(Q, MsgBuf, 1))
-                    end
+                    republish_message_to_disk_queue(
+                      IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit,
+                      MsgBuf, Msg, IsDelivered)
            end;
-        {{value, {Msg = #basic_message { guid = MsgId }, IsDelivered, _AckTag}},
-         Queue1} ->
+        {{value, {Msg, IsDelivered, _AckTag}}, Queue1} ->
            %% these have come via the prefetcher, so are no longer in
            %% the disk queue so they need to be republished
-            Commit1 = flush_requeue_to_disk_queue(Q, RequeueCount, Commit),
-            ok = rabbit_disk_queue:tx_publish(Msg),
-            case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of
-                true ->
-                    ok = flush_messages_to_disk_queue(Q, Commit1),
-                    send_messages_to_disk(IsDurable, Q, Queue1, 1, 0,
-                                          [{MsgId, IsDelivered}],
-                                          inc_queue_length(Q, MsgBuf, 1));
-                false ->
-                    send_messages_to_disk(IsDurable, Q, Queue1, PublishCount+1,
-                                          0, [{MsgId, IsDelivered} | Commit1],
-                                          inc_queue_length(Q, MsgBuf, 1))
-            end;
+            republish_message_to_disk_queue(IsDurable, Q, Queue1,
+                                            PublishCount, RequeueCount, Commit,
+                                            MsgBuf, Msg, IsDelivered);
        {{value, {Q, Count}}, Queue1} ->
            send_messages_to_disk(IsDurable, Q, Queue1, PublishCount,
                                  RequeueCount + Count, Commit,
                                  inc_queue_length(Q, MsgBuf, Count))
    end.

+republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount,
+                                Commit, MsgBuf, Msg =
+                                #basic_message { guid = MsgId }, IsDelivered) ->
+    Commit1 = flush_requeue_to_disk_queue(Q, RequeueCount, Commit),
+    ok = rabbit_disk_queue:tx_publish(Msg),
+    {PublishCount1, Commit2} =
+        case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of
+            true -> ok = flush_messages_to_disk_queue(Q, Commit1),
+                    {1, [{MsgId, IsDelivered}]};
+            false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1]}
+        end,
+    send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0,
+                          Commit2, inc_queue_length(Q, MsgBuf, 1)).
+
+flush_messages_to_disk_queue(_Q, []) ->
+    ok;
 flush_messages_to_disk_queue(Q, Commit) ->
-    ok = if [] == Commit -> ok;
-            true -> rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), [])
-         end.
+ rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []). +flush_requeue_to_disk_queue(_Q, 0, Commit) -> + Commit; flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> - if 0 == RequeueCount -> Commit; - true -> - ok = if [] == Commit -> ok; - true -> rabbit_disk_queue:tx_commit - (Q, lists:reverse(Commit), []) - end, - rabbit_disk_queue:requeue_next_n(Q, RequeueCount), - [] - end. + ok = flush_messages_to_disk_queue(Q, Commit), + ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), + []. to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> {ok, State}; @@ -266,14 +251,13 @@ to_mixed_mode(TxnMessages, State = #mqstate { mode = disk, queue = Q, inc_queue_length(_Q, MsgBuf, 0) -> MsgBuf; inc_queue_length(Q, MsgBuf, Count) -> - case queue:out_r(MsgBuf) of - {empty, MsgBuf} -> - queue:in({Q, Count}, MsgBuf); - {{value, {Q, Len}}, MsgBuf1} -> - queue:in({Q, Len + Count}, MsgBuf1); - {{value, _}, _MsgBuf1} -> - queue:in({Q, Count}, MsgBuf) - end. + {NewCount, MsgBufTail} = + case queue:out_r(MsgBuf) of + {empty, MsgBuf1} -> {Count, MsgBuf1}; + {{value, {Q, Len}}, MsgBuf1} -> {Len + Count, MsgBuf1}; + {{value, _}, _MsgBuf1} -> {Count, MsgBuf} + end, + queue:in({Q, NewCount}, MsgBufTail). dec_queue_length(Count, State = #mqstate { queue = Q, msg_buf = MsgBuf }) -> case queue:out(MsgBuf) of @@ -314,8 +298,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, msg_buf = MsgBuf, length = Length, memory_size = QSize, memory_gain = Gain }) -> - Persist = IsDurable andalso IsPersistent, - ok = case Persist of + ok = case IsDurable andalso IsPersistent of true -> rabbit_disk_queue:publish(Q, Msg, false); false -> ok end, @@ -333,12 +316,11 @@ publish_delivered(Msg = queue = Q, length = 0, memory_size = QSize, memory_gain = Gain }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> - Persist = IsDurable andalso IsPersistent, ok = rabbit_disk_queue:publish(Q, Msg, true), MsgSize = size_of_message(Msg), State1 = State #mqstate { memory_size = QSize + MsgSize, memory_gain = Gain + MsgSize }, - case Persist of + case IsDurable andalso IsPersistent of true -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but @@ -386,14 +368,7 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, %% message has come via the prefetcher, thus it's been %% delivered. If it's not persistent+durable, we should %% ack it now - AckTag1 = - case IsDurable andalso IsPersistent of - true -> - AckTag; - false -> - ok = rabbit_disk_queue:ack(Q, [AckTag]), - noack - end, + AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), {{Msg, IsDelivered, AckTag1, Rem}, State1 #mqstate { msg_buf = MsgBuf1 }}; _ when Prefetcher == undefined -> @@ -401,14 +376,7 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, {Msg = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered, AckTag, _PersistRem} = rabbit_disk_queue:deliver(Q), - AckTag1 = - case IsDurable andalso IsPersistent of - true -> - AckTag; - false -> - ok = rabbit_disk_queue:ack(Q, [AckTag]), - noack - end, + AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), {{Msg, IsDelivered, AckTag1, Rem}, State2}; _ -> case rabbit_queue_prefetcher:drain(Prefetcher) of @@ -425,6 +393,12 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, end end. +maybe_ack(_Q, true, true, AckTag) -> + AckTag; +maybe_ack(Q, _, _, AckTag) -> + ok = rabbit_disk_queue:ack(Q, [AckTag]), + noack. 
+
 remove_noacks(MsgsWithAcks) ->
     lists:foldl(
       fun ({Msg, noack}, {AccAckTags, AccSize}) ->
--
cgit v1.2.1


From fff4acb39dfd4ff9b3a548c604d6780bd90ffee6 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 17 Aug 2009 12:44:00 +0100
Subject: Make sure the queue mode manager can never tell a queue to go back
 to mixed mode when memory alarms are going off

---
 src/rabbit_queue_mode_manager.erl | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl
index 1ab5e7a8..22d85867 100644
--- a/src/rabbit_queue_mode_manager.erl
+++ b/src/rabbit_queue_mode_manager.erl
@@ -39,7 +39,7 @@
          terminate/2, code_change/3]).

 -export([register/5, report_memory/3, report_memory/5, info/0,
-         pin_to_disk/1, unpin_from_disk/1]).
+         pin_to_disk/1, unpin_from_disk/1, conserve_memory/2]).

 -define(TOTAL_TOKENS, 10000000).
 -define(ACTIVITY_THRESHOLD, 25).
@@ -58,6 +58,8 @@
        'ok').
 -spec(pin_to_disk/1 :: (pid()) -> 'ok').
 -spec(unpin_from_disk/1 :: (pid()) -> 'ok').
+-spec(info/0 :: () -> [{atom(), any()}]).
+-spec(conserve_memory/2 :: (pid(), bool()) -> 'ok').

 -endif.

@@ -68,7 +70,8 @@
                  lowrate,
                  hibernate,
                  disk_mode_pins,
-                 unevictable
+                 unevictable,
+                 alarmed
                }).

 %% Token-credit based memory management
@@ -171,8 +174,12 @@ report_memory(Pid, Memory, Gain, Loss, Hibernating) ->
 info() ->
     gen_server2:call(?SERVER, info).

+conserve_memory(_Pid, Conserve) ->
+    gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}).
+
 init([]) ->
     process_flag(trap_exit, true),
+    rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}),
     %% todo, fix up this call as os_mon may not be running
     {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(),
     MemAvail = MemTotal - MemUsed,
@@ -183,7 +190,8 @@ init([]) ->
                  lowrate = priority_queue:new(),
                  hibernate = queue:new(),
                  disk_mode_pins = sets:new(),
-                 unevictable = sets:new()
+                 unevictable = sets:new(),
+                 alarmed = false
                }}.

 handle_call({pin_to_disk, Pid}, _From,
@@ -236,7 +244,8 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating},
                    available_tokens = Avail,
                    callbacks = Callbacks,
                    disk_mode_pins = Pins,
-                   tokens_per_byte = TPB }) ->
+                   tokens_per_byte = TPB,
+                   alarmed = Alarmed }) ->
     Req = rabbit_misc:ceil(TPB * Memory),
     LowRate = case {BytesGained, BytesLost} of
                   {undefined, _} -> false;
@@ -269,7 +278,7 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating},
                          MixedActivity}
                 end;
             disk ->
-                case sets:is_element(Pid, Pins) of
+                case sets:is_element(Pid, Pins) orelse Alarmed of
                     true ->
                         {State, disk};
                     false ->
@@ -313,7 +322,10 @@ handle_cast({register, Pid, IsUnevictable, Module, Function, Args},
     {noreply, State #state { callbacks = dict:store
                              (Pid, {Module, Function, Args}, Callbacks),
                              unevictable = Unevictable1
-                           }}.
+                           }};
+
+handle_cast({conserve_memory, Conserve}, State) ->
+    {noreply, State #state { alarmed = Conserve }}.

 handle_info({'DOWN', _MRef, process, Pid, _Reason},
             State = #state { available_tokens = Avail,
--
cgit v1.2.1


From c490516c5bf2c3056644891edc289bc19cddc590 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 17 Aug 2009 12:59:52 +0100
Subject: if memsup isn't running, we get back {0,0,undefined} from
 memsup:get_memory_data/0. Thus adjust maths to cope with this. In the absence
 of memsup, we don't hand out tokens at all, so everything stays in mixed mode
 all the time, and we can cheerfully run out of memory (another good reason
 for the manual pin_to_disk stuff).
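
Concretely: tokens_per_byte becomes 0 when memsup reports no data, and
since a queue's token request is rabbit_misc:ceil(TPB * Memory), every
request then costs nothing and is always granted. A standalone toy
check of that arithmetic (not RabbitMQ source; only the ?TOTAL_TOKENS
value is copied from this module):

    -module(tpb_demo).
    -export([demo/0]).

    -define(TOTAL_TOKENS, 10000000).

    tokens_per_byte(0)        -> 0; %% memsup gave back {0,0,undefined}
    tokens_per_byte(MemAvail) -> ?TOTAL_TOKENS / MemAvail.

    %% rounds up, like rabbit_misc:ceil
    request(TPB, MemoryBytes) -> ceil(TPB * MemoryBytes).

    demo() ->
        0 = request(tokens_per_byte(0), 123456789), %% degraded: always fits
        true = request(tokens_per_byte(1 bsl 30), 1 bsl 20) > 0,
        ok.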
--- src/rabbit_queue_mode_manager.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 22d85867..fa3b27e9 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -183,10 +183,13 @@ init([]) -> %% todo, fix up this call as os_mon may not be running {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), MemAvail = MemTotal - MemUsed, + TPB = if MemAvail == 0 -> 0; + true -> ?TOTAL_TOKENS / MemAvail + end, {ok, #state { available_tokens = ?TOTAL_TOKENS, mixed_queues = dict:new(), callbacks = dict:new(), - tokens_per_byte = ?TOTAL_TOKENS / MemAvail, + tokens_per_byte = TPB, lowrate = priority_queue:new(), hibernate = queue:new(), disk_mode_pins = sets:new(), @@ -350,7 +353,9 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. add_to_lowrate(Pid, Alloc, Lazy) -> - Bucket = trunc(math:log(Alloc)), %% log base e + Bucket = if Alloc == 0 -> 0; %% can't take log(0) + true -> trunc(math:log(Alloc)) %% log base e + end, priority_queue:in({Pid, Bucket, Alloc}, Bucket, Lazy). find_queue(Pid, Mixed) -> -- cgit v1.2.1 From bd42a7546d8d09b383d59459ee86c76aa1581b7c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 13:48:21 +0100 Subject: ...in which case the todo comment can go away too. --- src/rabbit_queue_mode_manager.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index fa3b27e9..5a6c8b39 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -180,7 +180,6 @@ conserve_memory(_Pid, Conserve) -> init([]) -> process_flag(trap_exit, true), rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), - %% todo, fix up this call as os_mon may not be running {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), MemAvail = MemTotal - MemUsed, TPB = if MemAvail == 0 -> 0; -- cgit v1.2.1 From 1ab3c3688118083cd975bd46f785318a7e8900bb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 16:16:58 +0100 Subject: made memsup work on mac os x. Substantial refactorings. I will shortly move this out to another branch. --- src/rabbit_alarm.erl | 52 +++++++++--------- src/rabbit_memsup.erl | 126 +++++++++++++++++++++++++++++++++++++++++++ src/rabbit_memsup_darwin.erl | 102 +++++++++++++++++++++++++++++++++++ src/rabbit_memsup_linux.erl | 113 +++++++++----------------------------- 4 files changed, 280 insertions(+), 113 deletions(-) create mode 100644 src/rabbit_memsup.erl create mode 100644 src/rabbit_memsup_darwin.erl diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index 21999f16..309c9a0e 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -41,7 +41,7 @@ -define(MEMSUP_CHECK_INTERVAL, 1000). %% OSes on which we know memory alarms to be trustworthy --define(SUPPORTED_OS, [{unix, linux}]). +-define(SUPPORTED_OS, [{unix, linux}, {unix, darwin}]). -record(alarms, {alertees, system_memory_high_watermark = false}). @@ -136,33 +136,35 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- start_memsup() -> - Mod = case os:type() of - %% memsup doesn't take account of buffers or cache when - %% considering "free" memory - therefore on Linux we can - %% get memory alarms very easily without any pressure - %% existing on memory at all. Therefore we need to use - %% our own simple memory monitor. 
- %% - {unix, linux} -> rabbit_memsup_linux; - - %% Start memsup programmatically rather than via the - %% rabbitmq-server script. This is not quite the right - %% thing to do as os_mon checks to see if memsup is - %% available before starting it, but as memsup is - %% available everywhere (even on VXWorks) it should be - %% ok. - %% - %% One benefit of the programmatic startup is that we - %% can add our alarm_handler before memsup is running, - %% thus ensuring that we notice memory alarms that go - %% off on startup. - %% - _ -> memsup - end, + {Mod, Args} = + case os:type() of + %% memsup doesn't take account of buffers or cache when + %% considering "free" memory - therefore on Linux we can + %% get memory alarms very easily without any pressure + %% existing on memory at all. Therefore we need to use + %% our own simple memory monitor. + %% + {unix, linux} -> {rabbit_memsup, [rabbit_memsup_linux]}; + {unix, darwin} -> {rabbit_memsup, [rabbit_memsup_darwin]}; + + %% Start memsup programmatically rather than via the + %% rabbitmq-server script. This is not quite the right + %% thing to do as os_mon checks to see if memsup is + %% available before starting it, but as memsup is + %% available everywhere (even on VXWorks) it should be + %% ok. + %% + %% One benefit of the programmatic startup is that we + %% can add our alarm_handler before memsup is running, + %% thus ensuring that we notice memory alarms that go + %% off on startup. + %% + _ -> {memsup, []} + end, %% This is based on os_mon:childspec(memsup, true) {ok, _} = supervisor:start_child( os_mon_sup, - {memsup, {Mod, start_link, []}, + {memsup, {Mod, start_link, Args}, permanent, 2000, worker, [Mod]}), ok. diff --git a/src/rabbit_memsup.erl b/src/rabbit_memsup.erl new file mode 100644 index 00000000..5f242881 --- /dev/null +++ b/src/rabbit_memsup.erl @@ -0,0 +1,126 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_memsup). + +-behaviour(gen_server). + +-export([start_link/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([update/0]). + +-record(state, {memory_fraction, + timeout, + timer, + mod, + mod_state + }). + +-define(SERVER, memsup). 
%% must be the same as the standard memsup + +-define(DEFAULT_MEMORY_CHECK_INTERVAL, 1000). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/1 :: (atom()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(update/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(Args) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []). + +update() -> + gen_server:cast(?SERVER, update). + +%%---------------------------------------------------------------------------- + +init([Mod]) -> + Fraction = os_mon:get_env(memsup, system_memory_high_watermark), + TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), + InitState = Mod:init(), + State = #state { memory_fraction = Fraction, + timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, + timer = TRef, + mod = Mod, + mod_state = Mod:update(Fraction, InitState) }, + {ok, State}. + +start_timer(Timeout) -> + {ok, TRef} = timer:apply_interval(Timeout, ?MODULE, update, []), + TRef. + +%% Export the same API as the real memsup. Note that +%% get_sysmem_high_watermark gives an int in the range 0 - 100, while +%% set_sysmem_high_watermark takes a float in the range 0.0 - 1.0. +handle_call(get_sysmem_high_watermark, _From, State) -> + {reply, trunc(100 * State#state.memory_fraction), State}; + +handle_call({set_sysmem_high_watermark, Float}, _From, State) -> + {reply, ok, State#state{memory_fraction = Float}}; + +handle_call(get_check_interval, _From, State) -> + {reply, State#state.timeout, State}; + +handle_call({set_check_interval, Timeout}, _From, State) -> + {ok, cancel} = timer:cancel(State#state.timer), + {reply, ok, State#state{timeout = Timeout, timer = start_timer(Timeout)}}; + +handle_call(get_memory_data, _From, + State = #state { mod = Mod, mod_state = ModState }) -> + {reply, Mod:get_memory_data(ModState), State}; + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(update, State = #state { memory_fraction = MemoryFraction, + mod = Mod, mod_state = ModState }) -> + ModState1 = Mod:update(MemoryFraction, ModState), + {noreply, State #state { mod_state = ModState1 }}; + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/rabbit_memsup_darwin.erl b/src/rabbit_memsup_darwin.erl new file mode 100644 index 00000000..034fa891 --- /dev/null +++ b/src/rabbit_memsup_darwin.erl @@ -0,0 +1,102 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_memsup_darwin). + +-export([init/0, update/2, get_memory_data/1]). + +-record(state, {alarmed, + total_memory, + allocated_memory}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(state() :: #state { alarmed :: boolean(), + total_memory :: ('undefined' | non_neg_integer()), + allocated_memory :: ('undefined' | non_neg_integer()) + }). + +-spec(init/0 :: () -> state()). +-spec(update/2 :: (float(), state()) -> state()). +-spec(get_memory_data/1 :: (state()) -> {non_neg_integer(), non_neg_integer(), + ('undefined' | pid())}). + +-endif. + +%%---------------------------------------------------------------------------- + +init() -> + #state{alarmed = false, + total_memory = undefined, + allocated_memory = undefined}. + +update(MemoryFraction, State = #state{ alarmed = Alarmed }) -> + File = os:cmd("/usr/bin/vm_stat"), + Lines = string:tokens(File, "\n"), + Dict = dict:from_list(lists:map(fun parse_line/1, Lines)), + PageSize = dict:fetch(page_size, Dict), + Inactive = dict:fetch('Pages inactive', Dict), + Active = dict:fetch('Pages active', Dict), + Free = dict:fetch('Pages free', Dict), + Wired = dict:fetch('Pages wired down', Dict), + MemTotal = PageSize * (Inactive + Active + Free + Wired), + MemUsed = PageSize * (Active + Wired), + NewAlarmed = MemUsed / MemTotal > MemoryFraction, + case {Alarmed, NewAlarmed} of + {false, true} -> + alarm_handler:set_alarm({system_memory_high_watermark, []}); + {true, false} -> + alarm_handler:clear_alarm(system_memory_high_watermark); + _ -> + ok + end, + State#state{alarmed = NewAlarmed, + total_memory = MemTotal, allocated_memory = MemUsed}. + +get_memory_data(State) -> + {State#state.total_memory, State#state.allocated_memory, undefined}. + +%%---------------------------------------------------------------------------- + +%% A line looks like "Foo bar: 123456" +parse_line(Line) -> + [Name, RHS | _Rest] = string:tokens(Line, ":"), + case Name of + "Mach Virtual Memory Statistics" -> + ["(page", "size", "of", PageSize, "bytes)"] = + string:tokens(RHS, " "), + {page_size, list_to_integer(PageSize)}; + _ -> + [Value | _Rest1] = string:tokens(RHS, " ."), + {list_to_atom(Name), list_to_integer(Value)} + end. diff --git a/src/rabbit_memsup_linux.erl b/src/rabbit_memsup_linux.erl index 158df679..460fd88f 100644 --- a/src/rabbit_memsup_linux.erl +++ b/src/rabbit_memsup_linux.erl @@ -31,105 +31,36 @@ -module(rabbit_memsup_linux). --behaviour(gen_server). +-export([init/0, update/2, get_memory_data/1]). --export([start_link/0]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([update/0]). - --define(SERVER, memsup). %% must be the same as the standard memsup - --define(DEFAULT_MEMORY_CHECK_INTERVAL, 1000). - --record(state, {memory_fraction, - alarmed, - timeout, - timer, +-record(state, {alarmed, total_memory, - allocated_memory - }). + allocated_memory}). %%---------------------------------------------------------------------------- -ifdef(use_specs). 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(update/0 :: () -> 'ok'). - --endif. - -%%---------------------------------------------------------------------------- - -start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). +-type(state() :: #state { alarmed :: boolean(), + total_memory :: ('undefined' | non_neg_integer()), + allocated_memory :: ('undefined' | non_neg_integer()) + }). +-spec(init/0 :: () -> state()). +-spec(update/2 :: (float(), state()) -> state()). +-spec(get_memory_data/1 :: (state()) -> {non_neg_integer(), non_neg_integer(), + ('undefined' | pid())}). -update() -> - gen_server:cast(?SERVER, update). +-endif. %%---------------------------------------------------------------------------- -init(_Args) -> - Fraction = os_mon:get_env(memsup, system_memory_high_watermark), - TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), - {ok, update(#state{alarmed = false, - memory_fraction = Fraction, - timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, - timer = TRef, - total_memory = undefined, - allocated_memory = undefined - })}. - -start_timer(Timeout) -> - {ok, TRef} = timer:apply_interval(Timeout, ?MODULE, update, []), - TRef. - -%% Export the same API as the real memsup. Note that -%% get_sysmem_high_watermark gives an int in the range 0 - 100, while -%% set_sysmem_high_watermark takes a float in the range 0.0 - 1.0. -handle_call(get_sysmem_high_watermark, _From, State) -> - {reply, trunc(100 * State#state.memory_fraction), State}; - -handle_call({set_sysmem_high_watermark, Float}, _From, State) -> - {reply, ok, State#state{memory_fraction = Float}}; - -handle_call(get_check_interval, _From, State) -> - {reply, State#state.timeout, State}; +init() -> + #state{alarmed = false, + total_memory = undefined, + allocated_memory = undefined}. -handle_call({set_check_interval, Timeout}, _From, State) -> - {ok, cancel} = timer:cancel(State#state.timer), - {reply, ok, State#state{timeout = Timeout, timer = start_timer(Timeout)}}; - -handle_call(get_memory_data, _From, - State = #state { total_memory = MemTotal, - allocated_memory = MemUsed }) -> - {reply, {MemTotal, MemUsed, undefined}, State}; - -handle_call(_Request, _From, State) -> - {noreply, State}. - -handle_cast(update, State) -> - {noreply, update(State)}; - -handle_cast(_Request, State) -> - {noreply, State}. - -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, _State) -> - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%---------------------------------------------------------------------------- - -update(State = #state{alarmed = Alarmed, - memory_fraction = MemoryFraction}) -> +update(MemoryFraction, State = #state { alarmed = Alarmed }) -> File = read_proc_file("/proc/meminfo"), Lines = string:tokens(File, "\n"), Dict = dict:from_list(lists:map(fun parse_line/1, Lines)), @@ -150,6 +81,11 @@ update(State = #state{alarmed = Alarmed, State#state{alarmed = NewAlarmed, total_memory = MemTotal, allocated_memory = MemUsed}. +get_memory_data(State) -> + {State#state.total_memory, State#state.allocated_memory, undefined}. + +%%---------------------------------------------------------------------------- + -define(BUFFER_SIZE, 1024). 
%% file:read_file does not work on files in /proc as it seems to get @@ -170,8 +106,9 @@ read_proc_file(IoDevice, Acc) -> %% A line looks like "FooBar: 123456 kB" parse_line(Line) -> - [Name, Value | Rest] = string:tokens(Line, ": "), - Value1 = case Rest of + [Name, RHS | _Rest] = string:tokens(Line, ":"), + [Value | UnitsRest] = string:tokens(RHS, " "), + Value1 = case UnitsRest of [] -> list_to_integer(Value); %% no units ["kB"] -> list_to_integer(Value) * 1024 end, -- cgit v1.2.1 From 010ec7fa1b8f1d3cd2ff274b427211445c6ab163 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 17 Aug 2009 16:28:14 +0100 Subject: making a comment accurate. --- src/rabbit_memsup_darwin.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_memsup_darwin.erl b/src/rabbit_memsup_darwin.erl index 034fa891..990c5b99 100644 --- a/src/rabbit_memsup_darwin.erl +++ b/src/rabbit_memsup_darwin.erl @@ -88,7 +88,7 @@ get_memory_data(State) -> %%---------------------------------------------------------------------------- -%% A line looks like "Foo bar: 123456" +%% A line looks like "Foo bar: 123456." parse_line(Line) -> [Name, RHS | _Rest] = string:tokens(Line, ":"), case Name of -- cgit v1.2.1 From 69b1470d3c399e16953041b04a639805a7367868 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 Aug 2009 11:54:40 +0100 Subject: New branch for bug 21368 --- src/rabbit_amqqueue.erl | 14 +----------- src/rabbit_control.erl | 19 ---------------- src/rabbit_mixed_queue.erl | 8 +++---- src/rabbit_queue_mode_manager.erl | 46 ++------------------------------------- 4 files changed, 7 insertions(+), 80 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 62ea465d..fdf73729 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([set_mode_pin/3, set_mode/2, report_memory/1]). +-export([set_mode/2, report_memory/1]). -import(mnesia). -import(gen_server2). @@ -102,7 +102,6 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(set_mode_pin/3 :: (vhost(), resource_name(), ('disk'|'mixed')) -> any()). -spec(set_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). @@ -225,17 +224,6 @@ list(VHostPath) -> map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). -set_mode_pin(VHostPath, Queue, Disk) - when is_binary(VHostPath) andalso is_binary(Queue) -> - with(rabbit_misc:r(VHostPath, queue, Queue), - fun(Q) -> case Disk of - true -> rabbit_queue_mode_manager:pin_to_disk - (Q #amqqueue.pid); - false -> rabbit_queue_mode_manager:unpin_from_disk - (Q #amqqueue.pid) - end - end). - set_mode(QPid, Mode) -> gen_server2:pcast(QPid, 10, {set_mode, Mode}). diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 99bbb742..d5a83ac9 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -137,9 +137,6 @@ Available commands: list_bindings [-p ] list_connections [ ...] - pin_queue_to_disk - unpin_queue_from_disk - Quiet output mode is selected with the \"-q\" flag. Informational messages are suppressed when quiet mode is in effect. @@ -168,10 +165,6 @@ exchange name, routing key, queue name and arguments, in that order. 
peer_address, peer_port, state, channels, user, vhost, timeout, frame_max, recv_oct, recv_cnt, send_oct, send_cnt, send_pend]. The default is to display user, peer_address and peer_port. - -pin_queue_to_disk will force a queue to be in disk mode. -unpin_queue_from_disk will permit a queue that has been pinned to disk mode -to be converted to mixed mode should there be enough memory available. "), halt(1). @@ -286,18 +279,6 @@ action(Command, Node, Args, Inform) -> {VHost, RemainingArgs} = parse_vhost_flag(Args), action(Command, Node, VHost, RemainingArgs, Inform). -action(pin_queue_to_disk, Node, VHost, [Queue], Inform) -> - Inform("Pinning queue ~p in vhost ~p to disk", - [Queue, VHost]), - rpc_call(Node, rabbit_amqqueue, set_mode_pin, - [list_to_binary(VHost), list_to_binary(Queue), true]); - -action(unpin_queue_from_disk, Node, VHost, [Queue], Inform) -> - Inform("Unpinning queue ~p in vhost ~p from disk", - [Queue, VHost]), - rpc_call(Node, rabbit_amqqueue, set_mode_pin, - [list_to_binary(VHost), list_to_binary(Queue), false]); - action(set_permissions, Node, VHost, [Username, CPerm, WPerm, RPerm], Inform) -> Inform("Setting permissions for user ~p in vhost ~p", [Username, VHost]), call(Node, {rabbit_access_control, set_permissions, diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 4b0810a8..3d989662 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -62,7 +62,7 @@ -type(mqstate() :: #mqstate { mode :: mode(), msg_buf :: queue(), queue :: queue_name(), - is_durable :: bool(), + is_durable :: boolean(), length :: non_neg_integer(), memory_size :: (non_neg_integer() | 'undefined'), memory_gain :: (non_neg_integer() | 'undefined'), @@ -72,12 +72,12 @@ -type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). -type(okmqs() :: {'ok', mqstate()}). --spec(init/2 :: (queue_name(), bool()) -> okmqs()). +-spec(init/2 :: (queue_name(), boolean()) -> okmqs()). -spec(publish/2 :: (message(), mqstate()) -> okmqs()). -spec(publish_delivered/2 :: (message(), mqstate()) -> {'ok', acktag(), mqstate()}). -spec(deliver/1 :: (mqstate()) -> - {('empty' | {message(), bool(), acktag(), non_neg_integer()}), + {('empty' | {message(), boolean(), acktag(), non_neg_integer()}), mqstate()}). -spec(ack/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). -spec(tx_publish/2 :: (message(), mqstate()) -> okmqs()). @@ -89,7 +89,7 @@ -spec(delete_queue/1 :: (mqstate()) -> {'ok', mqstate()}). -spec(length/1 :: (mqstate()) -> non_neg_integer()). --spec(is_empty/1 :: (mqstate()) -> bool()). +-spec(is_empty/1 :: (mqstate()) -> boolean()). -spec(to_disk_only_mode/2 :: ([message()], mqstate()) -> okmqs()). -spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl index 5a6c8b39..a2fab615 100644 --- a/src/rabbit_queue_mode_manager.erl +++ b/src/rabbit_queue_mode_manager.erl @@ -39,7 +39,7 @@ terminate/2, code_change/3]). -export([register/5, report_memory/3, report_memory/5, info/0, - pin_to_disk/1, unpin_from_disk/1, conserve_memory/2]). + conserve_memory/2]). -define(TOTAL_TOKENS, 10000000). -define(ACTIVITY_THRESHOLD, 25). @@ -56,8 +56,6 @@ (non_neg_integer() | 'undefined'), (non_neg_integer() | 'undefined'), bool()) -> 'ok'). --spec(pin_to_disk/1 :: (pid()) -> 'ok'). --spec(unpin_from_disk/1 :: (pid()) -> 'ok'). -spec(info/0 :: () -> [{atom(), any()}]). -spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). 
@@ -69,7 +67,6 @@ tokens_per_byte, lowrate, hibernate, - disk_mode_pins, unevictable, alarmed }). @@ -158,12 +155,6 @@ register(Pid, Unevictable, Module, Function, Args) -> gen_server2:cast(?SERVER, {register, Pid, Unevictable, Module, Function, Args}). -pin_to_disk(Pid) -> - gen_server2:call(?SERVER, {pin_to_disk, Pid}). - -unpin_from_disk(Pid) -> - gen_server2:call(?SERVER, {unpin_from_disk, Pid}). - report_memory(Pid, Memory, Hibernating) -> report_memory(Pid, Memory, undefined, undefined, Hibernating). @@ -191,53 +182,21 @@ init([]) -> tokens_per_byte = TPB, lowrate = priority_queue:new(), hibernate = queue:new(), - disk_mode_pins = sets:new(), unevictable = sets:new(), alarmed = false }}. -handle_call({pin_to_disk, Pid}, _From, - State = #state { mixed_queues = Mixed, - callbacks = Callbacks, - available_tokens = Avail, - disk_mode_pins = Pins }) -> - {Res, State1} = - case sets:is_element(Pid, Pins) of - true -> {ok, State}; - false -> - case find_queue(Pid, Mixed) of - {mixed, {OAlloc, _OActivity}} -> - ok = set_queue_mode(Callbacks, Pid, disk), - {ok, State #state { mixed_queues = - dict:erase(Pid, Mixed), - available_tokens = Avail + OAlloc, - disk_mode_pins = - sets:add_element(Pid, Pins) - }}; - disk -> - {ok, State #state { disk_mode_pins = - sets:add_element(Pid, Pins) }} - end - end, - {reply, Res, State1}; - -handle_call({unpin_from_disk, Pid}, _From, - State = #state { disk_mode_pins = Pins }) -> - {reply, ok, State #state { disk_mode_pins = sets:del_element(Pid, Pins) }}; - handle_call(info, _From, State) -> State1 = #state { available_tokens = Avail, mixed_queues = Mixed, lowrate = Lazy, hibernate = Sleepy, - disk_mode_pins = Pins, unevictable = Unevictable } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying {reply, [{ available_tokens, Avail }, { mixed_queues, dict:to_list(Mixed) }, { lowrate_queues, priority_queue:to_list(Lazy) }, { hibernated_queues, queue:to_list(Sleepy) }, - { queues_pinned_to_disk, sets:to_list(Pins) }, { unevictable_queues, sets:to_list(Unevictable) }], State1}. @@ -245,7 +204,6 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State = #state { mixed_queues = Mixed, available_tokens = Avail, callbacks = Callbacks, - disk_mode_pins = Pins, tokens_per_byte = TPB, alarmed = Alarmed }) -> Req = rabbit_misc:ceil(TPB * Memory), @@ -280,7 +238,7 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, MixedActivity} end; disk -> - case sets:is_element(Pid, Pins) orelse Alarmed of + case Alarmed of true -> {State, disk}; false -> -- cgit v1.2.1 From 60401a8039c2e8b9d71a54b4f408f3e97bef7f3c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 Aug 2009 18:29:39 +0100 Subject: Pretty much all the low hanging fruit. Need to check the ets access in txns in disk_queue and also deal with the clean shutdown and delivery bits. > ** queue_prefetcher > - s/publish/deliver ? No, I really don't like that. Publish is about pushing messages to the receiver. Thus it's named correctly. 
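
One change below swaps timer:apply_after(Interval, rabbit_amqqueue,
report_memory, [self()]) for timer:send_after(Interval, report_memory):
rather than the timer server spawning a process that calls back in
through the API module, the queue process simply receives a plain
report_memory message (and rabbit_amqqueue:report_memory/1 goes away).
That presumably pairs with a handle_info clause along these lines (a
sketch only; the real clause is outside this diff, hence the trailing
semicolon):

    handle_info(report_memory, State) ->
        %% timer fired: clear the stored timer ref so noreply/1 (via
        %% start_memory_timer/1) arms a fresh one, then report
        noreply(report_memory(false,
                              State #q { memory_report_timer = undefined }));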
--- src/rabbit_amqqueue.erl | 6 +- src/rabbit_amqqueue_process.erl | 60 +++++++-------- src/rabbit_disk_queue.erl | 111 +++++++++++++-------------- src/rabbit_mixed_queue.erl | 162 +++++++++++++++++++--------------------- src/rabbit_queue_prefetcher.erl | 4 +- src/rabbit_tests.erl | 64 ++++++++-------- 6 files changed, 189 insertions(+), 218 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 1d9f8c53..6c4c0ebb 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([set_mode/2, report_memory/1]). +-export([set_mode/2]). -import(mnesia). -import(gen_server2). @@ -107,7 +107,6 @@ -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). --spec(report_memory/1 :: (pid()) -> 'ok'). -endif. @@ -227,9 +226,6 @@ map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). set_mode(QPid, Mode) -> gen_server2:pcast(QPid, 10, {set_mode, Mode}). -report_memory(QPid) -> - gen_server2:cast(QPid, report_memory). - info(#amqqueue{ pid = QPid }) -> gen_server2:pcall(QPid, 9, info, infinity). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 14a0370d..b1c409b1 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -38,7 +38,7 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). --define(MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds +-define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds -export([start_link/1]). @@ -142,8 +142,8 @@ noreply(NewState) -> {noreply, start_memory_timer(NewState), hibernate}. start_memory_timer(State = #q { memory_report_timer = undefined }) -> - {ok, TRef} = timer:apply_after(?MEMORY_REPORT_TIME_INTERVAL, - rabbit_amqqueue, report_memory, [self()]), + {ok, TRef} = timer:send_after(?MINIMUM_MEMORY_REPORT_TIME_INTERVAL, + report_memory), report_memory(false, State #q { memory_report_timer = TRef }); start_memory_timer(State) -> State. @@ -199,11 +199,12 @@ record_current_channel_tx(ChPid, Txn) -> %% that wasn't happening already) store_ch_record((ch_record(ChPid))#cr{txn = Txn}). 
-deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc, - State = #q{q = #amqqueue{name = QName}, - active_consumers = ActiveConsumers, - blocked_consumers = BlockedConsumers, - next_msg_id = NextId}) -> +deliver_msgs_to_consumers( + Funs = {PredFun, DeliverFun}, FunAcc, + State = #q{q = #amqqueue{name = QName}, + active_consumers = ActiveConsumers, + blocked_consumers = BlockedConsumers, + next_msg_id = NextId}) -> case queue:out(ActiveConsumers) of {{value, QEntry = {ChPid, #consumer{tag = ConsumerTag, ack_required = AckRequired}}}, @@ -246,7 +247,7 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc, blocked_consumers = NewBlockedConsumers, next_msg_id = NextId + 1 }, - deliver_queue(Funs, FunAcc1, State2); + deliver_msgs_to_consumers(Funs, FunAcc1, State2); %% if IsMsgReady then we've hit the limiter false when IsMsgReady -> store_ch_record(C#cr{is_limit_active = true}), @@ -254,7 +255,7 @@ deliver_queue(Funs = {PredFun, DeliverFun}, FunAcc, move_consumers(ChPid, ActiveConsumers, BlockedConsumers), - deliver_queue( + deliver_msgs_to_consumers( Funs, FunAcc, State#q{active_consumers = NewActiveConsumers, blocked_consumers = NewBlockedConsumers}); @@ -271,7 +272,7 @@ deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, State = #q { mixed_state = MS }) -> {{Msg, IsDelivered, AckTag, Remaining}, MS1} = - rabbit_mixed_queue:deliver(MS), + rabbit_mixed_queue:fetch(MS), AutoAcks1 = case AckRequired of true -> AutoAcks; @@ -285,7 +286,7 @@ run_message_queue(State = #q { mixed_state = MS }) -> fun deliver_from_queue_deliver/3 }, IsEmpty = rabbit_mixed_queue:is_empty(MS), {{_IsEmpty1, AutoAcks}, State1} = - deliver_queue(Funs, {IsEmpty, []}, State), + deliver_msgs_to_consumers(Funs, {IsEmpty, []}, State), {ok, MS1} = rabbit_mixed_queue:ack(AutoAcks, State1 #q.mixed_state), State1 #q { mixed_state = MS1 }. @@ -306,7 +307,7 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> end, {{Msg, false, AckTag}, true, State2} end, - deliver_queue({ PredFun, DeliverFun }, false, State); + deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); attempt_immediate_delivery(Txn, ChPid, Msg, State) -> {ok, MS} = rabbit_mixed_queue:tx_publish(Msg, State #q.mixed_state), record_pending_message(Txn, ChPid, Msg), @@ -330,8 +331,8 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> Funs = { fun deliver_or_requeue_msgs_pred/2, fun deliver_or_requeue_msgs_deliver/3 }, {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = - deliver_queue(Funs, {length(MsgsWithAcks) - 1, [], MsgsWithAcks}, - State), + deliver_msgs_to_consumers( + Funs, {length(MsgsWithAcks), [], MsgsWithAcks}, State), {ok, MS} = rabbit_mixed_queue:ack(AutoAcks, NewState #q.mixed_state), case OutstandingMsgs of @@ -341,7 +342,7 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> end. deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> - -1 < Len. + 0 < Len. 
deliver_or_requeue_msgs_deliver( false, {Len, AcksAcc, [(MsgAckTag = {Msg, _}) | MsgsWithAcks]}, State) -> {{Msg, true, noack}, {Len - 1, [MsgAckTag | AcksAcc], MsgsWithAcks}, State}; @@ -612,11 +613,11 @@ handle_call({basic_get, ChPid, NoAck}, _From, next_msg_id = NextId, mixed_state = MS }) -> - case rabbit_mixed_queue:deliver(MS) of + case rabbit_mixed_queue:fetch(MS) of {empty, MS1} -> reply(empty, State #q { mixed_state = MS1 }); {{Msg, IsDelivered, AckTag, Remaining}, MS1} -> AckRequired = not(NoAck), - {ok, MS3} = + {ok, MS2} = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), @@ -628,9 +629,7 @@ handle_call({basic_get, ChPid, NoAck}, _From, end, Message = {QName, self(), NextId, IsDelivered, Msg}, reply({ok, Remaining, Message}, - State #q { next_msg_id = NextId + 1, - mixed_state = MS3 - }) + State #q { next_msg_id = NextId + 1, mixed_state = MS2 }) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -769,9 +768,9 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> not_found -> noreply(State); C = #cr{unacked_messages = UAM} -> - {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), case Txn of none -> + {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), {ok, MS} = rabbit_mixed_queue:ack(MsgWithAcks, State #q.mixed_state), store_ch_record(C#cr{unacked_messages = Remaining}), @@ -829,16 +828,13 @@ handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> PendingMessages = lists:flatten([Pending || #tx { pending_messages = Pending} <- all_tx_record()]), - {ok, MS1} = (case Mode of - disk -> fun rabbit_mixed_queue:to_disk_only_mode/2; - mixed -> fun rabbit_mixed_queue:to_mixed_mode/2 - end)(PendingMessages, MS), - noreply(State #q { mixed_state = MS1 }); - -handle_cast(report_memory, State) -> - %% deliberately don't call noreply/2 as we don't want to restart the timer - %% by unsetting the timer, we force a report on the next normal message - {noreply, State #q { memory_report_timer = undefined }, hibernate}. + {ok, MS1} = rabbit_mixed_queue:set_mode(Mode, PendingMessages, MS), + noreply(State #q { mixed_state = MS1 }). + +handle_info(report_memory, State) -> + %% deliberately don't call noreply/2 as we don't want to restart the timer. + %% By unsetting the timer, we force a report on the next normal message + {noreply, State #q { memory_report_timer = undefined }, hibernate}; handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 5940f5ad..e2f341ff 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -39,7 +39,7 @@ terminate/2, code_change/3]). -export([handle_pre_hibernate/1]). --export([publish/3, deliver/1, phantom_deliver/1, ack/2, +-export([publish/3, fetch/1, phantom_fetch/1, ack/2, tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, @@ -48,27 +48,27 @@ -export([filesync/0, cache_info/0]). --export([stop/0, stop_and_obliterate/0, report_memory/0, - set_mode/1, to_disk_only_mode/0, to_ram_disk_mode/0]). +-export([stop/0, stop_and_obliterate/0, set_mode/1, to_disk_only_mode/0, + to_ram_disk_mode/0]). -include("rabbit.hrl"). --define(WRITE_OK_SIZE_BITS, 8). --define(WRITE_OK_TRANSIENT, 255). --define(WRITE_OK_PERSISTENT, 254). --define(INTEGER_SIZE_BYTES, 8). --define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). --define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). 
--define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). --define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). --define(CACHE_ETS_NAME, rabbit_disk_queue_cache). --define(FILE_EXTENSION, ".rdq"). --define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_EXTENSION_DETS, ".dets"). --define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). --define(MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds --define(BATCH_SIZE, 10000). --define(CACHE_MAX_SIZE, 10485760). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_TRANSIENT, 255). +-define(WRITE_OK_PERSISTENT, 254). +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). +-define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). +-define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). +-define(CACHE_ETS_NAME, rabbit_disk_queue_cache). +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). +-define(FILE_EXTENSION_DETS, ".dets"). +-define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). +-define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs +-define(BATCH_SIZE, 10000). +-define(CACHE_MAX_SIZE, 10485760). -define(SERVER, ?MODULE). @@ -94,11 +94,11 @@ file_size_limit, %% how big can our files get? read_file_handles, %% file handles for reading (LRU) read_file_handles_limit, %% how many file handles can we open? - on_sync_txns, %% list of commiters to run on sync (reversed) + on_sync_txns, %% list of committers to run on sync (reversed) commit_timer_ref, %% TRef for our interval timer last_sync_offset, %% current_offset at the last time we sync'd message_cache, %% ets message cache - memory_report_timer, %% TRef for the memory report timer + memory_report_timer_ref, %% TRef for the memory report timer wordsize, %% bytes in a word on this platform mnesia_bytes_per_record, %% bytes per record in mnesia in ram_disk mode ets_bytes_per_record %% bytes per record in msg_location_ets @@ -253,10 +253,10 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(publish/3 :: (queue_name(), message(), bool()) -> 'ok'). --spec(deliver/1 :: (queue_name()) -> +-spec(fetch/1 :: (queue_name()) -> ('empty' | {message(), non_neg_integer(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). --spec(phantom_deliver/1 :: (queue_name()) -> +-spec(phantom_fetch/1 :: (queue_name()) -> ( 'empty' | {msg_id(), bool(), bool(), {msg_id(), seq_id()}, non_neg_integer()})). -spec(prefetch/1 :: (queue_name()) -> 'ok'). @@ -281,7 +281,6 @@ -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). --spec(report_memory/0 :: () -> 'ok'). -spec(set_mode/1 :: ('disk' | 'mixed') -> 'ok'). -endif. @@ -295,11 +294,11 @@ start_link() -> publish(Q, Message = #basic_message {}, IsDelivered) -> gen_server2:cast(?SERVER, {publish, Q, Message, IsDelivered}). -deliver(Q) -> - gen_server2:call(?SERVER, {deliver, Q}, infinity). +fetch(Q) -> + gen_server2:call(?SERVER, {fetch, Q}, infinity). -phantom_deliver(Q) -> - gen_server2:call(?SERVER, {phantom_deliver, Q}, infinity). +phantom_fetch(Q) -> + gen_server2:call(?SERVER, {phantom_fetch, Q}, infinity). prefetch(Q) -> gen_server2:pcast(?SERVER, -1, {prefetch, Q, self()}). @@ -360,9 +359,6 @@ filesync() -> cache_info() -> gen_server2:call(?SERVER, cache_info, infinity). -report_memory() -> - gen_server2:cast(?SERVER, report_memory).
- set_mode(Mode) -> gen_server2:pcast(?SERVER, 10, {set_mode, Mode}). @@ -406,8 +402,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% seems to blow up if it is set private MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), - TRef = start_memory_timer(), - InitName = "0" ++ ?FILE_EXTENSION, State = #dqstate { msg_location_dets = MsgLocationDets, @@ -430,7 +424,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> last_sync_offset = 0, message_cache = ets:new(?CACHE_ETS_NAME, [set, private]), - memory_report_timer = TRef, + memory_report_timer_ref = undefined, wordsize = erlang:system_info(wordsize), mnesia_bytes_per_record = undefined, ets_bytes_per_record = undefined @@ -457,14 +451,14 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% ets_bytes_per_record otherwise. ok = rabbit_queue_mode_manager:report_memory(self(), 0, false), ok = report_memory(false, State2), - {ok, State2, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, - ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + {ok, start_memory_timer(State2), hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -handle_call({deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, true, false, true, State), +handle_call({fetch, Q}, _From, State) -> + {ok, Result, State1} = internal_fetch(Q, true, false, true, State), reply(Result, State1); -handle_call({phantom_deliver, Q}, _From, State) -> - {ok, Result, State1} = internal_deliver(Q, false, false, true, State), +handle_call({phantom_fetch, Q}, _From, State) -> + {ok, Result, State1} = internal_fetch(Q, false, false, true, State), reply(Result, State1); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> State1 = @@ -534,13 +528,8 @@ handle_cast({set_mode, Mode}, State) -> disk -> fun to_disk_only_mode/1; mixed -> fun to_ram_disk_mode/1 end)(State)); -handle_cast(report_memory, State) -> - %% call noreply1/2, not noreply/1/2, as we don't want to restart the - %% memory_report_timer - %% by unsetting the timer, we force a report on the next normal message - noreply1(State #dqstate { memory_report_timer = undefined }); handle_cast({prefetch, Q, From}, State) -> - {ok, Result, State1} = internal_deliver(Q, true, true, false, State), + {ok, Result, State1} = internal_fetch(Q, true, true, false, State), Cont = rabbit_misc:with_exit_handler( fun () -> false end, fun () -> @@ -550,7 +539,7 @@ handle_cast({prefetch, Q, From}, State) -> State3 = case Cont of true -> - case internal_deliver(Q, false, false, true, State1) of + case internal_fetch(Q, false, false, true, State1) of {ok, empty, State2} -> State2; {ok, {_MsgId, _IsPersistent, _Delivered, _MsgSeqId, _Rem}, State2} -> State2 @@ -559,6 +548,11 @@ handle_cast({prefetch, Q, From}, State) -> end, noreply(State3). +handle_info(report_memory, State) -> + %% call noreply1/2, not noreply/1/2, as we don't want to restart the + %% memory_report_timer_ref. + %% By unsetting the timer, we force a report on the next normal message + noreply1(State #dqstate { memory_report_timer_ref = undefined }); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; handle_info(timeout, State) -> @@ -595,7 +589,7 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, State1 #dqstate { current_file_handle = undefined, current_dirty = false, read_file_handles = {dict:new(), gb_trees:empty()}, - memory_report_timer = undefined + memory_report_timer_ref = undefined }. 
code_change(_OldVsn, State, _Extra) -> @@ -603,20 +597,17 @@ code_change(_OldVsn, State, _Extra) -> %% ---- UTILITY FUNCTIONS ---- -stop_memory_timer(State = #dqstate { memory_report_timer = undefined }) -> +stop_memory_timer(State = #dqstate { memory_report_timer_ref = undefined }) -> State; -stop_memory_timer(State = #dqstate { memory_report_timer = TRef }) -> +stop_memory_timer(State = #dqstate { memory_report_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), - State #dqstate { memory_report_timer = undefined }. - -start_memory_timer() -> - {ok, TRef} = timer:apply_after(?MEMORY_REPORT_TIME_INTERVAL, - rabbit_disk_queue, report_memory, []), - TRef. + State #dqstate { memory_report_timer_ref = undefined }. -start_memory_timer(State = #dqstate { memory_report_timer = undefined }) -> +start_memory_timer(State = #dqstate { memory_report_timer_ref = undefined }) -> ok = report_memory(false, State), - State #dqstate { memory_report_timer = start_memory_timer() }; + {ok, TRef} = timer:send_after(?MINIMUM_MEMORY_REPORT_TIME_INTERVAL, + report_memory), + State #dqstate { memory_report_timer_ref = TRef }; start_memory_timer(State) -> State. @@ -893,7 +884,7 @@ cache_is_full(#dqstate { message_cache = Cache }) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_deliver(Q, ReadMsg, FakeDeliver, Advance, +internal_fetch(Q, ReadMsg, FakeDeliver, Advance, State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of {SeqId, SeqId} -> {ok, empty, State}; @@ -971,7 +962,7 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - end. internal_auto_ack(Q, State) -> - case internal_deliver(Q, false, false, true, State) of + case internal_fetch(Q, false, false, true, State) of {ok, empty, State1} -> {ok, State1}; {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Remaining}, State1} -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 3d989662..2b25ab0f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -35,11 +35,11 @@ -export([init/2]). --export([publish/2, publish_delivered/2, deliver/1, ack/2, +-export([publish/2, publish_delivered/2, fetch/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). --export([to_disk_only_mode/2, to_mixed_mode/2, info/1, +-export([set_mode/3, info/1, estimate_queue_memory_and_reset_counters/1]). -record(mqstate, { mode, @@ -76,7 +76,7 @@ -spec(publish/2 :: (message(), mqstate()) -> okmqs()). -spec(publish_delivered/2 :: (message(), mqstate()) -> {'ok', acktag(), mqstate()}). --spec(deliver/1 :: (mqstate()) -> +-spec(fetch/1 :: (mqstate()) -> {('empty' | {message(), boolean(), acktag(), non_neg_integer()}), mqstate()}). -spec(ack/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). @@ -91,8 +91,7 @@ -spec(length/1 :: (mqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (mqstate()) -> boolean()). --spec(to_disk_only_mode/2 :: ([message()], mqstate()) -> okmqs()). --spec(to_mixed_mode/2 :: ([message()], mqstate()) -> okmqs()). +-spec(set_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). -spec(estimate_queue_memory_and_reset_counters/1 :: (mqstate()) -> {mqstate(), non_neg_integer(), non_neg_integer(), @@ -120,8 +119,13 @@ size_of_message( SumAcc + size(Frag) end, 0, Payload). 
-to_disk_only_mode(_TxnMessages, State = #mqstate { mode = disk }) -> +set_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; +set_mode(disk, TxnMessages, State) -> + to_disk_only_mode(TxnMessages, State); +set_mode(mixed, TxnMessages, State) -> + to_mixed_mode(TxnMessages, State). + to_disk_only_mode(TxnMessages, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, prefetcher = Prefetcher @@ -219,8 +223,6 @@ flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), []. -to_mixed_mode(_TxnMessages, State = #mqstate { mode = mixed }) -> - {ok, State}; to_mixed_mode(TxnMessages, State = #mqstate { mode = disk, queue = Q, is_durable = IsDurable }) -> rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), @@ -248,6 +250,16 @@ to_mixed_mode(TxnMessages, State = #mqstate { mode = disk, queue = Q, garbage_collect(), {ok, State #mqstate { mode = mixed }}. +gain_memory(Inc, State = #mqstate { memory_size = QSize, + memory_gain = Gain }) -> + State #mqstate { memory_size = QSize + Inc, + memory_gain = Gain + Inc }. + +lose_memory(Dec, State = #mqstate { memory_size = QSize, + memory_loss = Loss }) -> + State #mqstate { memory_size = QSize - Dec, + memory_loss = Loss + Dec }. + inc_queue_length(_Q, MsgBuf, 0) -> MsgBuf; inc_queue_length(Q, MsgBuf, Count) -> @@ -264,7 +276,7 @@ dec_queue_length(Count, State = #mqstate { queue = Q, msg_buf = MsgBuf }) -> {{value, {Q, Len}}, MsgBuf1} -> case Len of Count -> - maybe_prefetch(State #mqstate { msg_buf = MsgBuf1 }); + State #mqstate { msg_buf = MsgBuf1 }; _ when Len > Count -> State #mqstate { msg_buf = queue:in_r({Q, Len-Count}, MsgBuf1)} @@ -286,26 +298,23 @@ maybe_prefetch(State) -> State. publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, - msg_buf = MsgBuf, memory_size = QSize, - memory_gain = Gain }) -> + msg_buf = MsgBuf }) -> MsgBuf1 = inc_queue_length(Q, MsgBuf, 1), ok = rabbit_disk_queue:publish(Q, Msg, false), MsgSize = size_of_message(Msg), - {ok, State #mqstate { memory_gain = Gain + MsgSize, - memory_size = QSize + MsgSize, - msg_buf = MsgBuf1, length = Length + 1 }}; + {ok, gain_memory(MsgSize, State #mqstate { msg_buf = MsgBuf1, + length = Length + 1 })}; publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, - msg_buf = MsgBuf, length = Length, memory_size = QSize, - memory_gain = Gain }) -> + msg_buf = MsgBuf, length = Length }) -> ok = case IsDurable andalso IsPersistent of true -> rabbit_disk_queue:publish(Q, Msg, false); false -> ok end, MsgSize = size_of_message(Msg), - {ok, State #mqstate { msg_buf = queue:in({Msg, false}, MsgBuf), - length = Length + 1, memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }}. + {ok, gain_memory(MsgSize, + State #mqstate { msg_buf = queue:in({Msg, false}, MsgBuf), + length = Length + 1 })}. %% Assumption here is that the queue is empty already (only called via %% attempt_immediate_delivery). 
@@ -313,20 +322,18 @@ publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent}, State = #mqstate { mode = Mode, is_durable = IsDurable, - queue = Q, length = 0, - memory_size = QSize, memory_gain = Gain }) + queue = Q, length = 0 }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> ok = rabbit_disk_queue:publish(Q, Msg, true), MsgSize = size_of_message(Msg), - State1 = State #mqstate { memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }, + State1 = gain_memory(MsgSize, State), case IsDurable andalso IsPersistent of true -> %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag {MsgId, IsPersistent, true, AckTag, 0} = - rabbit_disk_queue:phantom_deliver(Q), + rabbit_disk_queue:phantom_fetch(Q), {ok, AckTag, State1}; false -> %% in this case, we don't actually care about the ack, so @@ -334,18 +341,15 @@ publish_delivered(Msg = ok = rabbit_disk_queue:auto_ack_next_message(Q), {ok, noack, State1} end; -publish_delivered(Msg, State = - #mqstate { mode = mixed, length = 0, memory_size = QSize, - memory_gain = Gain }) -> +publish_delivered(Msg, State = #mqstate { mode = mixed, length = 0 }) -> MsgSize = size_of_message(Msg), - {ok, noack, State #mqstate { memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }}. + {ok, noack, gain_memory(MsgSize, State)}. -deliver(State = #mqstate { length = 0 }) -> +fetch(State = #mqstate { length = 0 }) -> {empty, State}; -deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, - is_durable = IsDurable, length = Length, - prefetcher = Prefetcher }) -> +fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, + is_durable = IsDurable, length = Length, + prefetcher = Prefetcher }) -> {{value, Value}, MsgBuf1} = queue:out(MsgBuf), Rem = Length - 1, State1 = State #mqstate { length = Rem }, @@ -356,13 +360,13 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, case IsDurable andalso IsPersistent of true -> {MsgId, IsPersistent, IsDelivered, AckTag1, _PRem} - = rabbit_disk_queue:phantom_deliver(Q), + = rabbit_disk_queue:phantom_fetch(Q), AckTag1; false -> noack end, - State2 = maybe_prefetch(State1 #mqstate { msg_buf = MsgBuf1 }), - {{Msg, IsDelivered, AckTag, Rem}, State2}; + {{Msg, IsDelivered, AckTag, Rem}, + State1 #mqstate { msg_buf = MsgBuf1 }}; {Msg = #basic_message { is_persistent = IsPersistent }, IsDelivered, AckTag} -> %% message has come via the prefetcher, thus it's been @@ -375,21 +379,21 @@ deliver(State = #mqstate { msg_buf = MsgBuf, queue = Q, State2 = dec_queue_length(1, State1), {Msg = #basic_message { is_persistent = IsPersistent }, _Size, IsDelivered, AckTag, _PersistRem} - = rabbit_disk_queue:deliver(Q), + = rabbit_disk_queue:fetch(Q), AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), {{Msg, IsDelivered, AckTag1, Rem}, State2}; _ -> case rabbit_queue_prefetcher:drain(Prefetcher) of - empty -> deliver(State #mqstate { prefetcher = undefined }); + empty -> fetch(State #mqstate { prefetcher = undefined }); {Fetched, Len, Status} -> State2 = #mqstate { msg_buf = MsgBuf2 } = dec_queue_length(Len, State), - deliver(State2 #mqstate - { msg_buf = queue:join(Fetched, MsgBuf2), - prefetcher = case Status of - finished -> undefined; - continuing -> Prefetcher - end }) + fetch(State2 #mqstate + { msg_buf = queue:join(Fetched, MsgBuf2), + prefetcher = case Status of + finished -> undefined; + continuing -> Prefetcher + end }) end end. 
@@ -407,38 +411,30 @@ remove_noacks(MsgsWithAcks) -> {[AckTag | AccAckTags], size_of_message(Msg) + AccSize} end, {[], 0}, MsgsWithAcks). -ack(MsgsWithAcks, State = #mqstate { queue = Q, memory_size = QSize, - memory_loss = Loss }) -> +ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> {AckTags, ASize} = remove_noacks(MsgsWithAcks), ok = case AckTags of [] -> ok; _ -> rabbit_disk_queue:ack(Q, AckTags) end, - State1 = State #mqstate { memory_size = QSize - ASize, - memory_loss = Loss + ASize }, - {ok, State1}. + {ok, lose_memory(ASize, State)}. tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, - State = #mqstate { mode = Mode, memory_size = QSize, - is_durable = IsDurable, memory_gain = Gain }) + State = #mqstate { mode = Mode, is_durable = IsDurable }) when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> ok = rabbit_disk_queue:tx_publish(Msg), MsgSize = size_of_message(Msg), - {ok, State #mqstate { memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }}; -tx_publish(Msg, State = #mqstate { mode = mixed, memory_size = QSize, - memory_gain = Gain }) -> + {ok, gain_memory(MsgSize, State)}; +tx_publish(Msg, State = #mqstate { mode = mixed }) -> %% this message will reappear in the tx_commit, so ignore for now MsgSize = size_of_message(Msg), - {ok, State #mqstate { memory_size = QSize + MsgSize, - memory_gain = Gain + MsgSize }}. + {ok, gain_memory(MsgSize, State)}. only_msg_ids(Pubs) -> lists:map(fun (Msg) -> {Msg #basic_message.guid, false} end, Pubs). tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = disk, queue = Q, length = Length, - memory_size = QSize, memory_loss = Loss, msg_buf = MsgBuf }) -> {RealAcks, ASize} = remove_noacks(MsgsWithAcks), ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; @@ -446,14 +442,12 @@ tx_commit(Publishes, MsgsWithAcks, RealAcks) end, Len = erlang:length(Publishes), - {ok, State #mqstate { length = Length + Len, - msg_buf = inc_queue_length(Q, MsgBuf, Len), - memory_size = QSize - ASize, - memory_loss = Loss + ASize }}; + {ok, lose_memory(ASize, State #mqstate + { length = Length + Len, + msg_buf = inc_queue_length(Q, MsgBuf, Len) })}; tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable, length = Length, - memory_size = QSize, memory_loss = Loss }) -> + is_durable = IsDurable, length = Length }) -> {PersistentPubs, MsgBuf1} = lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, {Acc, MsgBuf2}) -> @@ -471,23 +465,20 @@ tx_commit(Publishes, MsgsWithAcks, false -> rabbit_disk_queue:tx_commit( Q, lists:reverse(PersistentPubs), RealAcks) end, - {ok, State #mqstate { msg_buf = MsgBuf1, memory_size = QSize - ASize, - length = Length + erlang:length(Publishes), - memory_loss = Loss + ASize }}. + {ok, lose_memory(ASize, State #mqstate + { msg_buf = MsgBuf1, + length = Length + erlang:length(Publishes) })}. 
-tx_cancel(Publishes, State = #mqstate { mode = disk, memory_size = QSize, - memory_loss = Loss }) -> +tx_cancel(Publishes, State = #mqstate { mode = disk }) -> {MsgIds, CSize} = lists:foldl( fun (Msg = #basic_message { guid = MsgId }, {MsgIdsAcc, CSizeAcc}) -> {[MsgId | MsgIdsAcc], CSizeAcc + size_of_message(Msg)} end, {[], 0}, Publishes), ok = rabbit_disk_queue:tx_cancel(MsgIds), - {ok, State #mqstate { memory_size = QSize - CSize, - memory_loss = Loss + CSize }}; -tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable, - memory_size = QSize, - memory_loss = Loss }) -> + {ok, lose_memory(CSize, State)}; +tx_cancel(Publishes, + State = #mqstate { mode = mixed, is_durable = IsDurable }) -> {PersistentPubs, CSize} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent, @@ -503,8 +494,7 @@ tx_cancel(Publishes, State = #mqstate { mode = mixed, is_durable = IsDurable, rabbit_disk_queue:tx_cancel(PersistentPubs); true -> ok end, - {ok, State #mqstate { memory_size = QSize - CSize, - memory_loss = Loss + CSize }}. + {ok, lose_memory(CSize, State)}. %% [{Msg, AckTag}] requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, @@ -555,32 +545,30 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, length = Length + erlang:length(MessagesWithAckTags)}}. purge(State = #mqstate { queue = Q, mode = disk, length = Count, - memory_loss = Loss, memory_size = QSize }) -> + memory_size = QSize }) -> Count = rabbit_disk_queue:purge(Q), - {Count, State #mqstate { length = 0, memory_size = 0, - memory_loss = Loss + QSize }}; + {Count, lose_memory(QSize, State)}; purge(State = #mqstate { queue = Q, mode = mixed, length = Length, - memory_loss = Loss, memory_size = QSize, - prefetcher = Prefetcher }) -> + memory_size = QSize, prefetcher = Prefetcher }) -> case Prefetcher of undefined -> ok; _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) end, rabbit_disk_queue:purge(Q), - {Length, - State #mqstate { msg_buf = queue:new(), length = 0, memory_size = 0, - memory_loss = Loss + QSize, prefetcher = undefined }}. + {Length, lose_memory(QSize, State #mqstate { msg_buf = queue:new(), + length = 0, + prefetcher = undefined })}. delete_queue(State = #mqstate { queue = Q, memory_size = QSize, - memory_loss = Loss, prefetcher = Prefetcher + prefetcher = Prefetcher }) -> case Prefetcher of undefined -> ok; _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) end, ok = rabbit_disk_queue:delete_queue(Q), - {ok, State #mqstate { length = 0, memory_size = 0, msg_buf = queue:new(), - memory_loss = Loss + QSize, prefetcher = undefined }}. + {ok, lose_memory(QSize, State #mqstate { length = 0, msg_buf = queue:new(), + prefetcher = undefined })}. length(#mqstate { length = Length }) -> Length. diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index c847848d..ad6b1ce2 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -94,7 +94,7 @@ %% to its internal queue. A cast is not sufficient here because the %% mixed_queue could come along, drain the prefetcher, thus %% catching the msg just sent by the disk_queue and then call -%% disk_queue:deliver(Q) which is normal priority call, which could +%% disk_queue:fetch(Q) which is normal priority call, which could %% overtake a reply cast from the prefetcher to the disk queue, %% which would result in the same message being delivered %% twice. 
Thus when the disk_queue calls prefetcher:publish(Msg), @@ -146,7 +146,7 @@ %% mixed_queue tries to drain the prefetcher. We must therefore ensure %% that this msg can't also be delivered to the mixed_queue directly %% by the disk_queue through the mixed_queue calling -%% disk_queue:deliver(Q) which is why the prefetcher:publish function +%% disk_queue:fetch(Q) which is why the prefetcher:publish function %% is a call and not a cast, thus blocking the disk_queue. %% %% Finally, the prefetcher is only created when the mixed_queue is diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ffd675a0..ad5a2483 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -157,7 +157,7 @@ test_simple_n_element_queue(N) -> passed. test_unfold() -> - {[], test} = rabbit_misc:unfold(fun (V) -> false end, test), + {[], test} = rabbit_misc:unfold(fun (_V) -> false end, test), List = lists:seq(2,20,2), {List, 0} = rabbit_misc:unfold(fun (0) -> false; (N) -> {true, N*2, N-1} @@ -848,7 +848,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> [begin Remaining = MsgCount - N, {Message, _TSize, false, SeqId, - Remaining} = rabbit_disk_queue:deliver(Q), + Remaining} = rabbit_disk_queue:fetch(Q), ok = rdq_match_message(Message, N, Msg, MsgSizeBytes), SeqId end || N <- List], @@ -895,7 +895,7 @@ rdq_stress_gc(MsgCount) -> fun (MsgId, Acc) -> Remaining = MsgCount - MsgId, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, MsgId, Msg, MsgSizeBytes), dict:store(MsgId, SeqId, Acc) end, dict:new(), List), @@ -904,7 +904,7 @@ rdq_stress_gc(MsgCount) -> rabbit_disk_queue:ack(q, [SeqId]) end || MsgId <- AckList2], rabbit_disk_queue:tx_commit(q, [], []), - empty = rabbit_disk_queue:deliver(q), + empty = rabbit_disk_queue:fetch(q), rdq_stop(), passed. @@ -923,7 +923,7 @@ rdq_test_startup_with_queue_gaps() -> Seqs = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1,Half)], @@ -945,7 +945,7 @@ rdq_test_startup_with_queue_gaps() -> Seqs2 = [begin Remaining = round(Total - ((Half + N)/2)), {Message, _TSize, true, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(2,Half,2)], @@ -955,13 +955,13 @@ rdq_test_startup_with_queue_gaps() -> Seqs3 = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1 + Half,Total)], rabbit_disk_queue:tx_commit(q, [], Seqs3), io:format("Read second half done~n", []), - empty = rabbit_disk_queue:deliver(q), + empty = rabbit_disk_queue:fetch(q), rdq_stop(), passed. 
@@ -980,7 +980,7 @@ rdq_test_redeliver() -> Seqs = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1,Half)], @@ -1001,7 +1001,7 @@ rdq_test_redeliver() -> Seqs2 = [begin Remaining = round(Total - N + (Half/2)), {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1+Half, Total)], @@ -1009,12 +1009,12 @@ rdq_test_redeliver() -> Seqs3 = [begin Remaining = round((Half - N) / 2) - 1, {Message, _TSize, true, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1, Half, 2)], rabbit_disk_queue:tx_commit(q, [], Seqs3), - empty = rabbit_disk_queue:deliver(q), + empty = rabbit_disk_queue:fetch(q), rdq_stop(), passed. @@ -1033,7 +1033,7 @@ rdq_test_purge() -> Seqs = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1,Half)], @@ -1042,7 +1042,7 @@ rdq_test_purge() -> io:format("Purge done~n", []), rabbit_disk_queue:tx_commit(q, [], Seqs), io:format("Ack first half done~n", []), - empty = rabbit_disk_queue:deliver(q), + empty = rabbit_disk_queue:fetch(q), rdq_stop(), passed. @@ -1051,7 +1051,7 @@ rdq_new_mixed_queue(Q, Durable, Disk) -> {MS1, _, _, _} = rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), case Disk of - true -> {ok, MS2} = rabbit_mixed_queue:to_disk_only_mode([], MS1), + true -> {ok, MS2} = rabbit_mixed_queue:set_mode(disk, [], MS1), MS2; false -> MS1 end. 
@@ -1083,11 +1083,11 @@ rdq_test_mixed_queue_modes() -> 30 = rabbit_mixed_queue:length(MS6), io:format("Published a mixture of messages; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS6)]), - {ok, MS7} = rabbit_mixed_queue:to_disk_only_mode([], MS6), + {ok, MS7} = rabbit_mixed_queue:set_mode(disk, [], MS6), 30 = rabbit_mixed_queue:length(MS7), io:format("Converted to disk only mode; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS7)]), - {ok, MS8} = rabbit_mixed_queue:to_mixed_mode([], MS7), + {ok, MS8} = rabbit_mixed_queue:set_mode(mixed, [], MS7), 30 = rabbit_mixed_queue:length(MS8), io:format("Converted to mixed mode; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS8)]), @@ -1097,12 +1097,12 @@ rdq_test_mixed_queue_modes() -> Rem = 30 - N, {{#basic_message { is_persistent = false }, false, _AckTag, Rem}, - MS9a} = rabbit_mixed_queue:deliver(MS9), + MS9a} = rabbit_mixed_queue:fetch(MS9), MS9a end, MS8, lists:seq(1,10)), 20 = rabbit_mixed_queue:length(MS10), io:format("Delivered initial non persistent messages~n"), - {ok, MS11} = rabbit_mixed_queue:to_disk_only_mode([], MS10), + {ok, MS11} = rabbit_mixed_queue:set_mode(disk, [], MS10), 20 = rabbit_mixed_queue:length(MS11), io:format("Converted to disk only mode~n"), rdq_stop(), @@ -1116,13 +1116,13 @@ rdq_test_mixed_queue_modes() -> Rem = 10 - N, {{Msg = #basic_message { is_persistent = true }, false, AckTag, Rem}, - MS13a} = rabbit_mixed_queue:deliver(MS13), + MS13a} = rabbit_mixed_queue:fetch(MS13), {MS13a, [{Msg, AckTag} | AcksAcc]} end, {MS12, []}, lists:seq(1,10)), 0 = rabbit_mixed_queue:length(MS14), {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14), io:format("Delivered and acked all messages~n"), - {ok, MS16} = rabbit_mixed_queue:to_disk_only_mode([], MS15), + {ok, MS16} = rabbit_mixed_queue:set_mode(disk, [], MS15), 0 = rabbit_mixed_queue:length(MS16), io:format("Converted to disk only mode~n"), rdq_stop(), @@ -1149,28 +1149,28 @@ rdq_test_mode_conversion_mid_txn() -> rdq_start(), MS0 = rdq_new_mixed_queue(q, true, false), passed = rdq_tx_publish_mixed_alter_commit_get( - MS0, MsgsA, MsgsB, fun rabbit_mixed_queue:to_disk_only_mode/2, commit), + MS0, MsgsA, MsgsB, disk, commit), rdq_stop_virgin_start(), MS1 = rdq_new_mixed_queue(q, true, false), passed = rdq_tx_publish_mixed_alter_commit_get( - MS1, MsgsA, MsgsB, fun rabbit_mixed_queue:to_disk_only_mode/2, cancel), + MS1, MsgsA, MsgsB, disk, cancel), rdq_stop_virgin_start(), MS2 = rdq_new_mixed_queue(q, true, true), passed = rdq_tx_publish_mixed_alter_commit_get( - MS2, MsgsA, MsgsB, fun rabbit_mixed_queue:to_mixed_mode/2, commit), + MS2, MsgsA, MsgsB, mixed, commit), rdq_stop_virgin_start(), MS3 = rdq_new_mixed_queue(q, true, true), passed = rdq_tx_publish_mixed_alter_commit_get( - MS3, MsgsA, MsgsB, fun rabbit_mixed_queue:to_mixed_mode/2, cancel), + MS3, MsgsA, MsgsB, mixed, cancel), rdq_stop(), passed. 
-rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCancel) -> +rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) -> 0 = rabbit_mixed_queue:length(MS0), MS2 = lists:foldl( fun (Msg, MS1) -> @@ -1185,7 +1185,7 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc MS3a end, MS2, MsgsB), Len0 = rabbit_mixed_queue:length(MS4), - {ok, MS5} = ChangeFun(MsgsB, MS4), + {ok, MS5} = rabbit_mixed_queue:set_mode(Mode, MsgsB, MS4), Len0 = rabbit_mixed_queue:length(MS5), {ok, MS9} = case CommitOrCancel of @@ -1198,7 +1198,7 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc fun (Msg, {Acc, MS7}) -> Rem = Len1 - (Msg #basic_message.guid) - 1, {{Msg, false, AckTag, Rem}, MS7a} = - rabbit_mixed_queue:deliver(MS7), + rabbit_mixed_queue:fetch(MS7), {[{Msg, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA ++ MsgsB), 0 = rabbit_mixed_queue:length(MS8), @@ -1211,7 +1211,7 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, ChangeFun, CommitOrCanc fun (Msg, {Acc, MS7}) -> Rem = Len0 - (Msg #basic_message.guid) - 1, {{Msg, false, AckTag, Rem}, MS7a} = - rabbit_mixed_queue:deliver(MS7), + rabbit_mixed_queue:fetch(MS7), {[{Msg, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA), 0 = rabbit_mixed_queue:length(MS8), @@ -1244,7 +1244,7 @@ rdq_test_disk_queue_modes() -> Seqs = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- Half1], @@ -1254,7 +1254,7 @@ rdq_test_disk_queue_modes() -> Seqs2 = [begin Remaining = Total - N, {Message, _TSize, false, SeqId, Remaining} = - rabbit_disk_queue:deliver(q), + rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- Half2], @@ -1262,7 +1262,7 @@ rdq_test_disk_queue_modes() -> ok = rabbit_disk_queue:tx_commit(q, [], Seqs), ok = rabbit_disk_queue:to_disk_only_mode(), ok = rabbit_disk_queue:tx_commit(q, [], Seqs2), - empty = rabbit_disk_queue:deliver(q), + empty = rabbit_disk_queue:fetch(q), rdq_stop(), passed. -- cgit v1.2.1 From c1b27b587d7e47ab2c96a0607aa25dde2c63a9ab Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 11:11:08 +0100 Subject: > You should just *replace* to_{mixed,disk_only}_mode with set_mode, not make the latter a wrapper for the former. Done. --- src/rabbit_mixed_queue.erl | 67 +++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 2b25ab0f..2e67735f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -121,15 +121,9 @@ size_of_message( set_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; -set_mode(disk, TxnMessages, State) -> - to_disk_only_mode(TxnMessages, State); -set_mode(mixed, TxnMessages, State) -> - to_mixed_mode(TxnMessages, State).
- -to_disk_only_mode(TxnMessages, State = - #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable, prefetcher = Prefetcher - }) -> +set_mode(disk, TxnMessages, State = + #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + is_durable = IsDurable, prefetcher = Prefetcher }) -> rabbit_log:info("Converting queue to disk only mode: ~p~n", [Q]), State1 = State #mqstate { mode = disk }, {MsgBuf1, State2} = @@ -164,7 +158,33 @@ to_disk_only_mode(TxnMessages, State = end end, TxnMessages), garbage_collect(), - {ok, State2 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}. + {ok, State2 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; +set_mode(mixed, TxnMessages, State = #mqstate { mode = disk, queue = Q, + is_durable = IsDurable }) -> + rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), + %% The queue has a token just saying how many msgs are on disk + %% (this is already built for us when in disk mode). + %% Don't actually do anything to the disk + %% Don't start prefetcher just yet because the queue may be busy - + %% wait for hibernate timeout in the amqqueue_process. + + %% Remove txn messages from disk which are not both persistent and + %% durable. This is necessary to avoid leaks. This is also pretty + %% much the inverse behaviour of our own tx_cancel/2 which is why + %% we're not using it. + Cancel = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> + case IsDurable andalso IsPersistent of + true -> Acc; + false -> [Msg #basic_message.guid | Acc] + end + end, [], TxnMessages), + ok = if Cancel == [] -> ok; + true -> rabbit_disk_queue:tx_cancel(Cancel) + end, + garbage_collect(), + {ok, State #mqstate { mode = mixed }}. send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, Commit, MsgBuf) -> @@ -223,33 +243,6 @@ flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), []. -to_mixed_mode(TxnMessages, State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable }) -> - rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), - %% The queue has a token just saying how many msgs are on disk - %% (this is already built for us when in disk mode). - %% Don't actually do anything to the disk - %% Don't start prefetcher just yet because the queue maybe busy - - %% wait for hibernate timeout in the amqqueue_process. - - %% Remove txn messages from disk which are neither persistent and - %% durable. This is necessary to avoid leaks. This is also pretty - %% much the inverse behaviour of our own tx_cancel/2 which is why - %% we're not using it. - Cancel = - lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> - case IsDurable andalso IsPersistent of - true -> Acc; - false -> [Msg #basic_message.guid | Acc] - end - end, [], TxnMessages), - ok = if Cancel == [] -> ok; - true -> rabbit_disk_queue:tx_cancel(Cancel) - end, - garbage_collect(), - {ok, State #mqstate { mode = mixed }}. - gain_memory(Inc, State = #mqstate { memory_size = QSize, memory_gain = Gain }) -> State #mqstate { memory_size = QSize + Inc, -- cgit v1.2.1 From d270fe2bd4ba30a17e372a4e178ea15d27e75ccd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 15:01:36 +0100 Subject: Well, this was a very very sneaky bug.
1) create durable queue 2) send persistent msgs and send queue to disk_only mode (note, requires branch bug21444) 3) when done, set queue to mixed mode 4) send more persistent msgs 5) when done, wait for the prefetcher to do its thing 6) restart rabbit 7) observe that queue length is wrong Bugs fixed: o) in the to_disk_only_mode code in mixed_queue, msgs that had come out of the prefetcher weren't being acked. This meant that on a restart, the msgs would be recovered. Given that we have to requeue everything anyway (sometimes) in a mixed -> disk transition, we obviously have to ack these msgs before republishing them. Note that we do this as part of a tx_commit, so it's perfectly safe o) in the to_disk_only_mode code in mixed_queue, there was a recursion which swapped an IsDurable param with an IsDelivered param. This caused substantial fail. o) transaction commit coalescing is dangerous, especially when you're relying on calls to the disk queue to happen in order. For example, should you tx_publish, tx_commit and then auto_ack, or requeue_next_n, you would expect that those last calls get to see the msgs tx_published. This is not necessarily the case. A further good example is a tx_commit followed by a queue.delete. So, in the disk_queue for such calls, make sure that we flush properly, but also expose this functionality (it was already exposed, but as a cast, and although not absolutely necessary to be a call, if we're tx_committing anyway then that's a call, so another full round trip isn't a problem). One final note, there is no way that this bug would have been discovered and so easily replicated and debugged without the pinning code in bug 21444. We will seriously hamper our own ability to debug and aid clients should the new persister get released without 21444. --- src/rabbit_disk_queue.erl | 22 ++++++++++-------- src/rabbit_mixed_queue.erl | 56 ++++++++++++++++++++++++---------------------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index e2f341ff..c1744d66 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -354,7 +354,7 @@ to_ram_disk_mode() -> gen_server2:pcall(?SERVER, 9, to_ram_disk_mode, infinity). filesync() -> - gen_server2:pcast(?SERVER, 10, filesync). + gen_server2:pcall(?SERVER, 9, filesync). cache_info() -> gen_server2:call(?SERVER, cache_info, infinity). @@ -467,6 +467,8 @@ handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), reply(Count, State1); +handle_call(filesync, _From, State) -> + reply(ok, sync_current_file_handle(State)); handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), reply(WriteSeqId - ReadSeqId, State); @@ -521,8 +523,6 @@ handle_cast({requeue_next_n, Q, N}, State) -> handle_cast({delete_queue, Q}, State) -> {ok, State1} = internal_delete_queue(Q, State), noreply(State1); -handle_cast(filesync, State) -> - noreply(sync_current_file_handle(State)); handle_cast({set_mode, Mode}, State) -> noreply((case Mode of disk -> fun to_disk_only_mode/1; mixed -> fun to_ram_disk_mode/1 end)(State)); @@ -909,9 +909,11 @@ internal_fetch(Q, ReadMsg, FakeDeliver, Advance, end, State1} end.
-internal_foldl(Q, Fun, Init, State = #dqstate { sequences = Sequences }) -> +internal_foldl(Q, Fun, Init, State) -> + State1 = #dqstate { sequences = Sequences } = + sync_current_file_handle(State), {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - internal_foldl(Q, WriteSeqId, Fun, State, Init, ReadSeqId). + internal_foldl(Q, WriteSeqId, Fun, State1, Init, ReadSeqId). internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> {ok, Acc, State}; @@ -1118,11 +1120,12 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, {ok, {MsgId, WriteSeqId}, State1}. internal_tx_cancel(MsgIds, State) -> + State1 = sync_current_file_handle(State), %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), undefined)), - remove_messages(undefined, MsgSeqIds, false, State). + remove_messages(undefined, MsgSeqIds, false, State1). internal_requeue(_Q, [], State) -> {ok, State}; @@ -1218,8 +1221,9 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> end. internal_delete_queue(Q, State) -> - {ok, _Count, State1 = #dqstate { sequences = Sequences }} = - internal_purge(Q, State), %% remove everything undelivered + State1 = sync_current_file_handle(State), + {ok, _Count, State2 = #dqstate { sequences = Sequences }} = + internal_purge(Q, State1), %% remove everything undelivered true = ets:delete(Sequences, Q), %% now remove everything already delivered Objs = mnesia:dirty_match_object( @@ -1233,7 +1237,7 @@ internal_delete_queue(Q, State) -> fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, msg_id = MsgId }) -> {MsgId, SeqId} end, Objs), - remove_messages(Q, MsgSeqIds, true, State1). + remove_messages(Q, MsgSeqIds, true, State2). internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 2e67735f..f865b19a 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -145,7 +145,7 @@ set_mode(disk, TxnMessages, State = %% Note we also batch together messages on disk so that we minimise %% the calls to requeue. {ok, MsgBuf3} = - send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], queue:new()), + send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], [], queue:new()), %% tx_publish txn messages. Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -187,11 +187,11 @@ set_mode(mixed, TxnMessages, State = #mqstate { mode = disk, queue = Q, {ok, State #mqstate { mode = mixed }}. 
send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, - Commit, MsgBuf) -> + Commit, Ack, MsgBuf) -> case queue:out(Queue) of {empty, _Queue} -> - ok = flush_messages_to_disk_queue(Q, Commit), - [] = flush_requeue_to_disk_queue(Q, RequeueCount, []), + ok = flush_messages_to_disk_queue(Q, Commit, Ack), + {[], []} = flush_requeue_to_disk_queue(Q, RequeueCount, [], []), {ok, MsgBuf}; {{value, {Msg = #basic_message { is_persistent = IsPersistent }, IsDelivered}}, Queue1} -> @@ -199,49 +199,51 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, true -> %% it's already in the Q send_messages_to_disk( IsDurable, Q, Queue1, PublishCount, RequeueCount + 1, - Commit, inc_queue_length(Q, MsgBuf, 1)); + Commit, Ack, inc_queue_length(Q, MsgBuf, 1)); false -> republish_message_to_disk_queue( IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, - MsgBuf, Msg, IsDelivered) + Ack, MsgBuf, Msg, IsDelivered) end; - {{value, {Msg, IsDelivered, _AckTag}}, Queue1} -> + {{value, {Msg, IsDelivered, AckTag}}, Queue1} -> %% these have come via the prefetcher, so are no longer in %% the disk queue so they need to be republished - republish_message_to_disk_queue(IsDelivered, Q, Queue1, - PublishCount, RequeueCount, Commit, - MsgBuf, Msg, IsDelivered); + republish_message_to_disk_queue( + IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, + [AckTag | Ack], MsgBuf, Msg, IsDelivered); {{value, {Q, Count}}, Queue1} -> send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, - RequeueCount + Count, Commit, + RequeueCount + Count, Commit, Ack, inc_queue_length(Q, MsgBuf, Count)) end. republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount, - Commit, MsgBuf, Msg = + Commit, Ack, MsgBuf, Msg = #basic_message { guid = MsgId }, IsDelivered) -> - Commit1 = flush_requeue_to_disk_queue(Q, RequeueCount, Commit), + {Commit1, Ack1} = flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack), ok = rabbit_disk_queue:tx_publish(Msg), - {PublishCount1, Commit2} = + {PublishCount1, Commit2, Ack2} = case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of - true -> ok = flush_messages_to_disk_queue(Q, Commit1), - {1, [{MsgId, IsDelivered}]}; - false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1]} + true -> ok = flush_messages_to_disk_queue( + Q, [{MsgId, IsDelivered} | Commit1], Ack1), + {0, [], []}; + false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1], Ack1} end, send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0, - Commit2, inc_queue_length(Q, MsgBuf, 1)). + Commit2, Ack2, inc_queue_length(Q, MsgBuf, 1)). -flush_messages_to_disk_queue(_Q, []) -> +flush_messages_to_disk_queue(_Q, [], []) -> ok; -flush_messages_to_disk_queue(Q, Commit) -> - rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), []). - -flush_requeue_to_disk_queue(_Q, 0, Commit) -> - Commit; -flush_requeue_to_disk_queue(Q, RequeueCount, Commit) -> - ok = flush_messages_to_disk_queue(Q, Commit), +flush_messages_to_disk_queue(Q, Commit, Ack) -> + rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), Ack). + +flush_requeue_to_disk_queue(_Q, 0, Commit, Ack) -> + {Commit, Ack}; +flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> + ok = flush_messages_to_disk_queue(Q, Commit, Ack), + ok = rabbit_disk_queue:filesync(), ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), - []. + {[], []}. 
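%% The filesync/0 call in flush_requeue_to_disk_queue above is the fix for
%% point (o) of the commit message: tx_commits may be coalesced and completed
%% lazily, so a subsequent call that bypasses the commit path must not assume
%% the commit has already taken effect. A sketch of the interleaving being
%% guarded against, with Msg and MsgId purely illustrative:
ok = rabbit_disk_queue:tx_publish(Msg),
ok = rabbit_disk_queue:tx_commit(Q, [{MsgId, false}], []),
%% without an explicit sync here, requeue_next_n/2 could be processed
%% before the commit above has actually been applied to the queue
ok = rabbit_disk_queue:filesync(),
ok = rabbit_disk_queue:requeue_next_n(Q, 1).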
gain_memory(Inc, State = #mqstate { memory_size = QSize, memory_gain = Gain }) -> State #mqstate { memory_size = QSize + Inc, -- cgit v1.2.1 From b772f369cde9fb5d574ca9e5d0ca68262c846cc2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 15:43:20 +0100 Subject: remove the unnecessary sync from tx_cancel. A cancel cannot be misinterleaved with a commit for the same transaction so it's not necessary. --- src/rabbit_disk_queue.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c1744d66..9e70f8c5 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1120,12 +1120,11 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, {ok, {MsgId, WriteSeqId}, State1}. internal_tx_cancel(MsgIds, State) -> - State1 = sync_current_file_handle(State), %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), undefined)), - remove_messages(undefined, MsgSeqIds, false, State1). + remove_messages(undefined, MsgSeqIds, false, State). -- cgit v1.2.1 From d4b5e91484fb43bf74ca841696ed0824d6b8c177 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 22:42:39 +0100 Subject: Made messages be marked as delivered during prefetch *before* they are passed to the prefetcher. --- src/rabbit_disk_queue.erl | 4 +- src/rabbit_queue_prefetcher.erl | 82 ++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 9e70f8c5..96125031 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -529,7 +529,7 @@ handle_cast({set_mode, Mode}, State) -> mixed -> fun to_ram_disk_mode/1 end)(State)); handle_cast({prefetch, Q, From}, State) -> - {ok, Result, State1} = internal_fetch(Q, true, true, false, State), + {ok, Result, State1} = internal_fetch(Q, true, false, false, State), Cont = rabbit_misc:with_exit_handler( fun () -> false end, fun () -> @@ -539,7 +539,7 @@ handle_cast({prefetch, Q, From}, State) -> State3 = case Cont of true -> - case internal_fetch(Q, false, false, true, State1) of + case internal_fetch(Q, false, true, true, State1) of {ok, empty, State2} -> State2; {ok, {_MsgId, _IsPersistent, _Delivered, _MsgSeqId, _Rem}, State2} -> State2 diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index ad6b1ce2..f22aa6af 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -82,29 +82,36 @@ %% priority cast. Note that in the mean time, the mixed_queue could %% have come along, found the prefetcher empty, asked it to %% exit. This means the effective "reply" from the disk_queue will -%% go no where. As a result, the disk_queue must perform no -%% modification to the status of the message *or the queue* - do -%% not mark the message delivered, and do not advance the queue. If -%% it did advance the queue and the msg was then lost, then the -%% queue would have lost a msg that the mixed_queue would not pick -%% up. +%% go nowhere. As a result, the disk_queue should not advance the +%% queue. However, it does mark the messages as delivered. The +%% reasoning is that if it didn't, there would be the possibility +%% that the message was delivered without it being marked as such +%% on disk.
We must maintain the property that a message which is +%% marked as non-redelivered really hasn't been delivered anywhere +%% before. The downside is that should the prefetcher not receive +%% this message, the queue will then fetch the message from the +%% disk_queue directly, and this message will have its delivered +%% bit set. The queue will not be advanced though - if it did +%% advance the queue and the msg was then lost, then the queue +%% would have lost a msg that the mixed_queue would not pick up. %% %% 3) The prefetcher hopefully receives the call from %% prefetcher:publish(Msg). It replies immediately, and then adds -%% to its internal queue. A cast is not sufficient here because the -%% mixed_queue could come along, drain the prefetcher, thus -%% catching the msg just sent by the disk_queue and then call -%% disk_queue:fetch(Q) which is normal priority call, which could -%% overtake a reply cast from the prefetcher to the disk queue, -%% which would result in the same message being delivered +%% to its internal queue. A cast is not sufficient as a pseudo +%% "reply" here because the mixed_queue could come along, drain the +%% prefetcher, thus catching the msg just sent by the disk_queue +%% and then call disk_queue:fetch(Q) which is normal priority call, +%% which could overtake a reply cast from the prefetcher to the +%% disk queue, resulting in the same message being delivered %% twice. Thus when the disk_queue calls prefetcher:publish(Msg), %% it is briefly blocked. However, a) the prefetcher replies %% immediately, and b) the prefetcher should never have more than -%% one item in its mailbox anyway, so this should not cause a -%% problem to the disk_queue. +%% two items in its mailbox anyway (one from the queue process / +%% mixed_queue and one from the disk_queue), so this should not +%% cause a problem to the disk_queue. %% -%% 4) The disk_queue receives the reply, marks the msg at the head of -%% the queue Q as delivered, and advances the Q to the next msg. +%% 4) The disk_queue receives the reply, and advances the Q to the +%% next msg. %% %% 5) If the prefetcher has not met its target then it goes back to %% 1). Otherwise it just sits and waits for the mixed_queue to @@ -125,29 +132,30 @@ %% on talk directly with the disk_queue and not via the %% prefetcher. This is more efficient and the mixed_queue will use %% normal priority blocking calls to the disk_queue and thus get -%% better service that way. +%% better service. %% %% The prefetcher may at this point have issued a %% disk_queue:prefetch(Q) cast which has not yet been picked up by the %% disk_queue. This msg won't go away and the disk_queue will %% eventually find it. However, when it does, it'll simply read the %% next message from the queue (which could now be empty), possibly -%% populate the cache (no harm done) and try and call -%% prefetcher:publish(Msg) which will result in an error, which the -%% disk_queue catches, as the publish call is to a non-existant -%% process. However, the state of the queue and the state of the -%% message has not been altered so the mixed_queue will be able to -%% fetch this message as if it had never been prefetched. +%% populate the cache (no harm done), mark the message as deleted (oh +%% well, not a spec violation, and better than the alternative) and +%% try and call prefetcher:publish(Msg) which will result in an error, +%% which the disk_queue catches, as the publish call is to a +%% non-existant process. 
However, the state of the queue has not been +%% altered so the mixed_queue will be able to fetch this message as if +%% it had never been prefetched. %% -%% The only point at which the queue is advanced and the message -%% marked as delivered is when the prefetcher replies to the publish -%% call. At this point the message has been received by the prefetcher -%% and so we guarantee it will be passed to the mixed_queue when the -%% mixed_queue tries to drain the prefetcher. We must therefore ensure -%% that this msg can't also be delivered to the mixed_queue directly -%% by the disk_queue through the mixed_queue calling -%% disk_queue:fetch(Q) which is why the prefetcher:publish function -%% is a call and not a cast, thus blocking the disk_queue. +%% The only point at which the queue is advanced is when the +%% prefetcher replies to the publish call. At this point the message +%% has been received by the prefetcher and so we guarantee it will be +%% passed to the mixed_queue when the mixed_queue tries to drain the +%% prefetcher. We must therefore ensure that this msg can't also be +%% delivered to the mixed_queue directly by the disk_queue through the +%% mixed_queue calling disk_queue:fetch(Q) which is why the +%% prefetcher:publish function is a call and not a cast, thus blocking +%% the disk_queue. %% %% Finally, the prefetcher is only created when the mixed_queue is %% operating in mixed mode and it sees that the next N messages are @@ -166,15 +174,7 @@ %% we have no guarantee that the message will really go out of the %% socket. What we do still have is that messages which have the %% redelivered bit set false really are guaranteed to have not been -%% delivered already. In theory, it's possible that the disk_queue -%% calls prefetcher:publish, blocks waiting for the reply. The -%% prefetcher grabs the message, is drained, the message goes out of -%% the socket and is delivered. The broker then crashes before the -%% disk_queue processes the reply from the prefetcher, thus the fact -%% the message has been delivered is not recorded. However, this can -%% only affect a single message at a time. I.e. there is a tiny chance -%% that the first message delivered on queue recovery that has the -%% redelivery bit set false, has in fact been delivered before. +%% delivered already. start_link(Queue, Count) -> gen_server2:start_link(?MODULE, [Queue, Count, self()], []). -- cgit v1.2.1 From f82339ecf5ffc11e4734b5543253bdbbb8140a27 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 22:57:26 +0100 Subject: typo in docs in previous commit. Also changed internal_fetch so its result construction, which whilst not wrong, was at least confusing, and had unexecutable code in it. Associated changes elsewhere. --- src/rabbit_disk_queue.erl | 20 ++++++-------------- src/rabbit_mixed_queue.erl | 8 ++++---- src/rabbit_queue_prefetcher.erl | 16 ++++++++-------- src/rabbit_tests.erl | 22 +++++++++++----------- 4 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 96125031..cb2487f3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -254,10 +254,10 @@ ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(publish/3 :: (queue_name(), message(), bool()) -> 'ok'). -spec(fetch/1 :: (queue_name()) -> - ('empty' | {message(), non_neg_integer(), - bool(), {msg_id(), seq_id()}, non_neg_integer()})). + ('empty' | {{message(), non_neg_integer(), + bool(), {msg_id(), seq_id()}}, non_neg_integer()})). 
-spec(phantom_fetch/1 :: (queue_name()) -> - ( 'empty' | {msg_id(), bool(), bool(), {msg_id(), seq_id()}, + ( 'empty' | {{msg_id(), bool(), bool(), {msg_id(), seq_id()}}, non_neg_integer()})). -spec(prefetch/1 :: (queue_name()) -> 'ok'). -spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). @@ -541,7 +541,7 @@ handle_cast({prefetch, Q, From}, State) -> true -> case internal_fetch(Q, false, true, true, State1) of {ok, empty, State2} -> State2; - {ok, {_MsgId, _IsPersistent, _Delivered, _MsgSeqId, _Rem}, + {ok, {{_MsgId, _IsPersistent, _Delivered, _MsgSeqId}, _Rem}, State2} -> State2 end; false -> State1 @@ -898,15 +898,7 @@ internal_fetch(Q, ReadMsg, FakeDeliver, Advance, {Q, ReadSeqId+1, WriteSeqId}); false -> true end, - {ok, - case Result of - {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}} -> - {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}, - Remaining}; - {Message, BodySize, Delivered, {MsgId, ReadSeqId}} -> - {Message, BodySize, Delivered, {MsgId, ReadSeqId}, - Remaining} - end, State1} + {ok, {Result, Remaining}, State1} end. internal_foldl(Q, Fun, Init, State) -> @@ -966,7 +958,7 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - internal_auto_ack(Q, State) -> case internal_fetch(Q, false, false, true, State) of {ok, empty, State1} -> {ok, State1}; - {ok, {_MsgId, _IsPersistent, _Delivered, MsgSeqId, _Remaining}, + {ok, {{_MsgId, _IsPersistent, _Delivered, MsgSeqId}, _Remaining}, State1} -> remove_messages(Q, [MsgSeqId], true, State1) end. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index f865b19a..cb34750f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -327,7 +327,7 @@ publish_delivered(Msg = %% must call phantom_deliver otherwise the msg remains at %% the head of the queue. This is synchronous, but %% unavoidable as we need the AckTag - {MsgId, IsPersistent, true, AckTag, 0} = + {{MsgId, IsPersistent, true, AckTag}, 0} = rabbit_disk_queue:phantom_fetch(Q), {ok, AckTag, State1}; false -> @@ -354,7 +354,7 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, AckTag = case IsDurable andalso IsPersistent of true -> - {MsgId, IsPersistent, IsDelivered, AckTag1, _PRem} + {{MsgId, IsPersistent, IsDelivered, AckTag1}, _PRem} = rabbit_disk_queue:phantom_fetch(Q), AckTag1; false -> @@ -372,8 +372,8 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, State1 #mqstate { msg_buf = MsgBuf1 }}; _ when Prefetcher == undefined -> State2 = dec_queue_length(1, State1), - {Msg = #basic_message { is_persistent = IsPersistent }, - _Size, IsDelivered, AckTag, _PersistRem} + {{Msg = #basic_message { is_persistent = IsPersistent }, + _Size, IsDelivered, AckTag}, _PersistRem} = rabbit_disk_queue:fetch(Q), AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), {{Msg, IsDelivered, AckTag1, Rem}, State2}; diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index f22aa6af..bab1b0c8 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -139,10 +139,10 @@ %% disk_queue. This msg won't go away and the disk_queue will %% eventually find it. 
However, when it does, it'll simply read the %% next message from the queue (which could now be empty), possibly -%% populate the cache (no harm done), mark the message as deleted (oh -%% well, not a spec violation, and better than the alternative) and -%% try and call prefetcher:publish(Msg) which will result in an error, -%% which the disk_queue catches, as the publish call is to a +%% populate the cache (no harm done), mark the message as delivered +%% (oh well, not a spec violation, and better than the alternative) +%% and try and call prefetcher:publish(Msg) which will result in an +%% error, which the disk_queue catches, as the publish call is to a %% non-existant process. However, the state of the queue has not been %% altered so the mixed_queue will be able to fetch this message as if %% it had never been prefetched. @@ -179,8 +179,8 @@ start_link(Queue, Count) -> gen_server2:start_link(?MODULE, [Queue, Count, self()], []). -publish(Prefetcher, Obj = { #basic_message {}, _Size, _IsDelivered, - _AckTag, _Remaining }) -> +publish(Prefetcher, Obj = {{ #basic_message {}, _Size, _IsDelivered, _AckTag}, + _Remaining }) -> gen_server2:call(Prefetcher, {publish, Obj}, infinity); publish(Prefetcher, empty) -> gen_server2:call(Prefetcher, publish_empty, infinity). @@ -206,8 +206,8 @@ init([Q, Count, QPid]) -> {ok, State, infinity, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -handle_call({publish, { Msg = #basic_message {}, - _Size, IsDelivered, AckTag, _Remaining }}, +handle_call({publish, { { Msg = #basic_message {}, _Size, IsDelivered, AckTag}, + _Remaining }}, DiskQueue, State = #pstate { fetched_count = Fetched, target_count = Target, msg_buf = MsgBuf, buf_length = Length, queue = Q diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a9d6080b..acf3eb7f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -876,7 +876,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> [[fun() -> [begin SeqIds = [begin Remaining = MsgCount - N, - {Message, _TSize, false, SeqId, + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(Q), ok = rdq_match_message(Message, N, Msg, MsgSizeBytes), SeqId @@ -923,7 +923,7 @@ rdq_stress_gc(MsgCount) -> lists:foldl( fun (MsgId, Acc) -> Remaining = MsgCount - MsgId, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, MsgId, Msg, MsgSizeBytes), dict:store(MsgId, SeqId, Acc) @@ -951,7 +951,7 @@ rdq_test_startup_with_queue_gaps() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -973,7 +973,7 @@ rdq_test_startup_with_queue_gaps() -> %% lists:seq(2,500,2) already delivered Seqs2 = [begin Remaining = round(Total - ((Half + N)/2)), - {Message, _TSize, true, SeqId, Remaining} = + {{Message, _TSize, true, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -983,7 +983,7 @@ rdq_test_startup_with_queue_gaps() -> %% and now fetch the rest Seqs3 = [begin Remaining = Total - N, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1008,7 +1008,7 @@ rdq_test_redeliver() -> %% deliver first half Seqs = [begin Remaining = Total 
- N, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1029,7 +1029,7 @@ rdq_test_redeliver() -> %% every-other-from-the-first-half Seqs2 = [begin Remaining = round(Total - N + (Half/2)), - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1037,7 +1037,7 @@ rdq_test_redeliver() -> rabbit_disk_queue:tx_commit(q, [], Seqs2), Seqs3 = [begin Remaining = round((Half - N) / 2) - 1, - {Message, _TSize, true, SeqId, Remaining} = + {{Message, _TSize, true, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1061,7 +1061,7 @@ rdq_test_purge() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1272,7 +1272,7 @@ rdq_test_disk_queue_modes() -> ok = rabbit_disk_queue:tx_commit(q, CommitHalf2, []), Seqs = [begin Remaining = Total - N, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1282,7 +1282,7 @@ rdq_test_disk_queue_modes() -> io:format("To RAM Disk done~n", []), Seqs2 = [begin Remaining = Total - N, - {Message, _TSize, false, SeqId, Remaining} = + {{Message, _TSize, false, SeqId}, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId -- cgit v1.2.1 From e15a097959a9beadf53d88b7c53d5d391f043da9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 23:06:15 +0100 Subject: remove ForceInCache at it's not been used since before the current prefetcher. --- src/rabbit_disk_queue.erl | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index cb2487f3..dfd666c5 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -867,15 +867,10 @@ decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> ok. insert_into_cache(Message = #basic_message { guid = MsgId }, MsgSize, - Forced, State = #dqstate { message_cache = Cache }) -> + State = #dqstate { message_cache = Cache }) -> case cache_is_full(State) of true -> ok; - false -> Count = case Forced of - true -> 0; - false -> 1 - end, - true = ets:insert_new(Cache, {MsgId, Message, - MsgSize, Count}), + false -> true = ets:insert_new(Cache, {MsgId, Message, MsgSize, 1}), ok end. @@ -892,7 +887,7 @@ internal_fetch(Q, ReadMsg, FakeDeliver, Advance, Remaining = WriteSeqId - ReadSeqId - 1, {ok, Result, State1} = internal_read_message( - Q, ReadSeqId, ReadMsg, FakeDeliver, false, State), + Q, ReadSeqId, ReadMsg, FakeDeliver, State), true = case Advance of true -> ets:insert(Sequences, {Q, ReadSeqId+1, WriteSeqId}); @@ -911,11 +906,11 @@ internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> {ok, Acc, State}; internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> {ok, MsgStuff, State1} - = internal_read_message(Q, ReadSeqId, true, true, false, State), + = internal_read_message(Q, ReadSeqId, true, true, State), Acc1 = Fun(MsgStuff, Acc), internal_foldl(Q, WriteSeqId, Fun, State1, Acc1, ReadSeqId + 1). 
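The net effect of dropping the Forced flag, shown on its own: a message body
is cached only when its reference count says more than one queue will want it
again. A minimal sketch of that policy, with the same table layout as the
diff above:

maybe_cache(Cache, MsgId, Message, MsgSize, RefCount) ->
    case RefCount > 1 of
        true  -> true = ets:insert_new(Cache, {MsgId, Message, MsgSize, 1}),
                 ok;
        false -> ok  %% only one queue holds the message: not worth caching
    end.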
-internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) -> +internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, State) -> [Obj = #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), @@ -936,9 +931,8 @@ internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, ForceInCache, State) - read_message_at_offset(FileHdl, Offset, TotalSize), #basic_message { is_persistent=IsPersistent, guid=MsgId } = Message = bin_to_msg(MsgBody), - ok = if RefCount > 1 orelse ForceInCache -> - insert_into_cache - (Message, BodySize, ForceInCache, State1); + ok = if RefCount > 1 -> + insert_into_cache(Message, BodySize, State1); true -> ok %% it's not in the cache and we only %% have 1 queue with the message. So -- cgit v1.2.1 From 675697a2d5586d649a889087c2fd4dd64a173586 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Aug 2009 23:11:33 +0100 Subject: tidying of memory estimation --- src/rabbit_disk_queue.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index dfd666c5..3263ca5e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -624,10 +624,9 @@ memory_use(#dqstate { operation_mode = ram_disk, wordsize = WordSize }) -> WordSize * (mnesia:table_info(rabbit_disk_queue, memory) + - ets:info(MsgLocationEts, memory) + - ets:info(FileSummary, memory) + - ets:info(Cache, memory) + - ets:info(Sequences, memory)); + lists:sum([ets:info(Table, memory) + || Table <- [MsgLocationEts, FileSummary, Cache, + Sequences]])); memory_use(#dqstate { operation_mode = disk_only, file_summary = FileSummary, sequences = Sequences, @@ -640,9 +639,8 @@ memory_use(#dqstate { operation_mode = disk_only, mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord, MsgLocationSizeEstimate = dets:info(MsgLocationDets, size) * EtsBytesPerRecord, - (WordSize * (ets:info(FileSummary, memory) + - ets:info(Cache, memory) + - ets:info(Sequences, memory))) + + (WordSize * (lists:sum([ets:info(Table, memory) + || Table <- [FileSummary, Cache, Sequences]]))) + rabbit_misc:ceil(MnesiaSizeEstimate) + rabbit_misc:ceil(MsgLocationSizeEstimate). -- cgit v1.2.1 From c3c3e7fad74142ba446acb990c7b303ec9f71e48 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 21 Aug 2009 14:02:30 +0100 Subject: Refactoring out code used in multiple paths through internal_fetch and internal_read_message, tidying of API. --- src/rabbit_disk_queue.erl | 216 +++++++++++++++++++++------------------- src/rabbit_mixed_queue.erl | 14 +-- src/rabbit_queue_prefetcher.erl | 16 +-- src/rabbit_tests.erl | 26 ++--- 4 files changed, 144 insertions(+), 128 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3263ca5e..4b8759f8 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -104,6 +104,9 @@ ets_bytes_per_record %% bytes per record in msg_location_ets }). +-record(message_store_entry, + {msg_id, ref_count, file, offset, total_size, is_persistent}). + %% The components: %% %% MsgLocation: this is a (d)ets table which contains: @@ -249,32 +252,32 @@ -ifdef(use_specs). -type(seq_id() :: non_neg_integer()). +-type(ack_tag() :: {msg_id(), seq_id()}). -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(publish/3 :: (queue_name(), message(), bool()) -> 'ok'). +-spec(publish/3 :: (queue_name(), message(), boolean()) -> 'ok'). 
-spec(fetch/1 :: (queue_name()) -> - ('empty' | {{message(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}, non_neg_integer()})). + ('empty' | + {message(), boolean(), ack_tag(), non_neg_integer()})). -spec(phantom_fetch/1 :: (queue_name()) -> - ( 'empty' | {{msg_id(), bool(), bool(), {msg_id(), seq_id()}}, - non_neg_integer()})). + ('empty' | + {msg_id(), boolean(), boolean(), ack_tag(), non_neg_integer()})). -spec(prefetch/1 :: (queue_name()) -> 'ok'). --spec(ack/2 :: (queue_name(), [{msg_id(), seq_id()}]) -> 'ok'). +-spec(ack/2 :: (queue_name(), [ack_tag()]) -> 'ok'). -spec(auto_ack_next_message/1 :: (queue_name()) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [{msg_id(), bool()}], - [{msg_id(), seq_id()}]) -> 'ok'). +-spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean()}], [ack_tag()]) -> + 'ok'). -spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). --spec(requeue/2 :: (queue_name(), [{{msg_id(), seq_id()}, bool()}]) -> 'ok'). +-spec(requeue/2 :: (queue_name(), [{ack_tag(), boolean()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(delete_queue/1 :: (queue_name()) -> 'ok'). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). -spec(length/1 :: (queue_name()) -> non_neg_integer()). --spec(foldl/3 :: (fun (({message(), non_neg_integer(), - bool(), {msg_id(), seq_id()}}, A) -> - A), A, queue_name()) -> A). +-spec(foldl/3 :: (fun ((message(), ack_tag(), boolean(), A) -> A), + A, queue_name()) -> A). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). -spec(to_disk_only_mode/0 :: () -> 'ok'). @@ -455,10 +458,12 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({fetch, Q}, _From, State) -> - {ok, Result, State1} = internal_fetch(Q, true, false, true, State), + {ok, Result, State1} = + internal_fetch_body(Q, record_delivery, pop_queue, State), reply(Result, State1); handle_call({phantom_fetch, Q}, _From, State) -> - {ok, Result, State1} = internal_fetch(Q, false, false, true, State), + {ok, Result, State1} = + internal_fetch_attributes(Q, record_delivery, pop_queue, State), reply(Result, State1); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> State1 = @@ -529,7 +534,8 @@ handle_cast({set_mode, Mode}, State) -> mixed -> fun to_ram_disk_mode/1 end)(State)); handle_cast({prefetch, Q, From}, State) -> - {ok, Result, State1} = internal_fetch(Q, true, false, false, State), + {ok, Result, State1} = + internal_fetch_body(Q, record_delivery, peek_queue, State), Cont = rabbit_misc:with_exit_handler( fun () -> false end, fun () -> @@ -539,10 +545,10 @@ handle_cast({prefetch, Q, From}, State) -> State3 = case Cont of true -> - case internal_fetch(Q, false, true, true, State1) of + case internal_fetch_attributes( + Q, ignore_delivery, pop_queue, State1) of {ok, empty, State2} -> State2; - {ok, {{_MsgId, _IsPersistent, _Delivered, _MsgSeqId}, _Rem}, - State2} -> State2 + {ok, _, State2} -> State2 end; false -> State1 end, @@ -709,53 +715,43 @@ form_filename(Name) -> filename:join(base_directory(), Name). base_directory() -> - filename:join(mnesia:system_info(directory), "rabbit_disk_queue/"). + filename:join(rabbit_mnesia:dir(), "rabbit_disk_queue/"). 
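With the retyped specs above, the fetch/ack round trip can be read straight
off the signatures: fetch/1 now yields a flat {Message, IsDelivered, AckTag,
Remaining} tuple, and ack/2 consumes a list of ack_tag()s. A minimal consumer
sketch against that API:

consume_one(Q) ->
    case rabbit_disk_queue:fetch(Q) of
        empty ->
            empty;
        {Msg, _IsDelivered, AckTag, _Remaining} ->
            ok = rabbit_disk_queue:ack(Q, [AckTag]),  %% ack by opaque tag
            {ok, Msg}
    end.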
dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, - Key) -> + operation_mode = disk_only }, Key) -> dets:lookup(MsgLocationDets, Key); dets_ets_lookup(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, - Key) -> + operation_mode = ram_disk }, Key) -> ets:lookup(MsgLocationEts, Key). dets_ets_delete(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, - Key) -> + operation_mode = disk_only }, Key) -> ok = dets:delete(MsgLocationDets, Key); dets_ets_delete(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, - Key) -> + operation_mode = ram_disk }, Key) -> true = ets:delete(MsgLocationEts, Key), ok. dets_ets_insert(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, - Obj) -> + operation_mode = disk_only }, Obj) -> ok = dets:insert(MsgLocationDets, Obj); dets_ets_insert(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, - Obj) -> + operation_mode = ram_disk }, Obj) -> true = ets:insert(MsgLocationEts, Obj), ok. dets_ets_insert_new(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, - Obj) -> + operation_mode = disk_only }, Obj) -> true = dets:insert_new(MsgLocationDets, Obj); dets_ets_insert_new(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, - Obj) -> + operation_mode = ram_disk }, Obj) -> true = ets:insert_new(MsgLocationEts, Obj). dets_ets_match_object(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, - Obj) -> + operation_mode = disk_only }, Obj) -> dets:match_object(MsgLocationDets, Obj); dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, - Obj) -> + operation_mode = ram_disk }, Obj) -> ets:match_object(MsgLocationEts, Obj). get_read_handle(File, Offset, State = @@ -877,23 +873,80 @@ cache_is_full(#dqstate { message_cache = Cache }) -> %% ---- INTERNAL RAW FUNCTIONS ---- -internal_fetch(Q, ReadMsg, FakeDeliver, Advance, - State = #dqstate { sequences = Sequences }) -> +internal_fetch_body(Q, MarkDelivered, Advance, State) -> + case with_queue_head(Q, MarkDelivered, Advance, State) of + E = {ok, empty, _} -> E; + {ok, AckTag, IsDelivered, StoreEntry, Remaining, State1} -> + {Message, State2} = read_stored_message(StoreEntry, State1), + {ok, {Message, IsDelivered, AckTag, Remaining}, State2} + end. + +internal_fetch_attributes(Q, MarkDelivered, Advance, State) -> + case with_queue_head(Q, MarkDelivered, Advance, State) of + E = {ok, empty, _} -> E; + {ok, AckTag, IsDelivered, + #message_store_entry { msg_id = MsgId, is_persistent = IsPersistent }, + Remaining, State1} -> + {ok, {MsgId, IsPersistent, IsDelivered, AckTag, Remaining}, State1} + end. 
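The dets_ets_* helpers reformatted above all implement one pattern: dispatch
on operation_mode so callers never know whether the message index currently
lives in ets (ram_disk) or dets (disk_only). The same idea, reduced to a
free-standing sketch:

-module(dual_store_sketch).
-export([lookup/2, insert/2]).

%% A store handle is {ets, Tid} or {dets, TabName}; both variants present
%% the same lookup/insert surface, normalising the return values.
lookup({ets, Tid}, Key)  -> ets:lookup(Tid, Key);
lookup({dets, Tab}, Key) -> dets:lookup(Tab, Key).

insert({ets, Tid}, Obj)  -> true = ets:insert(Tid, Obj), ok;
insert({dets, Tab}, Obj) -> ok = dets:insert(Tab, Obj).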
+ +with_queue_head(Q, MarkDelivered, Advance, + State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of {SeqId, SeqId} -> {ok, empty, State}; - {ReadSeqId, WriteSeqId} when WriteSeqId >= ReadSeqId -> + {ReadSeqId, WriteSeqId} when WriteSeqId > ReadSeqId -> Remaining = WriteSeqId - ReadSeqId - 1, - {ok, Result, State1} = - internal_read_message( - Q, ReadSeqId, ReadMsg, FakeDeliver, State), - true = case Advance of - true -> ets:insert(Sequences, - {Q, ReadSeqId+1, WriteSeqId}); - false -> true - end, - {ok, {Result, Remaining}, State1} + {AckTag, IsDelivered, StoreEntry} = + update_message_attributes(Q, ReadSeqId, MarkDelivered, State), + ok = maybe_advance(Advance, Sequences, Q, ReadSeqId, WriteSeqId), + {ok, AckTag, IsDelivered, StoreEntry, Remaining, State} end. +maybe_advance(peek_queue, _, _, _, _) -> + ok; +maybe_advance(pop_queue, Sequences, Q, ReadSeqId, WriteSeqId) -> + true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), + ok. + +read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize }, State) -> + case fetch_and_increment_cache(MsgId, State) of + not_found -> + {FileHdl, State1} = get_read_handle(File, Offset, State), + {ok, {MsgBody, _IsPersistent, EncodedBodySize}} = + read_message_at_offset(FileHdl, Offset, TotalSize), + Message = #basic_message {} = bin_to_msg(MsgBody), + ok = if RefCount > 1 -> + insert_into_cache(Message, EncodedBodySize, State1); + true -> ok + %% it's not in the cache and we only have + %% 1 queue with the message. So don't + %% bother putting it in the cache. + end, + {Message, State1}; + {Message, _EncodedBodySize, _RefCount} -> + {Message, State} + end. + +update_message_attributes(Q, SeqId, MarkDelivered, State) -> + [Obj = + #dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = + mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), + [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = + dets_ets_lookup(State, MsgId), + ok = case {IsDelivered, MarkDelivered} of + {true, _} -> ok; + {false, ignore_delivery} -> ok; + {false, record_delivery} -> + mnesia:dirty_write(rabbit_disk_queue, + Obj #dq_msg_loc {is_delivered = true}) + end, + {{MsgId, SeqId}, IsDelivered, + #message_store_entry { msg_id = MsgId, ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize, + is_persistent = IsPersistent }}. + internal_foldl(Q, Fun, Init, State) -> State1 = #dqstate { sequences = Sequences } = sync_current_file_handle(State), @@ -903,56 +956,19 @@ internal_foldl(Q, Fun, Init, State) -> internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> {ok, Acc, State}; internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> - {ok, MsgStuff, State1} - = internal_read_message(Q, ReadSeqId, true, true, State), - Acc1 = Fun(MsgStuff, Acc), + {AckTag, IsDelivered, StoreEntry} = + update_message_attributes(Q, ReadSeqId, ignore_delivery, State), + {Message, State1} = read_stored_message(StoreEntry, State), + Acc1 = Fun(Message, AckTag, IsDelivered, Acc), internal_foldl(Q, WriteSeqId, Fun, State1, Acc1, ReadSeqId + 1). 
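The decomposition above keeps "where is the message" separate from "read its
body": update_message_attributes/4 returns a #message_store_entry{} locating
the payload, and read_stored_message/2 turns that into a #basic_message{}.
Condensed from the surrounding code, a caller that needs both looks like:

fetch_body(Q, SeqId, State) ->
    {AckTag, IsDelivered, StoreEntry = #message_store_entry {}} =
        update_message_attributes(Q, SeqId, record_delivery, State),
    {Message, State1} = read_stored_message(StoreEntry, State),
    {Message, IsDelivered, AckTag, State1}.

Attribute-only consumers (phantom_fetch, auto-ack) simply stop after the
first call and never touch the disk.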
-internal_read_message(Q, ReadSeqId, ReadMsg, FakeDeliver, State) -> - [Obj = - #dq_msg_loc {is_delivered = Delivered, msg_id = MsgId}] = - mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), - [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = - dets_ets_lookup(State, MsgId), - ok = - if FakeDeliver orelse Delivered -> ok; - true -> - mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc {is_delivered = true}) - end, - case ReadMsg of - true -> - case fetch_and_increment_cache(MsgId, State) of - not_found -> - {FileHdl, State1} = get_read_handle(File, Offset, State), - {ok, {MsgBody, IsPersistent, BodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize), - #basic_message { is_persistent=IsPersistent, guid=MsgId } = - Message = bin_to_msg(MsgBody), - ok = if RefCount > 1 -> - insert_into_cache(Message, BodySize, State1); - true -> ok - %% it's not in the cache and we only - %% have 1 queue with the message. So - %% don't bother putting it in the - %% cache. - end, - {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, - State1}; - {Message, BodySize, _RefCount} -> - {ok, {Message, BodySize, Delivered, {MsgId, ReadSeqId}}, - State} - end; - false -> - {ok, {MsgId, IsPersistent, Delivered, {MsgId, ReadSeqId}}, State} - end. - internal_auto_ack(Q, State) -> - case internal_fetch(Q, false, false, true, State) of - {ok, empty, State1} -> {ok, State1}; - {ok, {{_MsgId, _IsPersistent, _Delivered, MsgSeqId}, _Remaining}, + case internal_fetch_attributes(Q, ignore_delivery, pop_queue, State) of + {ok, empty, State1} -> + {ok, State1}; + {ok, {_MsgId, _IsPersistent, _IsDelivered, AckTag, _Remaining}, State1} -> - remove_messages(Q, [MsgSeqId], true, State1) + remove_messages(Q, [AckTag], true, State1) end. internal_ack(Q, MsgSeqIds, State) -> @@ -1048,7 +1064,7 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, last_sync_offset = SyncOffset }) -> NeedsSync = IsDirty andalso - lists:any(fun ({MsgId, _Delivered}) -> + lists:any(fun ({MsgId, _IsDelivered}) -> [{MsgId, _RefCount, File, Offset, _TotalSize, _IsPersistent}] = dets_ets_lookup(State, MsgId), @@ -1072,12 +1088,12 @@ internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, ok = mnesia:write_lock_table(rabbit_disk_queue), {ok, WriteSeqId1} = lists:foldl( - fun ({MsgId, Delivered}, {ok, SeqId}) -> + fun ({MsgId, IsDelivered}, {ok, SeqId}) -> {mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, msg_id = MsgId, - is_delivered = Delivered + is_delivered = IsDelivered }, write), SeqId + 1} end, {ok, InitWriteSeqId}, PubMsgIds), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index cb34750f..9ad52566 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -104,8 +104,8 @@ init(Queue, IsDurable) -> Len = rabbit_disk_queue:length(Queue), MsgBuf = inc_queue_length(Queue, queue:new(), Len), Size = rabbit_disk_queue:foldl( - fun ({Msg = #basic_message { is_persistent = true }, - _Size, _IsDelivered, _AckTag}, Acc) -> + fun (Msg = #basic_message { is_persistent = true }, + _AckTag, _IsDelivered, Acc) -> Acc + size_of_message(Msg) end, 0, Queue), {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, @@ -324,10 +324,10 @@ publish_delivered(Msg = State1 = gain_memory(MsgSize, State), case IsDurable andalso IsPersistent of true -> - %% must call phantom_deliver otherwise the msg remains at + %% must call phantom_fetch otherwise the msg remains at %% the head of the queue. 
This is synchronous, but %% unavoidable as we need the AckTag - {{MsgId, IsPersistent, true, AckTag}, 0} = + {MsgId, IsPersistent, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), {ok, AckTag, State1}; false -> @@ -354,7 +354,7 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, AckTag = case IsDurable andalso IsPersistent of true -> - {{MsgId, IsPersistent, IsDelivered, AckTag1}, _PRem} + {MsgId, IsPersistent, IsDelivered, AckTag1, _PRem} = rabbit_disk_queue:phantom_fetch(Q), AckTag1; false -> @@ -372,8 +372,8 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, State1 #mqstate { msg_buf = MsgBuf1 }}; _ when Prefetcher == undefined -> State2 = dec_queue_length(1, State1), - {{Msg = #basic_message { is_persistent = IsPersistent }, - _Size, IsDelivered, AckTag}, _PersistRem} + {Msg = #basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTag, _PersistRem} = rabbit_disk_queue:fetch(Q), AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), {{Msg, IsDelivered, AckTag1, Rem}, State2}; diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index bab1b0c8..6f276d86 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -179,8 +179,8 @@ start_link(Queue, Count) -> gen_server2:start_link(?MODULE, [Queue, Count, self()], []). -publish(Prefetcher, Obj = {{ #basic_message {}, _Size, _IsDelivered, _AckTag}, - _Remaining }) -> +publish(Prefetcher, + Obj = { #basic_message {}, _IsDelivered, _AckTag, _Remaining }) -> gen_server2:call(Prefetcher, {publish, Obj}, infinity); publish(Prefetcher, empty) -> gen_server2:call(Prefetcher, publish_empty, infinity). @@ -206,12 +206,12 @@ init([Q, Count, QPid]) -> {ok, State, infinity, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
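The call-then-immediate-reply shape referred to throughout the comments is
worth seeing in one place: the disk queue blocks in publish/2 until the
prefetcher has taken the message, which is what closes the double-delivery
race, while the early gen_server2:reply/2 keeps that block brief. A condensed
sketch, using the record fields of this module:

publish(Prefetcher, Msg) ->
    gen_server2:call(Prefetcher, {publish, Msg}, infinity).

handle_call({publish, Msg}, DiskQueue,
            State = #pstate { msg_buf = MsgBuf, buf_length = Length }) ->
    gen_server2:reply(DiskQueue, ok),  %% unblock the disk queue at once
    {noreply, State #pstate { msg_buf = queue:in(Msg, MsgBuf),
                              buf_length = Length + 1 }}.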
-handle_call({publish, { { Msg = #basic_message {}, _Size, IsDelivered, AckTag}, - _Remaining }}, - DiskQueue, State = - #pstate { fetched_count = Fetched, target_count = Target, - msg_buf = MsgBuf, buf_length = Length, queue = Q - }) -> +handle_call({publish, + {Msg = #basic_message {}, IsDelivered, AckTag, _Remaining}}, + DiskQueue, + State = #pstate { fetched_count = Fetched, target_count = Target, + msg_buf = MsgBuf, buf_length = Length, queue = Q + }) -> gen_server2:reply(DiskQueue, ok), Timeout = if Fetched + 1 == Target -> hibernate; true -> ok = rabbit_disk_queue:prefetch(Q), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index acf3eb7f..2005cbd1 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -876,8 +876,8 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> [[fun() -> [begin SeqIds = [begin Remaining = MsgCount - N, - {{Message, _TSize, false, SeqId}, - Remaining} = rabbit_disk_queue:fetch(Q), + {Message, false, SeqId, Remaining} + = rabbit_disk_queue:fetch(Q), ok = rdq_match_message(Message, N, Msg, MsgSizeBytes), SeqId end || N <- List], @@ -923,7 +923,7 @@ rdq_stress_gc(MsgCount) -> lists:foldl( fun (MsgId, Acc) -> Remaining = MsgCount - MsgId, - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, MsgId, Msg, MsgSizeBytes), dict:store(MsgId, SeqId, Acc) @@ -951,8 +951,8 @@ rdq_test_startup_with_queue_gaps() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {{Message, _TSize, false, SeqId}, Remaining} = - rabbit_disk_queue:fetch(q), + {Message, false, SeqId, Remaining} + = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId end || N <- lists:seq(1,Half)], @@ -973,7 +973,7 @@ rdq_test_startup_with_queue_gaps() -> %% lists:seq(2,500,2) already delivered Seqs2 = [begin Remaining = round(Total - ((Half + N)/2)), - {{Message, _TSize, true, SeqId}, Remaining} = + {Message, true, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -983,7 +983,7 @@ rdq_test_startup_with_queue_gaps() -> %% and now fetch the rest Seqs3 = [begin Remaining = Total - N, - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1008,7 +1008,7 @@ rdq_test_redeliver() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1029,7 +1029,7 @@ rdq_test_redeliver() -> %% every-other-from-the-first-half Seqs2 = [begin Remaining = round(Total - N + (Half/2)), - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1037,7 +1037,7 @@ rdq_test_redeliver() -> rabbit_disk_queue:tx_commit(q, [], Seqs2), Seqs3 = [begin Remaining = round((Half - N) / 2) - 1, - {{Message, _TSize, true, SeqId}, Remaining} = + {Message, true, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1061,7 +1061,7 @@ rdq_test_purge() -> %% deliver first half Seqs = [begin Remaining = Total - N, - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1272,7 
+1272,7 @@ rdq_test_disk_queue_modes() -> ok = rabbit_disk_queue:tx_commit(q, CommitHalf2, []), Seqs = [begin Remaining = Total - N, - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId @@ -1282,7 +1282,7 @@ rdq_test_disk_queue_modes() -> io:format("To RAM Disk done~n", []), Seqs2 = [begin Remaining = Total - N, - {{Message, _TSize, false, SeqId}, Remaining} = + {Message, false, SeqId, Remaining} = rabbit_disk_queue:fetch(q), ok = rdq_match_message(Message, N, Msg, 256), SeqId -- cgit v1.2.1 From 77d688e5fddb1daa4374f42698770e6737e10ea0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 21 Aug 2009 15:14:59 +0100 Subject: cosmetic -> with_queue_head => queue_head Also, time for a new optimisation! YAY! Previously, reading a message off disk meant seeking to the correct position and then reading the data. Now if the handle is already in the right position, then that seek is a waste of quite a lot of time, as it is an OS call. Now, I cache the location of the handle and so avoid seeking when possible. This has a MASSIVE effect on performance, especially in straight line cases, eg where a single prefetcher can drain a queue of disk in about one third of the time it used to take. Just looking at the code coverage from the test suite, there were just 534 seeks and 8582 cases where we found the handle in the right position already. This is a fairly small amount of code, and provides very useful benefits. --- src/rabbit_disk_queue.erl | 56 ++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 4b8759f8..d19469d6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -754,7 +754,7 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, Obj) -> ets:match_object(MsgLocationEts, Obj). -get_read_handle(File, Offset, State = +get_read_handle(File, Offset, TotalSize, State = #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, read_file_handles_limit = ReadFileHandlesLimit, current_file_name = CurName, @@ -766,7 +766,8 @@ get_read_handle(File, Offset, State = true -> State end, Now = now(), - {FileHdl, ReadHdls1, ReadHdlsAge1} = + NewOffset = Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + {FileHdl, OldOffset, ReadHdls1, ReadHdlsAge1} = case dict:find(File, ReadHdls) of error -> {ok, Hdl} = file:open(form_filename(File), @@ -774,21 +775,21 @@ get_read_handle(File, Offset, State = read_ahead]), case dict:size(ReadHdls) < ReadFileHandlesLimit of true -> - {Hdl, ReadHdls, ReadHdlsAge}; - _False -> + {Hdl, 0, ReadHdls, ReadHdlsAge}; + false -> {Then, OldFile, ReadHdlsAge2} = gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, Then}} = + {ok, {OldHdl, _Offset, Then}} = dict:find(OldFile, ReadHdls), ok = file:close(OldHdl), - {Hdl, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} + {Hdl, 0, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} end; - {ok, {Hdl, Then}} -> - {Hdl, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} + {ok, {Hdl, OldOffset1, Then}} -> + {Hdl, OldOffset1, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} end, - ReadHdls2 = dict:store(File, {FileHdl, Now}, ReadHdls1), + ReadHdls2 = dict:store(File, {FileHdl, NewOffset, Now}, ReadHdls1), ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), - {FileHdl, + {FileHdl, Offset /= OldOffset, State1 #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge3} }}. 
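The position cache above in miniature: remember where each read handle was
left, and pay for an OS-level seek only when the next read starts somewhere
else. A self-contained sketch over a raw file handle:

-module(seek_cache_sketch).
-export([pread_seq/3]).

%% Handle state is {Fd, CachedOffset}; returns {Data, NewHandleState}.
pread_seq({Fd, At}, Offset, Len) ->
    ok = case Offset =:= At of
             true  -> ok;  %% handle already positioned: no syscall needed
             false -> {ok, Offset} = file:position(Fd, {bof, Offset}),
                      ok
         end,
    {ok, Data} = file:read(Fd, Len),
    {Data, {Fd, Offset + Len}}.

Sequential fetches from a single queue hit the true branch almost every time,
which is consistent with the 534-seeks-versus-8582-hits figure quoted above.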
sequence_lookup(Sequences, Q) -> @@ -874,7 +875,7 @@ cache_is_full(#dqstate { message_cache = Cache }) -> %% ---- INTERNAL RAW FUNCTIONS ---- internal_fetch_body(Q, MarkDelivered, Advance, State) -> - case with_queue_head(Q, MarkDelivered, Advance, State) of + case queue_head(Q, MarkDelivered, Advance, State) of E = {ok, empty, _} -> E; {ok, AckTag, IsDelivered, StoreEntry, Remaining, State1} -> {Message, State2} = read_stored_message(StoreEntry, State1), @@ -882,7 +883,7 @@ internal_fetch_body(Q, MarkDelivered, Advance, State) -> end. internal_fetch_attributes(Q, MarkDelivered, Advance, State) -> - case with_queue_head(Q, MarkDelivered, Advance, State) of + case queue_head(Q, MarkDelivered, Advance, State) of E = {ok, empty, _} -> E; {ok, AckTag, IsDelivered, #message_store_entry { msg_id = MsgId, is_persistent = IsPersistent }, @@ -890,8 +891,8 @@ internal_fetch_attributes(Q, MarkDelivered, Advance, State) -> {ok, {MsgId, IsPersistent, IsDelivered, AckTag, Remaining}, State1} end. -with_queue_head(Q, MarkDelivered, Advance, - State = #dqstate { sequences = Sequences }) -> +queue_head(Q, MarkDelivered, Advance, + State = #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of {SeqId, SeqId} -> {ok, empty, State}; {ReadSeqId, WriteSeqId} when WriteSeqId > ReadSeqId -> @@ -913,9 +914,10 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, total_size = TotalSize }, State) -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {FileHdl, State1} = get_read_handle(File, Offset, State), + {FileHdl, SeekReq, State1} = + get_read_handle(File, Offset, TotalSize, State), {ok, {MsgBody, _IsPersistent, EncodedBodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize), + read_message_at_offset(FileHdl, Offset, TotalSize, SeekReq), Message = #basic_message {} = bin_to_msg(MsgBody), ok = if RefCount > 1 -> insert_into_cache(Message, EncodedBodySize, State1); @@ -1480,7 +1482,7 @@ close_file(File, State = #dqstate { read_file_handles = case dict:find(File, ReadHdls) of error -> State; - {ok, {Hdl, Then}} -> + {ok, {Hdl, _Offset, Then}} -> ok = file:close(Hdl), State #dqstate { read_file_handles = { dict:erase(File, ReadHdls), @@ -1867,10 +1869,17 @@ append_message(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> KO -> KO end. -read_message_at_offset(FileHdl, Offset, TotalSize) -> +read_message_at_offset(FileHdl, Offset, TotalSize, SeekReq) -> TotalSizeWriteOkBytes = TotalSize + 1, - case file:position(FileHdl, {bof, Offset}) of - {ok, Offset} -> + SeekRes = case SeekReq of + true -> case file:position(FileHdl, {bof, Offset}) of + {ok, Offset} -> ok; + KO -> KO + end; + false -> ok + end, + case SeekRes of + ok -> case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of {ok, < ?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>> -> {ok, {MsgBody, true, BodySize}} end; - KO -> KO + KO1 -> KO1 end; - KO -> KO + KO2 -> KO2 end. 
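The nested KO/KO1/KO2 clauses above follow the usual raw-file idiom: any
non-ok result from position/read is passed back unchanged rather than crashed
on, since a torn file during recovery is an expected case. The same idiom,
minimally:

read_at(FileHdl, Offset, Len) ->
    case file:position(FileHdl, {bof, Offset}) of
        {ok, Offset} -> file:read(FileHdl, Len);  %% {ok, Bin} | eof | {error, _}
        KO           -> KO                        %% propagate the error as-is
    end.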
scan_file_for_valid_messages(File) -> @@ -1931,7 +1940,8 @@ read_next_file_entry(FileHdl, Offset) -> {false, false} -> %% all good, let's continue case file:read(FileHdl, MsgIdBinSize) of {ok, <>} -> - ExpectedAbsPos = Offset + TwoIntegers + TotalSize, + ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + + TotalSize - 1, case file:position(FileHdl, {cur, TotalSize - MsgIdBinSize} ) of -- cgit v1.2.1 From 637ae28ea7871e4bf2ab134bb16bbb27294d5390 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 21 Aug 2009 18:24:02 +0100 Subject: mode => storage_mode in most places Also removed chattiness of mixed_queue on queue mode transitions --- src/rabbit_amqqueue.erl | 8 ++++---- src/rabbit_amqqueue_process.erl | 12 ++++++------ src/rabbit_control.erl | 4 ++-- src/rabbit_mixed_queue.erl | 20 +++++++++----------- src/rabbit_tests.erl | 12 ++++++------ 5 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 6c4c0ebb..51b2e8f5 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,7 @@ -export([notify_sent/2, unblock/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([set_mode/2]). +-export([set_storage_mode/2]). -import(mnesia). -import(gen_server2). @@ -102,7 +102,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(set_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). +-spec(set_storage_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -223,8 +223,8 @@ list(VHostPath) -> map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). -set_mode(QPid, Mode) -> - gen_server2:pcast(QPid, 10, {set_mode, Mode}). +set_storage_mode(QPid, Mode) -> + gen_server2:pcast(QPid, 10, {set_storage_mode, Mode}). info(#amqqueue{ pid = QPid }) -> gen_server2:pcall(QPid, 9, info, infinity). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b1c409b1..6d742b7a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -89,7 +89,7 @@ consumers, transactions, memory, - mode + storage_mode ]). 
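With storage_mode added to the info keys above, the mode becomes visible
through the usual listing command, e.g. against a running broker:

rabbitmqctl list_queues name messages storage_mode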
%%---------------------------------------------------------------------------- @@ -102,7 +102,7 @@ start_link(Q) -> init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), ok = rabbit_queue_mode_manager:register - (self(), false, rabbit_amqqueue, set_mode, [self()]), + (self(), false, rabbit_amqqueue, set_storage_mode, [self()]), {ok, MS} = rabbit_mixed_queue:init(QName, Durable), State = #q{q = Q, owner = none, @@ -527,8 +527,8 @@ i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete; i(arguments, #q{q = #amqqueue{arguments = Arguments}}) -> Arguments; -i(mode, #q{ mixed_state = MS }) -> - rabbit_mixed_queue:info(MS); +i(storage_mode, #q{ mixed_state = MS }) -> + rabbit_mixed_queue:storage_mode(MS); i(pid, _) -> self(); i(messages_ready, #q { mixed_state = MS }) -> @@ -824,11 +824,11 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} end)); -handle_cast({set_mode, Mode}, State = #q { mixed_state = MS }) -> +handle_cast({set_storage_mode, Mode}, State = #q { mixed_state = MS }) -> PendingMessages = lists:flatten([Pending || #tx { pending_messages = Pending} <- all_tx_record()]), - {ok, MS1} = rabbit_mixed_queue:set_mode(Mode, PendingMessages, MS), + {ok, MS1} = rabbit_mixed_queue:set_storage_mode(Mode, PendingMessages, MS), noreply(State #q { mixed_state = MS1 }). handle_info(report_memory, State) -> diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index d5a83ac9..0935dcc8 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -152,8 +152,8 @@ virtual host parameter for which to display results. The default value is \"/\". must be a member of the list [name, durable, auto_delete, arguments, node, messages_ready, messages_unacknowledged, messages_uncommitted, -messages, acks_uncommitted, consumers, transactions, memory, mode]. The default -is to display name and (number of) messages. +messages, acks_uncommitted, consumers, transactions, memory, storage_mode]. The +default is to display name and (number of) messages. must be a member of the list [name, type, durable, auto_delete, arguments]. The default is to display name and type. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9ad52566..4d916cb3 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -39,7 +39,7 @@ tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, length/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). --export([set_mode/3, info/1, +-export([set_storage_mode/3, storage_mode/1, estimate_queue_memory_and_reset_counters/1]). -record(mqstate, { mode, @@ -91,12 +91,12 @@ -spec(length/1 :: (mqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (mqstate()) -> boolean()). --spec(set_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). +-spec(set_storage_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). -spec(estimate_queue_memory_and_reset_counters/1 :: (mqstate()) -> {mqstate(), non_neg_integer(), non_neg_integer(), non_neg_integer()}). --spec(info/1 :: (mqstate()) -> mode()). +-spec(storage_mode/1 :: (mqstate()) -> mode()). -endif. @@ -119,12 +119,11 @@ size_of_message( SumAcc + size(Frag) end, 0, Payload). 
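size_of_message/1, whose tail appears as context in the hunk above, is a
plain fold over the payload's binary fragments. A worked shell example of the
same expression:

1> lists:foldl(fun (Frag, SumAcc) -> SumAcc + size(Frag) end, 0,
               [<<"abc">>, <<"de">>]).
5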
-set_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> +set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; -set_mode(disk, TxnMessages, State = +set_storage_mode(disk, TxnMessages, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, prefetcher = Prefetcher }) -> - rabbit_log:info("Converting queue to disk only mode: ~p~n", [Q]), State1 = State #mqstate { mode = disk }, {MsgBuf1, State2} = case Prefetcher of @@ -159,15 +158,14 @@ set_mode(disk, TxnMessages, State = end, TxnMessages), garbage_collect(), {ok, State2 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; -set_mode(mixed, TxnMessages, State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable }) -> - rabbit_log:info("Converting queue to mixed mode: ~p~n", [Q]), +set_storage_mode(mixed, TxnMessages, State = + #mqstate { mode = disk, is_durable = IsDurable }) -> %% The queue has a token just saying how many msgs are on disk %% (this is already built for us when in disk mode). %% Don't actually do anything to the disk %% Don't start prefetcher just yet because the queue maybe busy - %% wait for hibernate timeout in the amqqueue_process. - + %% Remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_cancel/2 which is why @@ -575,5 +573,5 @@ estimate_queue_memory_and_reset_counters(State = #mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> {State #mqstate { memory_gain = 0, memory_loss = 0 }, 4 * Size, Gain, Loss}. -info(#mqstate { mode = Mode }) -> +storage_mode(#mqstate { mode = Mode }) -> Mode. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 2005cbd1..33ede609 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1080,7 +1080,7 @@ rdq_new_mixed_queue(Q, Durable, Disk) -> {MS1, _, _, _} = rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), case Disk of - true -> {ok, MS2} = rabbit_mixed_queue:set_mode(disk, [], MS1), + true -> {ok, MS2} = rabbit_mixed_queue:set_storage_mode(disk, [], MS1), MS2; false -> MS1 end. 
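After the rename above, a call site reads as a statement about storage rather
than an ambiguous "mode". Condensed from the queue process change earlier in
this commit:

{ok, MS1} = rabbit_mixed_queue:set_storage_mode(disk, PendingMessages, MS),
disk = rabbit_mixed_queue:storage_mode(MS1).

PendingMessages here is the flattened list of uncommitted transactional
messages, which set_storage_mode needs so that non-persistent ones are not
leaked during the transition.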
@@ -1112,11 +1112,11 @@ rdq_test_mixed_queue_modes() -> 30 = rabbit_mixed_queue:length(MS6), io:format("Published a mixture of messages; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS6)]), - {ok, MS7} = rabbit_mixed_queue:set_mode(disk, [], MS6), + {ok, MS7} = rabbit_mixed_queue:set_storage_mode(disk, [], MS6), 30 = rabbit_mixed_queue:length(MS7), io:format("Converted to disk only mode; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS7)]), - {ok, MS8} = rabbit_mixed_queue:set_mode(mixed, [], MS7), + {ok, MS8} = rabbit_mixed_queue:set_storage_mode(mixed, [], MS7), 30 = rabbit_mixed_queue:length(MS8), io:format("Converted to mixed mode; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS8)]), @@ -1131,7 +1131,7 @@ rdq_test_mixed_queue_modes() -> end, MS8, lists:seq(1,10)), 20 = rabbit_mixed_queue:length(MS10), io:format("Delivered initial non persistent messages~n"), - {ok, MS11} = rabbit_mixed_queue:set_mode(disk, [], MS10), + {ok, MS11} = rabbit_mixed_queue:set_storage_mode(disk, [], MS10), 20 = rabbit_mixed_queue:length(MS11), io:format("Converted to disk only mode~n"), rdq_stop(), @@ -1151,7 +1151,7 @@ rdq_test_mixed_queue_modes() -> 0 = rabbit_mixed_queue:length(MS14), {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14), io:format("Delivered and acked all messages~n"), - {ok, MS16} = rabbit_mixed_queue:set_mode(disk, [], MS15), + {ok, MS16} = rabbit_mixed_queue:set_storage_mode(disk, [], MS15), 0 = rabbit_mixed_queue:length(MS16), io:format("Converted to disk only mode~n"), rdq_stop(), @@ -1214,7 +1214,7 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - MS3a end, MS2, MsgsB), Len0 = rabbit_mixed_queue:length(MS4), - {ok, MS5} = rabbit_mixed_queue:set_mode(Mode, MsgsB, MS4), + {ok, MS5} = rabbit_mixed_queue:set_storage_mode(Mode, MsgsB, MS4), Len0 = rabbit_mixed_queue:length(MS5), {ok, MS9} = case CommitOrCancel of -- cgit v1.2.1 From 45cf652845d63f75d827f204edc70d234ff42a2b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 22 Aug 2009 18:23:04 +0100 Subject: Could repeatably read zeros. With the handle position caching in, avoiding seeks opened a lovely opportunity for reading zeros. The read handle had read_ahead turned on, so even though the write handle was being sync'd correctly, the next read was reading cached data. Consequently, reading zeros instead of the real message. This makes a valuable point which is that with read_ahead turned on, the seeks on every read, as was happening previously, were evicting the read_ahead cache, even if the seek was in effect a noop. Thus read_ahead, in combination with the seeks, made no performance gain. Consequently, turning read_ahead off and only seeking when necessary is a performance gain, over (pointlessly) having read_ahead on and seeking every time. Turning read_ahead off also solves this reading zeros bug. Finally, corrected some maths so that we now check to see if the sync offset is < the offset we will reach rather than <= the offset we read from. Because the offsets should only ever be on message boundaries, both will work, but the new version is more intuitively correct. 
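The interaction described above can be reproduced in isolation: the reader
must be opened without read_ahead (so it cannot serve stale buffered bytes),
and the writer synced before its data is read back through another handle. A
minimal sketch:

-module(read_zeros_sketch).
-export([demo/1]).

demo(Path) ->
    {ok, W} = file:open(Path, [write, raw, binary]),
    {ok, R} = file:open(Path, [read, raw, binary]),  %% note: no read_ahead
    ok = file:write(W, <<"payload">>),
    ok = file:sync(W),                               %% sync before reading back
    {ok, <<"payload">>} = file:read(R, 7),
    ok = file:close(W),
    ok = file:close(R).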
--- src/rabbit_disk_queue.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d19469d6..344aff91 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -761,18 +761,17 @@ get_read_handle(File, Offset, TotalSize, State = current_dirty = IsDirty, last_sync_offset = SyncOffset }) -> - State1 = if CurName =:= File andalso IsDirty andalso Offset >= SyncOffset -> + NewOffset = Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + State1 = if CurName =:= File andalso IsDirty andalso NewOffset > SyncOffset -> sync_current_file_handle(State); true -> State end, Now = now(), - NewOffset = Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, {FileHdl, OldOffset, ReadHdls1, ReadHdlsAge1} = case dict:find(File, ReadHdls) of error -> {ok, Hdl} = file:open(form_filename(File), - [read, raw, binary, - read_ahead]), + [read, raw, binary]), case dict:size(ReadHdls) < ReadFileHandlesLimit of true -> {Hdl, 0, ReadHdls, ReadHdlsAge}; -- cgit v1.2.1 From 7eeec412c60abf4172dff808189742e7ec184071 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 23 Aug 2009 17:00:22 +0100 Subject: Sorted out transactions within the disk_queue, ensuring that if they do restart that other data structures cannot be left partially updated, and can continue successfully, in particular, manipulation of ets tables within mnesia transactions. --- src/rabbit_disk_queue.erl | 166 ++++++++++++++++++++++++++-------------------- 1 file changed, 95 insertions(+), 71 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 344aff91..835043c3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1012,8 +1012,6 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, ok = case MnesiaDelete of true -> mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); - txn -> mnesia:delete(rabbit_disk_queue, - {Q, SeqId}, write); _ -> ok end, Files2 @@ -1542,67 +1540,92 @@ load_from_disk(State) -> State1 = load_messages(undefined, Files, State), %% Finally, check there is nothing in mnesia which we haven't %% loaded - State2 = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - {State6, FinalQ, MsgSeqIds2, _Len} = - mnesia:foldl( - fun (#dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = {Q, SeqId} }, - {State3, OldQ, MsgSeqIds, Len}) -> - {State4, MsgSeqIds1, Len1} = - case {OldQ == Q, MsgSeqIds} of - {true, _} when Len < ?BATCH_SIZE -> - {State3, MsgSeqIds, Len}; - {false, []} -> {State3, MsgSeqIds, Len}; - {_, _} -> - {ok, State5} = - remove_messages(Q, MsgSeqIds, - txn, State3), - {State5, [], 0} - end, - case dets_ets_lookup(State4, MsgId) of - [] -> ok = mnesia:delete(rabbit_disk_queue, - {Q, SeqId}, write), - {State4, Q, MsgSeqIds1, Len1}; - [{MsgId, _RefCount, _File, _Offset, - _TotalSize, true}] -> - {State4, Q, MsgSeqIds1, Len1}; - [{MsgId, _RefCount, _File, _Offset, - _TotalSize, false}] -> - {State4, Q, - [{MsgId, SeqId} | MsgSeqIds1], Len1+1} - end - end, {State1, undefined, [], 0}, rabbit_disk_queue), - {ok, State7} = - remove_messages(FinalQ, MsgSeqIds2, txn, State6), - State7 - end), - State8 = extract_sequence_numbers(State2), + Key = mnesia:dirty_first(rabbit_disk_queue), + {ok, State2} = prune_mnesia(State1, Key, [], [], 0), + State3 = extract_sequence_numbers(State2), ok = del_index(), - {ok, State8}. + {ok, State3}. 
+ +prune_mnesia(State, DeleteAcc, RemoveAcc) -> + ok = lists:foldl(fun (Key, ok) -> + mnesia:dirty_delete(rabbit_disk_queue, Key) + end, ok, DeleteAcc), + {ok, _State1} = lists:foldl( + fun ({Q, MsgSeqIds}, {ok, State2}) -> + remove_messages(Q, MsgSeqIds, true, State2) + end, {ok, State}, RemoveAcc). + +prune_mnesia(State, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> + {ok, State}; +prune_mnesia(State, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> + prune_mnesia(State, DeleteAcc, RemoveAcc); +prune_mnesia(State, Key, DeleteAcc, RemoveAcc, Len) -> + [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = + mnesia:dirty_read(rabbit_disk_queue, Key), + {AccHeadLst, RemoveAcc1} = + case RemoveAcc of + [] -> {[], []}; + [{Q, Lst} | Acc2] -> {Lst, Acc2}; + [{_OldQ, []} | Acc2] -> {[], Acc2}; + Acc2 -> {[], Acc2} + end, + {DeleteAcc1, AccHeadLst1, Len1} = + case dets_ets_lookup(State, MsgId) of + [] -> + %% msg hasn't been found on disk, delete it + {[{Q, SeqId} | DeleteAcc], AccHeadLst, Len + 1}; + [{MsgId, _RefCount, _File, _Offset, _TotalSize, true}] -> + %% msg is persistent, keep it + {DeleteAcc, AccHeadLst, Len}; + [{MsgId, _RefCount, _File, _Offset, _TotalSize, false}] -> + %% msg is not persistent, delete it + {DeleteAcc, [{MsgId, SeqId} | AccHeadLst], Len + 1} + end, + RemoveAcc2 = [{Q, AccHeadLst1} | RemoveAcc1], + {State1, Key1, DeleteAcc2, RemoveAcc3, Len2} = + if + Len1 >= ?BATCH_SIZE -> + %% We have no way of knowing how flushing the batch + %% will affect ordering of records within the table, + %% so have no choice but to start again. Although this + %% will make recovery slower for large queues, we + %% guarantee we can start up in constant memory + {ok, State2} = prune_mnesia(State, DeleteAcc1, RemoveAcc2), + Key2 = mnesia:dirty_first(rabbit_disk_queue), + {State2, Key2, [], [], 0}; + true -> + Key2 = mnesia:dirty_next(rabbit_disk_queue, Key), + {State, Key2, DeleteAcc1, RemoveAcc2, Len1} + end, + prune_mnesia(State1, Key1, DeleteAcc2, RemoveAcc3, Len2). extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> - true = rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl( - fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> - NextWrite = SeqId + 1, - case ets:lookup(Sequences, Q) of - [] -> ets:insert_new(Sequences, - {Q, SeqId, NextWrite}); - [Orig = {Q, Read, Write}] -> - Repl = {Q, lists:min([Read, SeqId]), - lists:max([Write, NextWrite])}, - case Orig == Repl of - true -> true; - false -> ets:insert(Sequences, Repl) - end - end - end, true, rabbit_disk_queue) - end), + true = + rabbit_misc:execute_mnesia_transaction( + %% the ets manipulation within this transaction is + %% idempotent, in particular we're only reading from mnesia, + %% and combining what we read with what we find in + %% ets. Should the transaction restart, the non-rolledback + %% data in ets can still be successfully combined with what + %% we find in mnesia + fun() -> + ok = mnesia:read_lock_table(rabbit_disk_queue), + mnesia:foldl( + fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> + NextWrite = SeqId + 1, + case ets:lookup(Sequences, Q) of + [] -> ets:insert_new(Sequences, + {Q, SeqId, NextWrite}); + [Orig = {Q, Read, Write}] -> + Repl = {Q, lists:min([Read, SeqId]), + lists:max([Write, NextWrite])}, + case Orig == Repl of + true -> true; + false -> ets:insert(Sequences, Repl) + end + end + end, true, rabbit_disk_queue) + end), ok = remove_gaps_in_sequences(State), State. 
@@ -1616,17 +1639,18 @@ remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> %% we could shuffle downwards. However, I think there's greater %% likelihood of gaps being at the bottom rather than the top of %% the queue, so shuffling up should be the better bet. - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foreach( - fun ({Q, ReadSeqId, WriteSeqId}) -> - Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), - ReadSeqId1 = ReadSeqId + Gap, - true = ets:insert(Sequences, - {Q, ReadSeqId1, WriteSeqId}) - end, ets:match_object(Sequences, '_')) - end), + QueueBoundaries = + rabbit_misc:execute_mnesia_transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foldl( + fun ({Q, ReadSeqId, WriteSeqId}, Acc) -> + Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), + [{Q, ReadSeqId + Gap, WriteSeqId} | Acc] + end, [], ets:match_object(Sequences, '_')) + end), + true = lists:foldl(fun (Obj, true) -> ets:insert(Sequences, Obj) end, + true, QueueBoundaries), ok. shuffle_up(_Q, SeqId, SeqId, Gap) -> -- cgit v1.2.1 From b67965ee34756355618826bae45fab16bd49e1a5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 23 Aug 2009 17:19:51 +0100 Subject: renaming --- src/rabbit_disk_queue.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 835043c3..c0ed7e62 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1546,7 +1546,7 @@ load_from_disk(State) -> ok = del_index(), {ok, State3}. -prune_mnesia(State, DeleteAcc, RemoveAcc) -> +prune_mnesia_flush_batch(State, DeleteAcc, RemoveAcc) -> ok = lists:foldl(fun (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) end, ok, DeleteAcc), @@ -1558,7 +1558,7 @@ prune_mnesia(State, DeleteAcc, RemoveAcc) -> prune_mnesia(State, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> {ok, State}; prune_mnesia(State, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> - prune_mnesia(State, DeleteAcc, RemoveAcc); + prune_mnesia_flush_batch(State, DeleteAcc, RemoveAcc); prune_mnesia(State, Key, DeleteAcc, RemoveAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), @@ -1590,7 +1590,8 @@ prune_mnesia(State, Key, DeleteAcc, RemoveAcc, Len) -> %% so have no choice but to start again. Although this %% will make recovery slower for large queues, we %% guarantee we can start up in constant memory - {ok, State2} = prune_mnesia(State, DeleteAcc1, RemoveAcc2), + {ok, State2} = + prune_mnesia_flush_batch(State, DeleteAcc1, RemoveAcc2), Key2 = mnesia:dirty_first(rabbit_disk_queue), {State2, Key2, [], [], 0}; true -> -- cgit v1.2.1 From b2d469a292fee7c7080062e06d1b700d783d1fe0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 23 Aug 2009 18:51:02 +0100 Subject: preemptive refactoring and other assorted changes --- src/rabbit_disk_queue.erl | 114 +++++++++++++++++++++------------------------- src/rabbit_mnesia.erl | 5 +- 2 files changed, 54 insertions(+), 65 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c0ed7e62..04c8a825 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -976,39 +976,11 @@ internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, true, State). 
%% Q is only needed if MnesiaDelete /= false -remove_messages(Q, MsgSeqIds, MnesiaDelete, - State = #dqstate { file_summary = FileSummary, - current_file_name = CurName - }) -> +remove_messages(Q, MsgSeqIds, MnesiaDelete, State) -> Files = lists:foldl( fun ({MsgId, SeqId}, Files1) -> - [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = - dets_ets_lookup(State, MsgId), - Files2 = - case RefCount of - 1 -> - ok = dets_ets_delete(State, MsgId), - ok = remove_cache_entry(MsgId, State), - [{File, ValidTotalSize, ContiguousTop, - Left, Right}] = ets:lookup(FileSummary, File), - ContiguousTop1 = - lists:min([ContiguousTop, Offset]), - true = - ets:insert(FileSummary, - {File, (ValidTotalSize-TotalSize- - ?FILE_PACKING_ADJUSTMENT), - ContiguousTop1, Left, Right}), - if CurName =:= File -> Files1; - true -> sets:add_element(File, Files1) - end; - _ when 1 < RefCount -> - ok = decrement_cache(MsgId, State), - ok = dets_ets_insert( - State, {MsgId, RefCount - 1, File, Offset, - TotalSize, IsPersistent}), - Files1 - end, + Files2 = remove_message(MsgId, Files1, State), ok = case MnesiaDelete of true -> mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}); @@ -1019,6 +991,34 @@ remove_messages(Q, MsgSeqIds, MnesiaDelete, State1 = compact(Files, State), {ok, State1}. +remove_message(MsgId, Files, + State = #dqstate { file_summary = FileSummary, + current_file_name = CurName + }) -> + [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = + dets_ets_lookup(State, MsgId), + case RefCount of + 1 -> + ok = dets_ets_delete(State, MsgId), + ok = remove_cache_entry(MsgId, State), + [{File, ValidTotalSize, ContiguousTop, Left, Right}] = + ets:lookup(FileSummary, File), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + true = + ets:insert(FileSummary, + {File, + (ValidTotalSize-TotalSize-?FILE_PACKING_ADJUSTMENT), + ContiguousTop1, Left, Right}), + if CurName =:= File -> Files; + true -> sets:add_element(File, Files) + end; + _ when 1 < RefCount -> + ok = decrement_cache(MsgId, State), + ok = dets_ets_insert(State, {MsgId, RefCount - 1, File, Offset, + TotalSize, IsPersistent}), + Files + end. + internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, guid = MsgId }, State = #dqstate { current_file_handle = CurHdl, @@ -1541,48 +1541,39 @@ load_from_disk(State) -> %% Finally, check there is nothing in mnesia which we haven't %% loaded Key = mnesia:dirty_first(rabbit_disk_queue), - {ok, State2} = prune_mnesia(State1, Key, [], [], 0), + {ok, AlteredFiles} = prune_mnesia(State1, Key, sets:new(), [], 0), + State2 = compact(AlteredFiles, State1), State3 = extract_sequence_numbers(State2), ok = del_index(), {ok, State3}. -prune_mnesia_flush_batch(State, DeleteAcc, RemoveAcc) -> - ok = lists:foldl(fun (Key, ok) -> - mnesia:dirty_delete(rabbit_disk_queue, Key) - end, ok, DeleteAcc), - {ok, _State1} = lists:foldl( - fun ({Q, MsgSeqIds}, {ok, State2}) -> - remove_messages(Q, MsgSeqIds, true, State2) - end, {ok, State}, RemoveAcc). +prune_mnesia_flush_batch(DeleteAcc) -> + lists:foldl(fun (Key, ok) -> + mnesia:dirty_delete(rabbit_disk_queue, Key) + end, ok, DeleteAcc). 
-prune_mnesia(State, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> - {ok, State}; -prune_mnesia(State, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> - prune_mnesia_flush_batch(State, DeleteAcc, RemoveAcc); -prune_mnesia(State, Key, DeleteAcc, RemoveAcc, Len) -> +prune_mnesia(_State, '$end_of_table', Files, _DeleteAcc, 0) -> + {ok, Files}; +prune_mnesia(_State, '$end_of_table', Files, DeleteAcc, _Len) -> + ok = prune_mnesia_flush_batch(DeleteAcc), + {ok, Files}; +prune_mnesia(State, Key, Files, DeleteAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), - {AccHeadLst, RemoveAcc1} = - case RemoveAcc of - [] -> {[], []}; - [{Q, Lst} | Acc2] -> {Lst, Acc2}; - [{_OldQ, []} | Acc2] -> {[], Acc2}; - Acc2 -> {[], Acc2} - end, - {DeleteAcc1, AccHeadLst1, Len1} = + {DeleteAcc1, Files1, Len1} = case dets_ets_lookup(State, MsgId) of [] -> %% msg hasn't been found on disk, delete it - {[{Q, SeqId} | DeleteAcc], AccHeadLst, Len + 1}; + {[{Q, SeqId} | DeleteAcc], Files, Len + 1}; [{MsgId, _RefCount, _File, _Offset, _TotalSize, true}] -> %% msg is persistent, keep it - {DeleteAcc, AccHeadLst, Len}; + {DeleteAcc, Files, Len}; [{MsgId, _RefCount, _File, _Offset, _TotalSize, false}] -> %% msg is not persistent, delete it - {DeleteAcc, [{MsgId, SeqId} | AccHeadLst], Len + 1} + Files2 = remove_message(MsgId, Files, State), + {[{Q, SeqId} | DeleteAcc], Files2, Len + 1} end, - RemoveAcc2 = [{Q, AccHeadLst1} | RemoveAcc1], - {State1, Key1, DeleteAcc2, RemoveAcc3, Len2} = + {Key1, DeleteAcc2, Len2} = if Len1 >= ?BATCH_SIZE -> %% We have no way of knowing how flushing the batch @@ -1590,15 +1581,14 @@ prune_mnesia(State, Key, DeleteAcc, RemoveAcc, Len) -> %% so have no choice but to start again. Although this %% will make recovery slower for large queues, we %% guarantee we can start up in constant memory - {ok, State2} = - prune_mnesia_flush_batch(State, DeleteAcc1, RemoveAcc2), + ok = prune_mnesia_flush_batch(DeleteAcc1), Key2 = mnesia:dirty_first(rabbit_disk_queue), - {State2, Key2, [], [], 0}; + {Key2, [], 0}; true -> Key2 = mnesia:dirty_next(rabbit_disk_queue, Key), - {State, Key2, DeleteAcc1, RemoveAcc2, Len1} + {Key2, DeleteAcc1, Len1} end, - prune_mnesia(State1, Key1, DeleteAcc2, RemoveAcc3, Len2). + prune_mnesia(State, Key1, Files1, DeleteAcc2, Len2). extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> true = diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index dc3c9316..56d02f33 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -147,10 +147,9 @@ table_definitions() -> {attributes, record_info(fields, amqqueue)}]}, {rabbit_disk_queue, [{record_name, dq_msg_loc}, - {type, set}, - {local_content, true}, {attributes, record_info(fields, dq_msg_loc)}, - {disc_copies, [node()]}]} + {disc_copies, [node()]}, + {local_content, true}]} ]. replicated_table_definitions() -> -- cgit v1.2.1 From c524c50c5086e3b8bd62855d5b67f076216756f0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 12:56:29 +0100 Subject: Abstracted out all the read handles stuff to a separate module, and refined the API as discussed. 
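As a rough sketch of how a caller drives the new module (read_chunk and
its arguments are invented for illustration; the module itself follows
in the patch below):

    %% Sketch only. The fun passed to with_file_handle_at/4 returns
    %% {NewOffset, Result}: the cache records NewOffset so the next
    %% sequential access to the same file can skip the seek entirely.
    read_chunk(Path, Offset, Len, HC) ->
        rabbit_handle_cache:with_file_handle_at(
          Path, Offset,
          fun (Hdl) ->
                  case file:read(Hdl, Len) of
                      {ok, Bin} -> {Offset + size(Bin), {ok, Bin}};
                      Other     -> {Offset, Other} %% eof or {error, _}
                  end
          end, HC).

    %% e.g. HC0 = rabbit_handle_cache:init(10, [read, raw, binary]),
    %%      {{ok, Bin}, HC1} = read_chunk("/tmp/0.rdq", 0, 1024, HC0),
    %%      HC2 = rabbit_handle_cache:close_all(HC1).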
--- src/rabbit_disk_queue.erl | 128 +++++++++++++++----------------------------- src/rabbit_handle_cache.erl | 103 +++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 84 deletions(-) create mode 100644 src/rabbit_handle_cache.erl diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 04c8a825..362d1e42 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -92,8 +92,7 @@ current_dirty, %% has the current file been written to %% since the last fsync? file_size_limit, %% how big can our files get? - read_file_handles, %% file handles for reading (LRU) - read_file_handles_limit, %% how many file handles can we open? + read_file_hc_cache, %% file handle cache for reading on_sync_txns, %% list of commiters to run on sync (reversed) commit_timer_ref, %% TRef for our interval timer last_sync_offset, %% current_offset at the last time we sync'd @@ -420,8 +419,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = 0, current_dirty = false, file_size_limit = FileSizeLimit, - read_file_handles = {dict:new(), gb_trees:empty()}, - read_file_handles_limit = ReadFileHandlesLimit, + read_file_hc_cache = rabbit_handle_cache:init( + ReadFileHandlesLimit, + [read, raw, binary, read_ahead]), on_sync_txns = [], commit_timer_ref = undefined, last_sync_offset = 0, @@ -491,8 +491,7 @@ handle_call(stop_vaporise, _From, State) -> true = ets:delete(Sequences), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, - State1 #dqstate { current_file_handle = undefined, - read_file_handles = {dict:new(), gb_trees:empty()}}}; + State1 #dqstate { current_file_handle = undefined }}; %% gen_server now calls terminate, which then calls shutdown handle_call(to_disk_only_mode, _From, State) -> reply(ok, to_disk_only_mode(State)); @@ -576,7 +575,7 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, - read_file_handles = {ReadHdls, _ReadHdlsAge} + read_file_hc_cache = HC }) -> %% deliberately ignoring return codes here State1 = stop_commit_timer(stop_memory_timer(State)), @@ -589,12 +588,10 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, _ -> sync_current_file_handle(State), file:close(FileHdl) end, - dict:fold(fun (_File, Hdl, _Acc) -> - file:close(Hdl) - end, ok, ReadHdls), + HC1 = rabbit_handle_cache:close_all(HC), State1 #dqstate { current_file_handle = undefined, current_dirty = false, - read_file_handles = {dict:new(), gb_trees:empty()}, + read_file_hc_cache = HC1, memory_report_timer_ref = undefined }. @@ -754,42 +751,20 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, Obj) -> ets:match_object(MsgLocationEts, Obj). 
-get_read_handle(File, Offset, TotalSize, State = - #dqstate { read_file_handles = {ReadHdls, ReadHdlsAge}, - read_file_handles_limit = ReadFileHandlesLimit, - current_file_name = CurName, - current_dirty = IsDirty, - last_sync_offset = SyncOffset - }) -> - NewOffset = Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, - State1 = if CurName =:= File andalso IsDirty andalso NewOffset > SyncOffset -> +with_read_handle_at(File, Offset, Fun, State = + #dqstate { read_file_hc_cache = HC, + current_file_name = CurName, + current_dirty = IsDirty, + last_sync_offset = SyncOffset + }) -> + State1 = if CurName =:= File andalso IsDirty andalso Offset >= SyncOffset -> sync_current_file_handle(State); true -> State end, - Now = now(), - {FileHdl, OldOffset, ReadHdls1, ReadHdlsAge1} = - case dict:find(File, ReadHdls) of - error -> - {ok, Hdl} = file:open(form_filename(File), - [read, raw, binary]), - case dict:size(ReadHdls) < ReadFileHandlesLimit of - true -> - {Hdl, 0, ReadHdls, ReadHdlsAge}; - false -> - {Then, OldFile, ReadHdlsAge2} = - gb_trees:take_smallest(ReadHdlsAge), - {ok, {OldHdl, _Offset, Then}} = - dict:find(OldFile, ReadHdls), - ok = file:close(OldHdl), - {Hdl, 0, dict:erase(OldFile, ReadHdls), ReadHdlsAge2} - end; - {ok, {Hdl, OldOffset1, Then}} -> - {Hdl, OldOffset1, ReadHdls, gb_trees:delete(Then, ReadHdlsAge)} - end, - ReadHdls2 = dict:store(File, {FileHdl, NewOffset, Now}, ReadHdls1), - ReadHdlsAge3 = gb_trees:enter(Now, File, ReadHdlsAge1), - {FileHdl, Offset /= OldOffset, - State1 #dqstate { read_file_handles = {ReadHdls2, ReadHdlsAge3} }}. + FilePath = form_filename(File), + {Result, HC1} = + rabbit_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), + {Result, State1 #dqstate { read_file_hc_cache = HC1 }}. sequence_lookup(Sequences, Q) -> case ets:lookup(Sequences, Q) of @@ -913,10 +888,14 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, total_size = TotalSize }, State) -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {FileHdl, SeekReq, State1} = - get_read_handle(File, Offset, TotalSize, State), - {ok, {MsgBody, _IsPersistent, EncodedBodySize}} = - read_message_at_offset(FileHdl, Offset, TotalSize, SeekReq), + {{ok, {MsgBody, _IsPersistent, EncodedBodySize}}, State1} = + with_read_handle_at( + File, Offset, + fun(Hdl) -> + {ok, _} = Res = + read_message_from_disk(Hdl, TotalSize), + {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, Res} + end, State), Message = #basic_message {} = bin_to_msg(MsgBody), ok = if RefCount > 1 -> insert_into_cache(Message, EncodedBodySize, State1); @@ -1474,17 +1453,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), ok. -close_file(File, State = #dqstate { read_file_handles = - {ReadHdls, ReadHdlsAge} }) -> - case dict:find(File, ReadHdls) of - error -> - State; - {ok, {Hdl, _Offset, Then}} -> - ok = file:close(Hdl), - State #dqstate { read_file_handles = - { dict:erase(File, ReadHdls), - gb_trees:delete(Then, ReadHdlsAge) } } - end. +close_file(File, State = #dqstate { read_file_hc_cache = HC }) -> + HC1 = rabbit_handle_cache:close_file(form_filename(File), HC), + State #dqstate { read_file_hc_cache = HC1 }. delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> [{File, ValidData, _ContiguousTop, Left, Right}] = @@ -1883,33 +1854,22 @@ append_message(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> KO -> KO end. 
-read_message_at_offset(FileHdl, Offset, TotalSize, SeekReq) -> +read_message_from_disk(FileHdl, TotalSize) -> TotalSizeWriteOkBytes = TotalSize + 1, - SeekRes = case SeekReq of - true -> case file:position(FileHdl, {bof, Offset}) of - {ok, Offset} -> ok; - KO -> KO - end; - false -> ok - end, - case SeekRes of - ok -> - case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of - {ok, <>} -> - BodySize = TotalSize - MsgIdBinSize, - case Rest of - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, - ?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>> -> - {ok, {MsgBody, false, BodySize}}; - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, - ?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>> -> - {ok, {MsgBody, true, BodySize}} - end; - KO1 -> KO1 + case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of + {ok, <>} -> + BodySize = TotalSize - MsgIdBinSize, + case Rest of + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + ?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>> -> + {ok, {MsgBody, false, BodySize}}; + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + ?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>> -> + {ok, {MsgBody, true, BodySize}} end; - KO2 -> KO2 + KO -> KO end. scan_file_for_valid_messages(File) -> diff --git a/src/rabbit_handle_cache.erl b/src/rabbit_handle_cache.erl new file mode 100644 index 00000000..35094296 --- /dev/null +++ b/src/rabbit_handle_cache.erl @@ -0,0 +1,103 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_handle_cache). + +-export([init/2, close_all/1, close_file/2, with_file_handle_at/4]). + +-record(hcstate, + { limit, %% how many file handles can we open? + handles, %% dict of the files to their handles, age and offset + ages, %% gb_tree of the files, keyed by age + mode %% the mode to open the files as + }). + +init(Limit, OpenMode) -> + #hcstate { limit = Limit, + handles = dict:new(), + ages = gb_trees:empty(), + mode = OpenMode + }. + +close_all(State = #hcstate { handles = Handles }) -> + dict:fold(fun (_File, {Hdl, _Offset, _Then}, _Acc) -> + file:close(Hdl) + end, ok, Handles), + State #hcstate { handles = dict:new(), ages = gb_trees:empty() }. 
+ +close_file(File, State = #hcstate { handles = Handles, + ages = Ages }) -> + case dict:find(File, Handles) of + error -> + State; + {ok, {Hdl, _Offset, Then}} -> + ok = file:close(Hdl), + State #hcstate { handles = dict:erase(File, Handles), + ages = gb_trees:delete(Then, Ages) + } + end. + +with_file_handle_at(File, Offset, Fun, State = #hcstate { handles = Handles, + ages = Ages, + limit = Limit, + mode = Mode }) -> + {FileHdl, OldOffset, Handles1, Ages1} = + case dict:find(File, Handles) of + error -> + {ok, Hdl} = file:open(File, Mode), + case dict:size(Handles) < Limit of + true -> + {Hdl, 0, Handles, Ages}; + false -> + {Then, OldFile, Ages2} = gb_trees:take_smallest(Ages), + {ok, {OldHdl, _Offset, Then}} = + dict:find(OldFile, Handles), + ok = file:close(OldHdl), + {Hdl, 0, dict:erase(OldFile, Handles), Ages2} + end; + {ok, {Hdl, OldOffset1, Then}} -> + {Hdl, OldOffset1, Handles, gb_trees:delete(Then, Ages)} + end, + SeekRes = case Offset == OldOffset of + true -> ok; + false -> case file:position(FileHdl, {bof, Offset}) of + {ok, Offset} -> ok; + KO -> KO + end + end, + {NewOffset, Result} = case SeekRes of + ok -> Fun(FileHdl); + KO1 -> {Offset, KO1} + end, + Now = now(), + Handles2 = dict:store(File, {FileHdl, NewOffset, Now}, Handles1), + Ages3 = gb_trees:enter(Now, File, Ages1), + {Result, State #hcstate { handles = Handles2, ages = Ages3 }}. -- cgit v1.2.1 From af19e50aae0afbdff0556187003a19ba1d413573 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 13:01:46 +0100 Subject: cosmetic --- src/rabbit_handle_cache.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_handle_cache.erl b/src/rabbit_handle_cache.erl index 35094296..b99b8bfb 100644 --- a/src/rabbit_handle_cache.erl +++ b/src/rabbit_handle_cache.erl @@ -61,7 +61,7 @@ close_file(File, State = #hcstate { handles = Handles, {ok, {Hdl, _Offset, Then}} -> ok = file:close(Hdl), State #hcstate { handles = dict:erase(File, Handles), - ages = gb_trees:delete(Then, Ages) + ages = gb_trees:delete(Then, Ages) } end. -- cgit v1.2.1 From 67dfd462cd701a1fac6f5e2004b56681a3349881 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 13:06:11 +0100 Subject: More cosmetics. Also, if the seek failed, it's probably safer to assume that we've not changed position. However, it's really expected life will stop pretty soon after anyway, so it's probably not too important. 
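Sketched in isolation (names invented), the bookkeeping errs on the side
of the old offset when the seek itself fails, since in that case we
cannot know where the handle really is; at worst the recorded offset is
wrong and the next access pays for one extra seek:

    %% Sketch only. Fun returns {NewOffset, Result}; on a failed seek
    %% we record OldOffset rather than the offset we asked for.
    run_with_position(Hdl, SeekRes, OldOffset, Fun) ->
        case SeekRes of
            ok -> Fun(Hdl);
            KO -> {OldOffset, KO} %% assume the handle did not move
        end.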
--- src/rabbit_handle_cache.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_handle_cache.erl b/src/rabbit_handle_cache.erl index b99b8bfb..fcd79269 100644 --- a/src/rabbit_handle_cache.erl +++ b/src/rabbit_handle_cache.erl @@ -74,7 +74,7 @@ with_file_handle_at(File, Offset, Fun, State = #hcstate { handles = Handles, error -> {ok, Hdl} = file:open(File, Mode), case dict:size(Handles) < Limit of - true -> + true -> {Hdl, 0, Handles, Ages}; false -> {Then, OldFile, Ages2} = gb_trees:take_smallest(Ages), @@ -87,15 +87,15 @@ with_file_handle_at(File, Offset, Fun, State = #hcstate { handles = Handles, {Hdl, OldOffset1, Handles, gb_trees:delete(Then, Ages)} end, SeekRes = case Offset == OldOffset of - true -> ok; + true -> ok; false -> case file:position(FileHdl, {bof, Offset}) of {ok, Offset} -> ok; - KO -> KO + KO -> KO end end, {NewOffset, Result} = case SeekRes of - ok -> Fun(FileHdl); - KO1 -> {Offset, KO1} + ok -> Fun(FileHdl); + KO1 -> {OldOffset, KO1} end, Now = now(), Handles2 = dict:store(File, {FileHdl, NewOffset, Now}, Handles1), -- cgit v1.2.1 From 38b9808ca1e4acc1756701f059e2a7754efe27a2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 15:22:19 +0100 Subject: All the below, done. - encoded body size is being cached, but doesn't need to be. - extract_sequence_numbers and remove_gaps_in_sequences should just take #dqstate.sequences, not the whole #dqstate. - rename length/1 to len/1 - that's what it's called in other APIs, e.g. queue. It will also allow you to remove the erlang: prefix from the calls to erlang:length. - load_messages/3 - instead of taking and returning a state, make it take the file_summary only, and return {current_file_num, current_file_name, current_offset}. (not done due to Good Reasons, but the 1st base case that was objected to has vanished) --- src/rabbit_disk_queue.erl | 155 +++++++++++++++++++++++---------------------- src/rabbit_mixed_queue.erl | 2 +- 2 files changed, 79 insertions(+), 78 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 362d1e42..f6a1c8ca 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -43,7 +43,7 @@ tx_publish/1, tx_commit/3, tx_cancel/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, auto_ack_next_message/1, - requeue_next_n/2, length/1, foldl/3, prefetch/1 + requeue_next_n/2, len/1, foldl/3, prefetch/1 ]). -export([filesync/0, cache_info/0]). @@ -274,7 +274,7 @@ -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(delete_queue/1 :: (queue_name()) -> 'ok'). -spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). --spec(length/1 :: (queue_name()) -> non_neg_integer()). +-spec(len/1 :: (queue_name()) -> non_neg_integer()). -spec(foldl/3 :: (fun ((message(), ack_tag(), boolean(), A) -> A), A, queue_name()) -> A). -spec(stop/0 :: () -> 'ok'). @@ -337,8 +337,8 @@ delete_non_durable_queues(DurableQueues) -> gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, infinity). -length(Q) -> - gen_server2:call(?SERVER, {length, Q}, infinity). +len(Q) -> + gen_server2:call(?SERVER, {len, Q}, infinity). foldl(Fun, Init, Acc) -> gen_server2:call(?SERVER, {foldl, Fun, Init, Acc}, infinity). 
@@ -474,7 +474,7 @@ handle_call({purge, Q}, _From, State) -> reply(Count, State1); handle_call(filesync, _From, State) -> reply(ok, sync_current_file_handle(State)); -handle_call({length, Q}, _From, State = #dqstate { sequences = Sequences }) -> +handle_call({len, Q}, _From, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), reply(WriteSeqId - ReadSeqId, State); handle_call({foldl, Fun, Init, Q}, _From, State) -> @@ -817,13 +817,13 @@ fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> case ets:lookup(Cache, MsgId) of [] -> not_found; - [{MsgId, Message, MsgSize, _RefCount}] -> - NewRefCount = ets:update_counter(Cache, MsgId, {4, 1}), - {Message, MsgSize, NewRefCount} + [{MsgId, Message, _RefCount}] -> + NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), + {Message, NewRefCount} end. decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> - true = try case ets:update_counter(Cache, MsgId, {4, -1}) of + true = try case ets:update_counter(Cache, MsgId, {3, -1}) of N when N =< 0 -> true = ets:delete(Cache, MsgId); _N -> true end @@ -835,15 +835,15 @@ decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> end, ok. -insert_into_cache(Message = #basic_message { guid = MsgId }, MsgSize, - State = #dqstate { message_cache = Cache }) -> - case cache_is_full(State) of +insert_into_cache(Message = #basic_message { guid = MsgId }, + #dqstate { message_cache = Cache }) -> + case cache_is_full(Cache) of true -> ok; - false -> true = ets:insert_new(Cache, {MsgId, Message, MsgSize, 1}), + false -> true = ets:insert_new(Cache, {MsgId, Message, 1}), ok end. -cache_is_full(#dqstate { message_cache = Cache }) -> +cache_is_full(Cache) -> ets:info(Cache, memory) > ?CACHE_MAX_SIZE. %% ---- INTERNAL RAW FUNCTIONS ---- @@ -888,7 +888,7 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, total_size = TotalSize }, State) -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgBody, _IsPersistent, EncodedBodySize}}, State1} = + {{ok, {MsgBody, _IsPersistent, _BodySize}}, State1} = with_read_handle_at( File, Offset, fun(Hdl) -> @@ -898,14 +898,14 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, end, State), Message = #basic_message {} = bin_to_msg(MsgBody), ok = if RefCount > 1 -> - insert_into_cache(Message, EncodedBodySize, State1); + insert_into_cache(Message, State1); true -> ok %% it's not in the cache and we only have %% 1 queue with the message. So don't %% bother putting it in the cache. end, {Message, State1}; - {Message, _EncodedBodySize, _RefCount} -> + {Message, _RefCount} -> {Message, State} end. @@ -1100,8 +1100,7 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, internal_tx_cancel(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned - MsgSeqIds = lists:zip(MsgIds, lists:duplicate(erlang:length(MsgIds), - undefined)), + MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), remove_messages(undefined, MsgSeqIds, false, State). internal_requeue(_Q, [], State) -> @@ -1272,6 +1271,12 @@ compact(FilesSet, State) -> end, [], Files), lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). +%% At this stage, we simply know that the file has had msgs removed +%% from it. However, we don't know if we need to merge it left (which +%% is what we would prefer), or merge it right. 
If we merge left, then +%% this file is the source, and the left file is the destination. If +%% we merge right then this file is the destination and the right file +%% is the source. combine_file(File, State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> @@ -1508,15 +1513,19 @@ load_from_disk(State) -> ok = recover_crashed_compactions(Files, TmpFiles), %% There should be no more tmp files now, so go ahead and load the %% whole lot - State1 = load_messages(undefined, Files, State), + Files1 = case Files of + [] -> [State #dqstate.current_file_name]; + _ -> Files + end, + State1 = load_messages(undefined, Files1, State), %% Finally, check there is nothing in mnesia which we haven't %% loaded Key = mnesia:dirty_first(rabbit_disk_queue), {ok, AlteredFiles} = prune_mnesia(State1, Key, sets:new(), [], 0), State2 = compact(AlteredFiles, State1), - State3 = extract_sequence_numbers(State2), + ok = extract_sequence_numbers(State2 #dqstate.sequences), ok = del_index(), - {ok, State3}. + {ok, State2}. prune_mnesia_flush_batch(DeleteAcc) -> lists:foldl(fun (Key, ok) -> @@ -1561,7 +1570,7 @@ prune_mnesia(State, Key, Files, DeleteAcc, Len) -> end, prune_mnesia(State, Key1, Files1, DeleteAcc2, Len2). -extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> +extract_sequence_numbers(Sequences) -> true = rabbit_misc:execute_mnesia_transaction( %% the ets manipulation within this transaction is @@ -1588,10 +1597,9 @@ extract_sequence_numbers(State = #dqstate { sequences = Sequences }) -> end end, true, rabbit_disk_queue) end), - ok = remove_gaps_in_sequences(State), - State. + ok = remove_gaps_in_sequences(Sequences). -remove_gaps_in_sequences(#dqstate { sequences = Sequences }) -> +remove_gaps_in_sequences(Sequences) -> %% read the comments at internal_requeue. %% Because we are at startup, we know that no sequence ids have @@ -1634,11 +1642,6 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> end, shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). 
-load_messages(undefined, [], - State = #dqstate { file_summary = FileSummary, - current_file_name = CurName }) -> - true = ets:insert_new(FileSummary, {CurName, 0, 0, undefined, undefined}), - State; load_messages(Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), Offset = @@ -1655,15 +1658,15 @@ load_messages(Left, [File|Files], State = #dqstate { file_summary = FileSummary }) -> %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), - {ValidMessagesRev, ValidTotalSize} = lists:foldl( + {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case erlang:length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, - msg_id)) of + case length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_' + }, + msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = dets_ets_insert_new @@ -1674,10 +1677,9 @@ load_messages(Left, [File|Files], } end end, {[], 0}, Messages), - %% foldl reverses lists and find_contiguous_block_prefix needs - %% elems in the same order as from scan_file_for_valid_messages - {ContiguousTop, _} = find_contiguous_block_prefix( - lists:reverse(ValidMessagesRev)), + %% foldl reverses lists, find_contiguous_block_prefix needs + %% msgs eldest first, so, ValidMessages is the right way round + {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), Right = case Files of [] -> undefined; [F|_] -> F @@ -1697,13 +1699,13 @@ recover_crashed_compactions(Files, TmpFiles) -> verify_messages_in_mnesia(MsgIds) -> lists:foreach( fun (MsgId) -> - true = 0 < erlang:length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, - msg_id)) + true = 0 < length(mnesia:dirty_index_match_object + (rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, + queue_and_seq_id = '_', + is_delivered = '_' + }, + msg_id)) end, MsgIds). grab_msg_id({MsgId, _IsPersistent, _TotalSize, _FileOffset}) -> @@ -1758,7 +1760,8 @@ recover_crashed_compactions1(Files, TmpFile) -> %% main file is a valid message in mnesia verify_messages_in_mnesia(MsgIds), %% The main file should be contiguous - {Top, MsgIds} = find_contiguous_block_prefix(UncorruptedMessages), + {Top, MsgIds} = find_contiguous_block_prefix( + lists:reverse(UncorruptedMessages)), %% we should have that none of the messages in the prefix %% are in the tmp file true = lists:all(fun (MsgId) -> @@ -1800,28 +1803,22 @@ recover_crashed_compactions1(Files, TmpFile) -> end, ok. -%% this assumes that the messages are ordered such that the highest -%% address is at the head of the list. This matches what -%% scan_file_for_valid_messages produces +%% takes the list in *ascending* order (i.e. oldest message +%% first). This is the opposite of whach scan_file_for_valid_messages +%% produces. The list of msgs that is produced is youngest first find_contiguous_block_prefix([]) -> {0, []}; -find_contiguous_block_prefix([ {MsgId, _IsPersistent, TotalSize, Offset} - | Tail]) -> - case find_contiguous_block_prefix(Tail, Offset, [MsgId]) of - {ok, Acc} -> {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, - lists:reverse(Acc)}; - Res -> Res - end. 
-find_contiguous_block_prefix([], 0, Acc) -> - {ok, Acc}; -find_contiguous_block_prefix([], _N, _Acc) -> - {0, []}; -find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, Offset} | Tail], - ExpectedOffset, Acc) - when ExpectedOffset =:= Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT -> - find_contiguous_block_prefix(Tail, Offset, [MsgId|Acc]); -find_contiguous_block_prefix(List, _ExpectedOffset, _Acc) -> - find_contiguous_block_prefix(List). - +find_contiguous_block_prefix(List) -> + find_contiguous_block_prefix(List, 0, []). + +find_contiguous_block_prefix([], ExpectedOffset, MsgIds) -> + {ExpectedOffset, MsgIds}; +find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, ExpectedOffset} + | Tail], ExpectedOffset, MsgIds) -> + ExpectedOffset1 = ExpectedOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]); +find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> + {ExpectedOffset, MsgIds}. + file_name_sort(A, B) -> ANum = list_to_integer(filename:rootname(A)), BNum = list_to_integer(filename:rootname(B)), @@ -1873,11 +1870,15 @@ read_message_from_disk(FileHdl, TotalSize) -> end. scan_file_for_valid_messages(File) -> - {ok, Hdl} = file:open(File, [raw, binary, read]), - Valid = scan_file_for_valid_messages(Hdl, 0, []), - %% if something really bad's happened, the close could fail, but ignore - file:close(Hdl), - Valid. + case file:open(File, [raw, binary, read]) of + {ok, Hdl} -> + Valid = scan_file_for_valid_messages(Hdl, 0, []), + %% if something really bad's happened, the close could fail, but ignore + file:close(Hdl), + Valid; + {error, enoent} -> {ok, []}; + {error, Reason} -> throw({error, {unable_to_scan_file, File, Reason}}) + end. scan_file_for_valid_messages(FileHdl, Offset, Acc) -> case read_next_file_entry(FileHdl, Offset) of diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 4d916cb3..771a920f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -101,7 +101,7 @@ -endif. init(Queue, IsDurable) -> - Len = rabbit_disk_queue:length(Queue), + Len = rabbit_disk_queue:len(Queue), MsgBuf = inc_queue_length(Queue, queue:new(), Len), Size = rabbit_disk_queue:foldl( fun (Msg = #basic_message { is_persistent = true }, -- cgit v1.2.1 From c4444826cd389c3edc9198c16805e1e29351dcd4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 15:25:32 +0100 Subject: english preferable to gobbledegook --- src/rabbit_disk_queue.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index f6a1c8ca..33a1aaa8 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1803,9 +1803,9 @@ recover_crashed_compactions1(Files, TmpFile) -> end, ok. -%% takes the list in *ascending* order (i.e. oldest message -%% first). This is the opposite of whach scan_file_for_valid_messages -%% produces. The list of msgs that is produced is youngest first +%% Takes the list in *ascending* order (i.e. eldest message +%% first). This is the opposite of what scan_file_for_valid_messages +%% produces. The list of msgs that is produced is youngest first. find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix(List) -> find_contiguous_block_prefix(List, 0, []). -- cgit v1.2.1 From bfdf4b3e6883ce2f3360e25bb0cfffc015c58446 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 16:53:35 +0100 Subject: handle_cache => file_handle_cache. 
Also switched to using the #message_store_entry record throughout. --- src/rabbit_disk_queue.erl | 111 +++++++++++++++++++++------------------ src/rabbit_file_handle_cache.erl | 103 ++++++++++++++++++++++++++++++++++++ src/rabbit_handle_cache.erl | 103 ------------------------------------ 3 files changed, 162 insertions(+), 155 deletions(-) create mode 100644 src/rabbit_file_handle_cache.erl delete mode 100644 src/rabbit_handle_cache.erl diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 33a1aaa8..aee91f5d 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -397,12 +397,13 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {min_no_slots, 1024*1024}, %% man says this should be <= 32M. But it works... {max_no_slots, 30*1024*1024}, - {type, set} + {type, set}, + {keypos, 2} ]), %% it would be better to have this as private, but dets:from_ets/2 %% seems to blow up if it is set private - MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected]), + MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, 2}]), InitName = "0" ++ ?FILE_EXTENSION, State = @@ -419,7 +420,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = 0, current_dirty = false, file_size_limit = FileSizeLimit, - read_file_hc_cache = rabbit_handle_cache:init( + read_file_hc_cache = rabbit_file_handle_cache:init( ReadFileHandlesLimit, [read, raw, binary, read_ahead]), on_sync_txns = [], @@ -588,7 +589,7 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, _ -> sync_current_file_handle(State), file:close(FileHdl) end, - HC1 = rabbit_handle_cache:close_all(HC), + HC1 = rabbit_file_handle_cache:close_all(HC), State1 #dqstate { current_file_handle = undefined, current_dirty = false, read_file_hc_cache = HC1, @@ -763,7 +764,7 @@ with_read_handle_at(File, Offset, Fun, State = end, FilePath = form_filename(File), {Result, HC1} = - rabbit_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), + rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), {Result, State1 #dqstate { read_file_hc_cache = HC1 }}. sequence_lookup(Sequences, Q) -> @@ -913,7 +914,7 @@ update_message_attributes(Q, SeqId, MarkDelivered, State) -> [Obj = #dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), - [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = + [StoreEntry = #message_store_entry { msg_id = MsgId }] = dets_ets_lookup(State, MsgId), ok = case {IsDelivered, MarkDelivered} of {true, _} -> ok; @@ -922,10 +923,7 @@ update_message_attributes(Q, SeqId, MarkDelivered, State) -> mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) end, - {{MsgId, SeqId}, IsDelivered, - #message_store_entry { msg_id = MsgId, ref_count = RefCount, file = File, - offset = Offset, total_size = TotalSize, - is_persistent = IsPersistent }}. + {{MsgId, SeqId}, IsDelivered, StoreEntry}. 
internal_foldl(Q, Fun, Init, State) -> State1 = #dqstate { sequences = Sequences } = @@ -974,7 +972,9 @@ remove_message(MsgId, Files, State = #dqstate { file_summary = FileSummary, current_file_name = CurName }) -> - [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] = + [StoreEntry = + #message_store_entry { msg_id = MsgId, ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize }] = dets_ets_lookup(State, MsgId), case RefCount of 1 -> @@ -993,8 +993,8 @@ remove_message(MsgId, Files, end; _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), - ok = dets_ets_insert(State, {MsgId, RefCount - 1, File, Offset, - TotalSize, IsPersistent}), + ok = dets_ets_insert(State, StoreEntry #message_store_entry + { ref_count = RefCount - 1 }), Files end. @@ -1011,8 +1011,10 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, {ok, TotalSize} = append_message(CurHdl, MsgId, msg_to_bin(Message), IsPersistent), true = dets_ets_insert_new - (State, {MsgId, 1, CurName, - CurOffset, TotalSize, IsPersistent}), + (State, #message_store_entry + { msg_id = MsgId, ref_count = 1, file = CurName, + offset = CurOffset, total_size = TotalSize, + is_persistent = IsPersistent }), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + @@ -1028,10 +1030,11 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, maybe_roll_to_new_file( NextOffset, State #dqstate {current_offset = NextOffset, current_dirty = true}); - [{MsgId, RefCount, File, Offset, TotalSize, IsPersistent}] -> + [StoreEntry = + #message_store_entry { msg_id = MsgId, ref_count = RefCount }] -> %% We already know about it, just update counter - ok = dets_ets_insert(State, {MsgId, RefCount + 1, File, - Offset, TotalSize, IsPersistent}), + ok = dets_ets_insert(State, StoreEntry #message_store_entry + { ref_count = RefCount + 1 }), {ok, State} end. @@ -1043,8 +1046,8 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, }) -> NeedsSync = IsDirty andalso lists:any(fun ({MsgId, _IsDelivered}) -> - [{MsgId, _RefCount, File, Offset, - _TotalSize, _IsPersistent}] = + [#message_store_entry { msg_id = MsgId, file = File, + offset = Offset }] = dets_ets_lookup(State, MsgId), File =:= CurFile andalso Offset >= SyncOffset end, PubMsgIds), @@ -1205,8 +1208,7 @@ internal_delete_queue(Q, State) -> Objs = mnesia:dirty_match_object( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, '_'}, - msg_id = '_', - is_delivered = '_' + _ = '_' }), MsgSeqIds = lists:map( @@ -1330,12 +1332,13 @@ adjust_meta_and_combine( true -> {false, State} end. -sort_msg_locations_by_offset(Asc, List) -> - Comp = case Asc of - true -> fun erlang:'<'/2; - false -> fun erlang:'>'/2 +sort_msg_locations_by_offset(Dir, List) -> + Comp = case Dir of + asc -> fun erlang:'<'/2; + desc -> fun erlang:'>'/2 end, - lists:sort(fun ({_, _, _, OffA, _, _}, {_, _, _, OffB, _, _}) -> + lists:sort(fun (#message_store_entry { offset = OffA }, + #message_store_entry { offset = OffB }) -> Comp(OffA, OffB) end, List). 
@@ -1374,7 +1377,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, read_ahead, delayed_write]), Worklist = lists:dropwhile( - fun ({_, _, _, Offset, _, _}) + fun (#message_store_entry { offset = Offset }) when Offset /= DestinationContiguousTop -> %% it cannot be that Offset == %% DestinationContiguousTop because if it @@ -1386,9 +1389,9 @@ combine_files({Source, SourceValid, _SourceContiguousTop, %% as we require, however, we need to %% enforce it anyway end, sort_msg_locations_by_offset( - true, dets_ets_match_object(State, - {'_', '_', Destination, - '_', '_', '_'}))), + asc, dets_ets_match_object( + State, #message_store_entry + { file = Destination, _ = '_' }))), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State), @@ -1408,9 +1411,8 @@ combine_files({Source, SourceValid, _SourceContiguousTop, end, SourceWorkList = sort_msg_locations_by_offset( - true, dets_ets_match_object(State, - {'_', '_', Source, - '_', '_', '_'})), + asc, dets_ets_match_object(State, #message_store_entry + { file = Source, _ = '_' })), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State), %% tidy up @@ -1424,15 +1426,16 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, State) -> {FinalOffset, BlockStart1, BlockEnd1} = lists:foldl( - fun ({MsgId, RefCount, _Source, Offset, TotalSize, IsPersistent}, + fun (StoreEntry = #message_store_entry { offset = Offset, + total_size = TotalSize }, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. %% Offset, BlockStart and BlockEnd are in the SourceFile Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert - (State, {MsgId, RefCount, Destination, - CurOffset, TotalSize, IsPersistent}), + ok = dets_ets_insert (State, StoreEntry #message_store_entry + { file = Destination, + offset = CurOffset }), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> %% base case, called only for the first list elem @@ -1459,7 +1462,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, ok. close_file(File, State = #dqstate { read_file_hc_cache = HC }) -> - HC1 = rabbit_handle_cache:close_file(form_filename(File), HC), + HC1 = rabbit_file_handle_cache:close_file(form_filename(File), HC), State #dqstate { read_file_hc_cache = HC1 }. 
delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> @@ -1545,10 +1548,10 @@ prune_mnesia(State, Key, Files, DeleteAcc, Len) -> [] -> %% msg hasn't been found on disk, delete it {[{Q, SeqId} | DeleteAcc], Files, Len + 1}; - [{MsgId, _RefCount, _File, _Offset, _TotalSize, true}] -> + [#message_store_entry { msg_id = MsgId, is_persistent = true }] -> %% msg is persistent, keep it {DeleteAcc, Files, Len}; - [{MsgId, _RefCount, _File, _Offset, _TotalSize, false}] -> + [#message_store_entry { msg_id = MsgId, is_persistent = false}] -> %% msg is not persistent, delete it Files2 = remove_message(MsgId, Files, State), {[{Q, SeqId} | DeleteAcc], Files2, Len + 1} @@ -1645,11 +1648,14 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> load_messages(Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), Offset = - case dets_ets_match_object(State, {'_', '_', Left, '_', '_', '_'}) of + case dets_ets_match_object(State, #message_store_entry + { file = Left, _ = '_' }) of [] -> 0; L -> - [ {_MsgId, _RefCount, Left, MaxOffset, TotalSize, _IsPersistent} - | _ ] = sort_msg_locations_by_offset(false, L), + [ #message_store_entry {file = Left, + offset = MaxOffset, + total_size = TotalSize} | _ ] = + sort_msg_locations_by_offset(desc, L), MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT end, State #dqstate { current_file_num = Num, current_file_name = Left, @@ -1663,15 +1669,17 @@ load_messages(Left, [File|Files], case length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, + _ = '_' + }, msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> true = dets_ets_insert_new - (State, {MsgId, RefCount, File, - Offset, TotalSize, IsPersistent}), + (State, #message_store_entry + { msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize, + is_persistent = IsPersistent }), {[Obj | VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } @@ -1702,9 +1710,8 @@ verify_messages_in_mnesia(MsgIds) -> true = 0 < length(mnesia:dirty_index_match_object (rabbit_disk_queue, #dq_msg_loc { msg_id = MsgId, - queue_and_seq_id = '_', - is_delivered = '_' - }, + _ = '_' + }, msg_id)) end, MsgIds). diff --git a/src/rabbit_file_handle_cache.erl b/src/rabbit_file_handle_cache.erl new file mode 100644 index 00000000..83acffd0 --- /dev/null +++ b/src/rabbit_file_handle_cache.erl @@ -0,0 +1,103 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_file_handle_cache). + +-export([init/2, close_all/1, close_file/2, with_file_handle_at/4]). + +-record(hcstate, + { limit, %% how many file handles can we open? + handles, %% dict of the files to their handles, age and offset + ages, %% gb_tree of the files, keyed by age + mode %% the mode to open the files as + }). + +init(Limit, OpenMode) -> + #hcstate { limit = Limit, + handles = dict:new(), + ages = gb_trees:empty(), + mode = OpenMode + }. + +close_all(State = #hcstate { handles = Handles }) -> + dict:fold(fun (_File, {Hdl, _Offset, _Then}, _Acc) -> + file:close(Hdl) + end, ok, Handles), + State #hcstate { handles = dict:new(), ages = gb_trees:empty() }. + +close_file(File, State = #hcstate { handles = Handles, + ages = Ages }) -> + case dict:find(File, Handles) of + error -> + State; + {ok, {Hdl, _Offset, Then}} -> + ok = file:close(Hdl), + State #hcstate { handles = dict:erase(File, Handles), + ages = gb_trees:delete(Then, Ages) + } + end. + +with_file_handle_at(File, Offset, Fun, State = #hcstate { handles = Handles, + ages = Ages, + limit = Limit, + mode = Mode }) -> + {FileHdl, OldOffset, Handles1, Ages1} = + case dict:find(File, Handles) of + error -> + {ok, Hdl} = file:open(File, Mode), + case dict:size(Handles) < Limit of + true -> + {Hdl, 0, Handles, Ages}; + false -> + {Then, OldFile, Ages2} = gb_trees:take_smallest(Ages), + {ok, {OldHdl, _Offset, Then}} = + dict:find(OldFile, Handles), + ok = file:close(OldHdl), + {Hdl, 0, dict:erase(OldFile, Handles), Ages2} + end; + {ok, {Hdl, OldOffset1, Then}} -> + {Hdl, OldOffset1, Handles, gb_trees:delete(Then, Ages)} + end, + SeekRes = case Offset == OldOffset of + true -> ok; + false -> case file:position(FileHdl, {bof, Offset}) of + {ok, Offset} -> ok; + KO -> KO + end + end, + {NewOffset, Result} = case SeekRes of + ok -> Fun(FileHdl); + KO1 -> {OldOffset, KO1} + end, + Now = now(), + Handles2 = dict:store(File, {FileHdl, NewOffset, Now}, Handles1), + Ages3 = gb_trees:enter(Now, File, Ages1), + {Result, State #hcstate { handles = Handles2, ages = Ages3 }}. diff --git a/src/rabbit_handle_cache.erl b/src/rabbit_handle_cache.erl deleted file mode 100644 index fcd79269..00000000 --- a/src/rabbit_handle_cache.erl +++ /dev/null @@ -1,103 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. 
-%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_handle_cache). - --export([init/2, close_all/1, close_file/2, with_file_handle_at/4]). - --record(hcstate, - { limit, %% how many file handles can we open? - handles, %% dict of the files to their handles, age and offset - ages, %% gb_tree of the files, keyed by age - mode %% the mode to open the files as - }). - -init(Limit, OpenMode) -> - #hcstate { limit = Limit, - handles = dict:new(), - ages = gb_trees:empty(), - mode = OpenMode - }. - -close_all(State = #hcstate { handles = Handles }) -> - dict:fold(fun (_File, {Hdl, _Offset, _Then}, _Acc) -> - file:close(Hdl) - end, ok, Handles), - State #hcstate { handles = dict:new(), ages = gb_trees:empty() }. - -close_file(File, State = #hcstate { handles = Handles, - ages = Ages }) -> - case dict:find(File, Handles) of - error -> - State; - {ok, {Hdl, _Offset, Then}} -> - ok = file:close(Hdl), - State #hcstate { handles = dict:erase(File, Handles), - ages = gb_trees:delete(Then, Ages) - } - end. - -with_file_handle_at(File, Offset, Fun, State = #hcstate { handles = Handles, - ages = Ages, - limit = Limit, - mode = Mode }) -> - {FileHdl, OldOffset, Handles1, Ages1} = - case dict:find(File, Handles) of - error -> - {ok, Hdl} = file:open(File, Mode), - case dict:size(Handles) < Limit of - true -> - {Hdl, 0, Handles, Ages}; - false -> - {Then, OldFile, Ages2} = gb_trees:take_smallest(Ages), - {ok, {OldHdl, _Offset, Then}} = - dict:find(OldFile, Handles), - ok = file:close(OldHdl), - {Hdl, 0, dict:erase(OldFile, Handles), Ages2} - end; - {ok, {Hdl, OldOffset1, Then}} -> - {Hdl, OldOffset1, Handles, gb_trees:delete(Then, Ages)} - end, - SeekRes = case Offset == OldOffset of - true -> ok; - false -> case file:position(FileHdl, {bof, Offset}) of - {ok, Offset} -> ok; - KO -> KO - end - end, - {NewOffset, Result} = case SeekRes of - ok -> Fun(FileHdl); - KO1 -> {OldOffset, KO1} - end, - Now = now(), - Handles2 = dict:store(File, {FileHdl, NewOffset, Now}, Handles1), - Ages3 = gb_trees:enter(Now, File, Ages1), - {Result, State #hcstate { handles = Handles2, ages = Ages3 }}. -- cgit v1.2.1 From 4ac380dc0daa2b050b4c3adb8b06d70d653a3396 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 17:46:30 +0100 Subject: remaining items from comment 92 --- src/rabbit_disk_queue.erl | 52 +++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index aee91f5d..b4e6b8b1 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1663,7 +1663,7 @@ load_messages(Left, [], State) -> load_messages(Left, [File|Files], State = #dqstate { file_summary = FileSummary }) -> %% [{MsgId, TotalSize, FileOffset}] - {ok, Messages} = scan_file_for_valid_messages(form_filename(File)), + {ok, Messages} = scan_file_for_valid_messages(File), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case length(mnesia:dirty_index_match_object @@ -1718,19 +1718,21 @@ verify_messages_in_mnesia(MsgIds) -> grab_msg_id({MsgId, _IsPersistent, _TotalSize, _FileOffset}) -> MsgId. 
+scan_file_for_valid_messages_msg_ids(File) -> + {ok, Messages} = scan_file_for_valid_messages(File), + {ok, Messages, lists:map(fun grab_msg_id/1, Messages)}. + recover_crashed_compactions1(Files, TmpFile) -> NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFile, Files), %% [{MsgId, TotalSize, FileOffset}] - {ok, UncorruptedMessagesTmp} = - scan_file_for_valid_messages(form_filename(TmpFile)), - MsgIdsTmp = lists:map(fun grab_msg_id/1, UncorruptedMessagesTmp), + {ok, UncorruptedMessagesTmp, MsgIdsTmp} = + scan_file_for_valid_messages_msg_ids(TmpFile), %% all of these messages should appear in the mnesia table, %% otherwise they wouldn't have been copied out verify_messages_in_mnesia(MsgIdsTmp), - {ok, UncorruptedMessages} = - scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), - MsgIds = lists:map(fun grab_msg_id/1, UncorruptedMessages), + {ok, UncorruptedMessages, MsgIds} = + scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -1798,9 +1800,8 @@ recover_crashed_compactions1(Files, TmpFile) -> ok = file:close(TmpHdl), ok = file:delete(TmpFile), - {ok, MainMessages} = - scan_file_for_valid_messages(form_filename(NonTmpRelatedFile)), - MsgIdsMain = lists:map(fun grab_msg_id/1, MainMessages), + {ok, _MainMessages, MsgIdsMain} = + scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile), %% check that everything in MsgIds is in MsgIdsMain true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, MsgIds), @@ -1877,7 +1878,8 @@ read_message_from_disk(FileHdl, TotalSize) -> end. scan_file_for_valid_messages(File) -> - case file:open(File, [raw, binary, read]) of + FilePath = form_filename(File), + case file:open(FilePath, [raw, binary, read]) of {ok, Hdl} -> Valid = scan_file_for_valid_messages(Hdl, 0, []), %% if something really bad's happened, the close could fail, but ignore @@ -1889,10 +1891,10 @@ scan_file_for_valid_messages(File) -> scan_file_for_valid_messages(FileHdl, Offset, Acc) -> case read_next_file_entry(FileHdl, Offset) of - {ok, eof} -> {ok, Acc}; - {ok, {corrupted, NextOffset}} -> + eof -> {ok, Acc}; + {corrupted, NextOffset} -> scan_file_for_valid_messages(FileHdl, NextOffset, Acc); - {ok, {ok, MsgId, IsPersistent, TotalSize, NextOffset}} -> + {ok, {MsgId, IsPersistent, TotalSize, NextOffset}} -> scan_file_for_valid_messages( FileHdl, NextOffset, [{MsgId, IsPersistent, TotalSize, Offset} | Acc]); @@ -1907,16 +1909,16 @@ read_next_file_entry(FileHdl, Offset) -> {ok, <>} -> case {TotalSize =:= 0, MsgIdBinSize =:= 0} of - {true, _} -> {ok, eof}; %% Nothing we can do other than stop + {true, _} -> eof; %% Nothing we can do other than stop {false, true} -> %% current message corrupted, try skipping past it ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize, case file:position(FileHdl, {cur, TotalSize + 1}) of {ok, ExpectedAbsPos} -> - {ok, {corrupted, ExpectedAbsPos}}; + {corrupted, ExpectedAbsPos}; {ok, _SomeOtherPos} -> - {ok, eof}; %% seek failed, so give up + eof; %% seek failed, so give up KO -> KO end; {false, false} -> %% all good, let's continue @@ -1933,27 +1935,23 @@ read_next_file_entry(FileHdl, Offset) -> case file:read(FileHdl, 1) of {ok, <>} -> - {ok, - {ok, binary_to_term(MsgId), - false, TotalSize, NextOffset}}; + {ok, {binary_to_term(MsgId), + false, TotalSize, NextOffset}}; {ok, <>} -> - {ok, - {ok, 
binary_to_term(MsgId), - true, TotalSize, NextOffset}}; + {ok, {binary_to_term(MsgId), + true, TotalSize, NextOffset}}; {ok, _SomeOtherData} -> - {ok, {corrupted, NextOffset}}; + {corrupted, NextOffset}; KO -> KO end; {ok, _SomeOtherPos} -> %% seek failed, so give up - {ok, eof}; + eof; KO -> KO end; - eof -> {ok, eof}; KO -> KO end end; - eof -> {ok, eof}; KO -> KO end. -- cgit v1.2.1 From 0723bbbf63ed850721c24d1c4d77659a464a8633 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 24 Aug 2009 18:06:15 +0100 Subject: Removed is_persistent from tx_tracking. Removed auto_ack_next_message as it wasn't needed as the one case where it was being used in mq was wrong. And some cosmetic stuff too. --- src/rabbit_amqqueue_process.erl | 13 ++++--------- src/rabbit_disk_queue.erl | 29 ++++++----------------------- src/rabbit_mixed_queue.erl | 35 ++++++++++++----------------------- 3 files changed, 22 insertions(+), 55 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6d742b7a..c65c65ed 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -63,7 +63,7 @@ -record(consumer, {tag, ack_required}). --record(tx, {ch_pid, is_persistent, pending_messages, pending_acks}). +-record(tx, {ch_pid, pending_messages, pending_acks}). %% These are held in our process dictionary -record(cr, {consumer_count, @@ -453,7 +453,6 @@ qname(#q{q = #amqqueue{name = QName}}) -> QName. lookup_tx(Txn) -> case get({txn, Txn}) of undefined -> #tx{ch_pid = none, - is_persistent = false, pending_messages = [], pending_acks = []}; V -> V @@ -471,14 +470,10 @@ all_tx_record() -> all_tx() -> [Txn || {{txn, Txn}, _} <- get()]. -record_pending_message(Txn, ChPid, Message = - #basic_message { is_persistent = IsPersistent }) -> - Tx = #tx{pending_messages = Pending, is_persistent = IsPersistentTxn } = - lookup_tx(Txn), +record_pending_message(Txn, ChPid, Message) -> + Tx = #tx{pending_messages = Pending} = lookup_tx(Txn), record_current_channel_tx(ChPid, Txn), - store_tx(Txn, Tx #tx { pending_messages = [Message | Pending], - is_persistent = IsPersistentTxn orelse IsPersistent - }). + store_tx(Txn, Tx #tx { pending_messages = [Message | Pending] }). record_pending_acks(Txn, ChPid, MsgIds) -> Tx = #tx{pending_acks = Pending} = lookup_tx(Txn), diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b4e6b8b1..70d44845 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -39,11 +39,10 @@ terminate/2, code_change/3]). -export([handle_pre_hibernate/1]). --export([publish/3, fetch/1, phantom_fetch/1, ack/2, - tx_publish/1, tx_commit/3, tx_cancel/1, - requeue/2, purge/1, delete_queue/1, - delete_non_durable_queues/1, auto_ack_next_message/1, - requeue_next_n/2, len/1, foldl/3, prefetch/1 +-export([publish/3, fetch/1, phantom_fetch/1, ack/2, tx_publish/1, tx_commit/3, + tx_cancel/1, requeue/2, purge/1, delete_queue/1, + delete_non_durable_queues/1, requeue_next_n/2, len/1, foldl/3, + prefetch/1 ]). -export([filesync/0, cache_info/0]). @@ -264,7 +263,6 @@ {msg_id(), boolean(), boolean(), ack_tag(), non_neg_integer()})). -spec(prefetch/1 :: (queue_name()) -> 'ok'). -spec(ack/2 :: (queue_name(), [ack_tag()]) -> 'ok'). --spec(auto_ack_next_message/1 :: (queue_name()) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean()}], [ack_tag()]) -> 'ok'). @@ -308,9 +306,6 @@ prefetch(Q) -> ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {ack, Q, MsgSeqIds}). 
-auto_ack_next_message(Q) -> - gen_server2:cast(?SERVER, {auto_ack_next_message, Q}). - tx_publish(Message = #basic_message {}) -> gen_server2:cast(?SERVER, {tx_publish, Message}). @@ -510,9 +505,6 @@ handle_cast({publish, Q, Message, IsDelivered}, State) -> handle_cast({ack, Q, MsgSeqIds}, State) -> {ok, State1} = internal_ack(Q, MsgSeqIds, State), noreply(State1); -handle_cast({auto_ack_next_message, Q}, State) -> - {ok, State1} = internal_auto_ack(Q, State), - noreply(State1); handle_cast({tx_publish, Message}, State) -> {ok, State1} = internal_tx_publish(Message, State), noreply(State1); @@ -940,15 +932,6 @@ internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> Acc1 = Fun(Message, AckTag, IsDelivered, Acc), internal_foldl(Q, WriteSeqId, Fun, State1, Acc1, ReadSeqId + 1). -internal_auto_ack(Q, State) -> - case internal_fetch_attributes(Q, ignore_delivery, pop_queue, State) of - {ok, empty, State1} -> - {ok, State1}; - {ok, {_MsgId, _IsPersistent, _IsDelivered, AckTag, _Remaining}, - State1} -> - remove_messages(Q, [AckTag], true, State1) - end. - internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, true, State). @@ -1950,8 +1933,8 @@ read_next_file_entry(FileHdl, Offset) -> eof; KO -> KO end; - KO -> KO + Other -> Other end end; - KO -> KO + Other -> Other end. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 771a920f..9ead773d 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -311,32 +311,21 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, State = %% Assumption here is that the queue is empty already (only called via %% attempt_immediate_delivery). -publish_delivered(Msg = - #basic_message { guid = MsgId, is_persistent = IsPersistent}, - State = - #mqstate { mode = Mode, is_durable = IsDurable, - queue = Q, length = 0 }) - when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> +publish_delivered(Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent}, + State = #mqstate { is_durable = IsDurable, queue = Q, + length = 0 }) + when IsDurable andalso IsPersistent -> ok = rabbit_disk_queue:publish(Q, Msg, true), MsgSize = size_of_message(Msg), State1 = gain_memory(MsgSize, State), - case IsDurable andalso IsPersistent of - true -> - %% must call phantom_fetch otherwise the msg remains at - %% the head of the queue. This is synchronous, but - %% unavoidable as we need the AckTag - {MsgId, IsPersistent, true, AckTag, 0} = - rabbit_disk_queue:phantom_fetch(Q), - {ok, AckTag, State1}; - false -> - %% in this case, we don't actually care about the ack, so - %% auto ack it (asynchronously). - ok = rabbit_disk_queue:auto_ack_next_message(Q), - {ok, noack, State1} - end; -publish_delivered(Msg, State = #mqstate { mode = mixed, length = 0 }) -> - MsgSize = size_of_message(Msg), - {ok, noack, gain_memory(MsgSize, State)}. + %% must call phantom_fetch otherwise the msg remains at the head + %% of the queue. This is synchronous, but unavoidable as we need + %% the AckTag + {MsgId, IsPersistent, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), + {ok, AckTag, State1}; +publish_delivered(Msg, State = #mqstate { length = 0 }) -> + {ok, noack, gain_memory(size_of_message(Msg), State)}. 
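%% Usage sketch (illustrative, not part of the patch; assumes the queue
%% is empty, hence Remaining = 0): how a durable queue can hand a
%% persistent message straight to a consumer and still obtain a disk ack
%% tag. publish/3 with IsDelivered = true enqueues the message already
%% marked delivered; phantom_fetch/1 then removes it from the queue head
%% without re-reading the body, yielding the AckTag that must eventually
%% go back via ack/2.
publish_delivered_sketch(Q, Msg = #basic_message { guid = MsgId }) ->
    ok = rabbit_disk_queue:publish(Q, Msg, true),
    {MsgId, true, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q),
    %% ... later, once the consumer acknowledges ...
    ok = rabbit_disk_queue:ack(Q, [AckTag]).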
fetch(State = #mqstate { length = 0 }) -> {empty, State}; -- cgit v1.2.1 From 1643873149b10f2ada1cd809a2266522eaa2fda8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 24 Aug 2009 18:15:56 +0100 Subject: cosmetic --- src/rabbit_disk_queue.erl | 3 +-- src/rabbit_mixed_queue.erl | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 70d44845..ef4ff74e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1861,8 +1861,7 @@ read_message_from_disk(FileHdl, TotalSize) -> end. scan_file_for_valid_messages(File) -> - FilePath = form_filename(File), - case file:open(FilePath, [raw, binary, read]) of + case file:open(form_filename(File), [raw, binary, read]) of {ok, Hdl} -> Valid = scan_file_for_valid_messages(Hdl, 0, []), %% if something really bad's happened, the close could fail, but ignore diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9ead773d..08d45094 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -317,8 +317,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, length = 0 }) when IsDurable andalso IsPersistent -> ok = rabbit_disk_queue:publish(Q, Msg, true), - MsgSize = size_of_message(Msg), - State1 = gain_memory(MsgSize, State), + State1 = gain_memory(size_of_message(Msg), State), %% must call phantom_fetch otherwise the msg remains at the head %% of the queue. This is synchronous, but unavoidable as we need %% the AckTag -- cgit v1.2.1 From 84dda8f75fca6f45344d04fd4cd7b6f1146a6901 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 24 Aug 2009 18:30:11 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 08d45094..7cda1004 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -294,9 +294,9 @@ publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, msg_buf = MsgBuf }) -> MsgBuf1 = inc_queue_length(Q, MsgBuf, 1), ok = rabbit_disk_queue:publish(Q, Msg, false), - MsgSize = size_of_message(Msg), - {ok, gain_memory(MsgSize, State #mqstate { msg_buf = MsgBuf1, - length = Length + 1 })}; + {ok, gain_memory(size_of_message(Msg), + State #mqstate { msg_buf = MsgBuf1, + length = Length + 1 })}; publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> @@ -304,8 +304,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, State = true -> rabbit_disk_queue:publish(Q, Msg, false); false -> ok end, - MsgSize = size_of_message(Msg), - {ok, gain_memory(MsgSize, + {ok, gain_memory(size_of_message(Msg), State #mqstate { msg_buf = queue:in({Msg, false}, MsgBuf), length = Length + 1 })}. 
-- cgit v1.2.1 From 80b1346264bdd7ad1b32ecf212427393a978146f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 25 Aug 2009 12:04:54 +0100 Subject: MQ: Made run length encoding more obvious; Added comment about logic for starting up prefetcher; Tidied API for dec_queue_length and inc_queue_length --- src/rabbit_mixed_queue.erl | 73 ++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 7cda1004..2e8fb333 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -102,7 +102,7 @@ init(Queue, IsDurable) -> Len = rabbit_disk_queue:len(Queue), - MsgBuf = inc_queue_length(Queue, queue:new(), Len), + MsgBuf = inc_queue_length(queue:new(), Len), Size = rabbit_disk_queue:foldl( fun (Msg = #basic_message { is_persistent = true }, _AckTag, _IsDelivered, Acc) -> @@ -125,16 +125,15 @@ set_storage_mode(disk, TxnMessages, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, prefetcher = Prefetcher }) -> State1 = State #mqstate { mode = disk }, - {MsgBuf1, State2} = + MsgBuf1 = case Prefetcher of - undefined -> {MsgBuf, State1}; + undefined -> MsgBuf; _ -> case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of - empty -> {MsgBuf, State1}; + empty -> MsgBuf; {Fetched, Len} -> - State3 = #mqstate { msg_buf = MsgBuf2 } = - dec_queue_length(Len, State1), - {queue:join(Fetched, MsgBuf2), State3} + MsgBuf2 = dec_queue_length(MsgBuf, Len), + queue:join(Fetched, MsgBuf2) end end, %% We enqueue _everything_ here. This means that should a message @@ -157,7 +156,7 @@ set_storage_mode(disk, TxnMessages, State = end end, TxnMessages), garbage_collect(), - {ok, State2 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; + {ok, State1 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; set_storage_mode(mixed, TxnMessages, State = #mqstate { mode = disk, is_durable = IsDurable }) -> %% The queue has a token just saying how many msgs are on disk @@ -197,7 +196,7 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, true -> %% it's already in the Q send_messages_to_disk( IsDurable, Q, Queue1, PublishCount, RequeueCount + 1, - Commit, Ack, inc_queue_length(Q, MsgBuf, 1)); + Commit, Ack, inc_queue_length(MsgBuf, 1)); false -> republish_message_to_disk_queue( IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, @@ -209,10 +208,10 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, republish_message_to_disk_queue( IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, [AckTag | Ack], MsgBuf, Msg, IsDelivered); - {{value, {Q, Count}}, Queue1} -> + {{value, {on_disk, Count}}, Queue1} -> send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, RequeueCount + Count, Commit, Ack, - inc_queue_length(Q, MsgBuf, Count)) + inc_queue_length(MsgBuf, Count)) end. republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount, @@ -228,7 +227,7 @@ republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount, false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1], Ack1} end, send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0, - Commit2, Ack2, inc_queue_length(Q, MsgBuf, 1)). + Commit2, Ack2, inc_queue_length(MsgBuf, 1)). flush_messages_to_disk_queue(_Q, [], []) -> ok; @@ -253,28 +252,27 @@ lose_memory(Dec, State = #mqstate { memory_size = QSize, State #mqstate { memory_size = QSize - Dec, memory_loss = Loss + Dec }. 
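%% A sketch of the run length encoding made explicit below (illustrative;
%% msg_a stands in for a real {Message, IsDelivered} pair, and the
%% post-patch inc_queue_length/2 is assumed): msg_buf interleaves in-RAM
%% messages with {on_disk, Count} tokens that stand for runs of messages
%% living only in the disk queue, and adjacent runs are merged rather
%% than enqueued separately.
rle_example() ->
    MsgBuf0 = queue:from_list([{on_disk, 3}, {msg_a, false}]),
    %% rear is a message, so a fresh token is appended:
    %%   [{on_disk, 3}, {msg_a, false}, {on_disk, 2}]
    MsgBuf1 = inc_queue_length(MsgBuf0, 2),
    %% rear is already a token, so the runs merge:
    %%   [{on_disk, 3}, {msg_a, false}, {on_disk, 6}]
    MsgBuf2 = inc_queue_length(MsgBuf1, 4),
    [{on_disk, 3}, {msg_a, false}, {on_disk, 6}] = queue:to_list(MsgBuf2).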
-inc_queue_length(_Q, MsgBuf, 0) -> +inc_queue_length(MsgBuf, 0) -> MsgBuf; -inc_queue_length(Q, MsgBuf, Count) -> +inc_queue_length(MsgBuf, Count) -> {NewCount, MsgBufTail} = case queue:out_r(MsgBuf) of - {empty, MsgBuf1} -> {Count, MsgBuf1}; - {{value, {Q, Len}}, MsgBuf1} -> {Len + Count, MsgBuf1}; - {{value, _}, _MsgBuf1} -> {Count, MsgBuf} + {empty, MsgBuf1} -> {Count, MsgBuf1}; + {{value, {on_disk, Len}}, MsgBuf1} -> {Len + Count, MsgBuf1}; + {{value, _}, _MsgBuf1} -> {Count, MsgBuf} end, - queue:in({Q, NewCount}, MsgBufTail). + queue:in({on_disk, NewCount}, MsgBufTail). -dec_queue_length(Count, State = #mqstate { queue = Q, msg_buf = MsgBuf }) -> +dec_queue_length(MsgBuf, Count) -> case queue:out(MsgBuf) of - {{value, {Q, Len}}, MsgBuf1} -> + {{value, {on_disk, Len}}, MsgBuf1} -> case Len of Count -> - State #mqstate { msg_buf = MsgBuf1 }; + MsgBuf1; _ when Len > Count -> - State #mqstate { msg_buf = queue:in_r({Q, Len-Count}, - MsgBuf1)} + queue:in_r({on_disk, Len-Count}, MsgBuf1) end; - _ -> State + _ -> MsgBuf end. maybe_prefetch(State = #mqstate { prefetcher = undefined, @@ -282,9 +280,13 @@ maybe_prefetch(State = #mqstate { prefetcher = undefined, msg_buf = MsgBuf, queue = Q }) -> case queue:peek(MsgBuf) of - {value, {Q, Count}} -> {ok, Prefetcher} = - rabbit_queue_prefetcher:start_link(Q, Count), - State #mqstate { prefetcher = Prefetcher }; + {value, {on_disk, Count}} -> + %% only prefetch for the next contiguous block on + %% disk. Beyond there, we either hit the end of the queue, + %% or the next msg is already in RAM, held by us, the + %% mixed queue + {ok, Prefetcher} = rabbit_queue_prefetcher:start_link(Q, Count), + State #mqstate { prefetcher = Prefetcher }; _ -> State end; maybe_prefetch(State) -> @@ -292,7 +294,7 @@ maybe_prefetch(State) -> publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, msg_buf = MsgBuf }) -> - MsgBuf1 = inc_queue_length(Q, MsgBuf, 1), + MsgBuf1 = inc_queue_length(MsgBuf, 1), ok = rabbit_disk_queue:publish(Q, Msg, false), {ok, gain_memory(size_of_message(Msg), State #mqstate { msg_buf = MsgBuf1, @@ -356,19 +358,20 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag1, Rem}, State1 #mqstate { msg_buf = MsgBuf1 }}; _ when Prefetcher == undefined -> - State2 = dec_queue_length(1, State1), + MsgBuf2 = dec_queue_length(MsgBuf, 1), {Msg = #basic_message { is_persistent = IsPersistent }, IsDelivered, AckTag, _PersistRem} = rabbit_disk_queue:fetch(Q), AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), - {{Msg, IsDelivered, AckTag1, Rem}, State2}; + {{Msg, IsDelivered, AckTag1, Rem}, + State1 #mqstate { msg_buf = MsgBuf2 }}; _ -> case rabbit_queue_prefetcher:drain(Prefetcher) of empty -> fetch(State #mqstate { prefetcher = undefined }); {Fetched, Len, Status} -> - State2 = #mqstate { msg_buf = MsgBuf2 } = - dec_queue_length(Len, State), - fetch(State2 #mqstate + MsgBuf2 = dec_queue_length(MsgBuf, Len), + %% use State, not State1 as we've not dec'd length + fetch(State #mqstate { msg_buf = queue:join(Fetched, MsgBuf2), prefetcher = case Status of finished -> undefined; @@ -424,7 +427,7 @@ tx_commit(Publishes, MsgsWithAcks, Len = erlang:length(Publishes), {ok, lose_memory(ASize, State #mqstate { length = Length + Len, - msg_buf = inc_queue_length(Q, MsgBuf, Len) })}; + msg_buf = inc_queue_length(MsgBuf, Len) })}; tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, length = Length }) -> @@ -501,7 +504,7 @@ requeue(MessagesWithAckTags, 
State = #mqstate { mode = disk, queue = Q, ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), Len = erlang:length(MessagesWithAckTags), {ok, State #mqstate { length = Length + Len, - msg_buf = inc_queue_length(Q, MsgBuf, Len) }}; + msg_buf = inc_queue_length(MsgBuf, Len) }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, -- cgit v1.2.1 From a10e68fc2e6a24a85e43c68bfb9193cc78142cc6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 25 Aug 2009 12:10:00 +0100 Subject: MQ: length/0 => len/0 --- src/rabbit_amqqueue_process.erl | 6 +++--- src/rabbit_mixed_queue.erl | 6 +++--- src/rabbit_tests.erl | 38 +++++++++++++++++++------------------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index c65c65ed..99951ae1 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -527,7 +527,7 @@ i(storage_mode, #q{ mixed_state = MS }) -> i(pid, _) -> self(); i(messages_ready, #q { mixed_state = MS }) -> - rabbit_mixed_queue:length(MS); + rabbit_mixed_queue:len(MS); i(messages_unacknowledged, _) -> lists:sum([dict:size(UAM) || #cr{unacked_messages = UAM} <- all_ch_record()]); @@ -706,12 +706,12 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, mixed_state = MS, active_consumers = ActiveConsumers}) -> - Length = rabbit_mixed_queue:length(MS), + Length = rabbit_mixed_queue:len(MS), reply({ok, Name, Length, queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, State = #q { mixed_state = MS }) -> - Length = rabbit_mixed_queue:length(MS), + Length = rabbit_mixed_queue:len(MS), IsEmpty = Length == 0, IsUnused = is_unused(State), if diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 2e8fb333..60920b21 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -37,7 +37,7 @@ -export([publish/2, publish_delivered/2, fetch/1, ack/2, tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, - length/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). + len/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). -export([set_storage_mode/3, storage_mode/1, estimate_queue_memory_and_reset_counters/1]). @@ -88,7 +88,7 @@ -spec(delete_queue/1 :: (mqstate()) -> {'ok', mqstate()}). --spec(length/1 :: (mqstate()) -> non_neg_integer()). +-spec(len/1 :: (mqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (mqstate()) -> boolean()). -spec(set_storage_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). @@ -553,7 +553,7 @@ delete_queue(State = #mqstate { queue = Q, memory_size = QSize, {ok, lose_memory(QSize, State #mqstate { length = 0, msg_buf = queue:new(), prefetcher = undefined })}. -length(#mqstate { length = Length }) -> +len(#mqstate { length = Length }) -> Length. 
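%% A note in passing on the rename (sketch, not part of the patch):
%% retiring the module's own length/1 in favour of len/1 removes any
%% ambiguity between the queue operation and the list BIF, which is
%% what lets the s/erlang:length/length patch further below drop the
%% erlang: qualifier, e.g.:
%%
%%   QLen   = rabbit_mixed_queue:len(MS),  %% the queue API
%%   PubLen = length(Publishes),           %% the plain list BIF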
is_empty(#mqstate { length = Length }) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 33ede609..ae4117aa 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1109,15 +1109,15 @@ rdq_test_mixed_queue_modes() -> {ok, MS5a} = rabbit_mixed_queue:publish(Msg, MS5), MS5a end, MS4, lists:seq(1,10)), - 30 = rabbit_mixed_queue:length(MS6), + 30 = rabbit_mixed_queue:len(MS6), io:format("Published a mixture of messages; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS6)]), {ok, MS7} = rabbit_mixed_queue:set_storage_mode(disk, [], MS6), - 30 = rabbit_mixed_queue:length(MS7), + 30 = rabbit_mixed_queue:len(MS7), io:format("Converted to disk only mode; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS7)]), {ok, MS8} = rabbit_mixed_queue:set_storage_mode(mixed, [], MS7), - 30 = rabbit_mixed_queue:length(MS8), + 30 = rabbit_mixed_queue:len(MS8), io:format("Converted to mixed mode; ~w~n", [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS8)]), MS10 = @@ -1129,15 +1129,15 @@ rdq_test_mixed_queue_modes() -> MS9a} = rabbit_mixed_queue:fetch(MS9), MS9a end, MS8, lists:seq(1,10)), - 20 = rabbit_mixed_queue:length(MS10), + 20 = rabbit_mixed_queue:len(MS10), io:format("Delivered initial non persistent messages~n"), {ok, MS11} = rabbit_mixed_queue:set_storage_mode(disk, [], MS10), - 20 = rabbit_mixed_queue:length(MS11), + 20 = rabbit_mixed_queue:len(MS11), io:format("Converted to disk only mode~n"), rdq_stop(), rdq_start(), MS12 = rdq_new_mixed_queue(q, true, false), - 10 = rabbit_mixed_queue:length(MS12), + 10 = rabbit_mixed_queue:len(MS12), io:format("Recovered queue~n"), {MS14, AckTags} = lists:foldl( @@ -1148,16 +1148,16 @@ rdq_test_mixed_queue_modes() -> MS13a} = rabbit_mixed_queue:fetch(MS13), {MS13a, [{Msg, AckTag} | AcksAcc]} end, {MS12, []}, lists:seq(1,10)), - 0 = rabbit_mixed_queue:length(MS14), + 0 = rabbit_mixed_queue:len(MS14), {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14), io:format("Delivered and acked all messages~n"), {ok, MS16} = rabbit_mixed_queue:set_storage_mode(disk, [], MS15), - 0 = rabbit_mixed_queue:length(MS16), + 0 = rabbit_mixed_queue:len(MS16), io:format("Converted to disk only mode~n"), rdq_stop(), rdq_start(), MS17 = rdq_new_mixed_queue(q, true, false), - 0 = rabbit_mixed_queue:length(MS17), + 0 = rabbit_mixed_queue:len(MS17), {MS17,0,0,0} = rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS17), io:format("Recovered queue~n"), rdq_stop(), @@ -1200,28 +1200,28 @@ rdq_test_mode_conversion_mid_txn() -> passed. 
rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) -> - 0 = rabbit_mixed_queue:length(MS0), + 0 = rabbit_mixed_queue:len(MS0), MS2 = lists:foldl( fun (Msg, MS1) -> {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1), MS1a end, MS0, MsgsA), Len0 = length(MsgsA), - Len0 = rabbit_mixed_queue:length(MS2), + Len0 = rabbit_mixed_queue:len(MS2), MS4 = lists:foldl( fun (Msg, MS3) -> {ok, MS3a} = rabbit_mixed_queue:tx_publish(Msg, MS3), MS3a end, MS2, MsgsB), - Len0 = rabbit_mixed_queue:length(MS4), + Len0 = rabbit_mixed_queue:len(MS4), {ok, MS5} = rabbit_mixed_queue:set_storage_mode(Mode, MsgsB, MS4), - Len0 = rabbit_mixed_queue:length(MS5), + Len0 = rabbit_mixed_queue:len(MS5), {ok, MS9} = case CommitOrCancel of commit -> {ok, MS6} = rabbit_mixed_queue:tx_commit(MsgsB, [], MS5), Len1 = Len0 + length(MsgsB), - Len1 = rabbit_mixed_queue:length(MS6), + Len1 = rabbit_mixed_queue:len(MS6), {AckTags, MS8} = lists:foldl( fun (Msg, {Acc, MS7}) -> @@ -1230,11 +1230,11 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - rabbit_mixed_queue:fetch(MS7), {[{Msg, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA ++ MsgsB), - 0 = rabbit_mixed_queue:length(MS8), + 0 = rabbit_mixed_queue:len(MS8), rabbit_mixed_queue:ack(AckTags, MS8); cancel -> {ok, MS6} = rabbit_mixed_queue:tx_cancel(MsgsB, MS5), - Len0 = rabbit_mixed_queue:length(MS6), + Len0 = rabbit_mixed_queue:len(MS6), {AckTags, MS8} = lists:foldl( fun (Msg, {Acc, MS7}) -> @@ -1243,14 +1243,14 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - rabbit_mixed_queue:fetch(MS7), {[{Msg, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA), - 0 = rabbit_mixed_queue:length(MS8), + 0 = rabbit_mixed_queue:len(MS8), rabbit_mixed_queue:ack(AckTags, MS8) end, - 0 = rabbit_mixed_queue:length(MS9), + 0 = rabbit_mixed_queue:len(MS9), Msg = rdq_message(0, <<0:256>>, false), {ok, AckTag, MS10} = rabbit_mixed_queue:publish_delivered(Msg, MS9), {ok,MS11} = rabbit_mixed_queue:ack([{Msg, AckTag}], MS10), - 0 = rabbit_mixed_queue:length(MS11), + 0 = rabbit_mixed_queue:len(MS11), passed. rdq_test_disk_queue_modes() -> -- cgit v1.2.1 From c0dec961fe6e29c2bf5ed5a648941028b12b1188 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 25 Aug 2009 13:07:32 +0100 Subject: s/erlang:length/length --- src/rabbit_mixed_queue.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 60920b21..031ca914 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -424,7 +424,7 @@ tx_commit(Publishes, MsgsWithAcks, true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), RealAcks) end, - Len = erlang:length(Publishes), + Len = length(Publishes), {ok, lose_memory(ASize, State #mqstate { length = Length + Len, msg_buf = inc_queue_length(MsgBuf, Len) })}; @@ -450,7 +450,7 @@ tx_commit(Publishes, MsgsWithAcks, end, {ok, lose_memory(ASize, State #mqstate { msg_buf = MsgBuf1, - length = Length + erlang:length(Publishes) })}. + length = Length + length(Publishes) })}. 
tx_cancel(Publishes, State = #mqstate { mode = disk }) -> {MsgIds, CSize} = @@ -502,7 +502,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, [] end, [], MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), - Len = erlang:length(MessagesWithAckTags), + Len = length(MessagesWithAckTags), {ok, State #mqstate { length = Length + Len, msg_buf = inc_queue_length(MsgBuf, Len) }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, @@ -525,7 +525,7 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, _ -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) end, {ok, State #mqstate {msg_buf = MsgBuf1, - length = Length + erlang:length(MessagesWithAckTags)}}. + length = Length + length(MessagesWithAckTags)}}. purge(State = #mqstate { queue = Q, mode = disk, length = Count, memory_size = QSize }) -> -- cgit v1.2.1 From 419ddb80ff480e1c07016061e911acb72a5b2dde Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 25 Aug 2009 13:10:43 +0100 Subject: consistent field order --- src/rabbit_mixed_queue.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 031ca914..87de6450 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -426,8 +426,8 @@ tx_commit(Publishes, MsgsWithAcks, end, Len = length(Publishes), {ok, lose_memory(ASize, State #mqstate - { length = Length + Len, - msg_buf = inc_queue_length(MsgBuf, Len) })}; + { msg_buf = inc_queue_length(MsgBuf, Len), + length = Length + Len })}; tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, length = Length }) -> @@ -503,8 +503,8 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, end, [], MessagesWithAckTags), ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), Len = length(MessagesWithAckTags), - {ok, State #mqstate { length = Length + Len, - msg_buf = inc_queue_length(MsgBuf, Len) }}; + {ok, State #mqstate { msg_buf = inc_queue_length(MsgBuf, Len), + length = Length + Len }}; requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, -- cgit v1.2.1 From bbe818e24aa662acfb5170ff8b5b3b9522a12259 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 26 Aug 2009 06:21:54 +0100 Subject: cosmetic --- src/rabbit_disk_queue.erl | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ef4ff74e..fea0092e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1190,9 +1190,7 @@ internal_delete_queue(Q, State) -> %% now remove everything already delivered Objs = mnesia:dirty_match_object( rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, '_'}, - _ = '_' - }), + #dq_msg_loc { queue_and_seq_id = {Q, '_'}, _ = '_' }), MsgSeqIds = lists:map( fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, @@ -1651,9 +1649,7 @@ load_messages(Left, [File|Files], fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case length(mnesia:dirty_index_match_object (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, - _ = '_' - }, + #dq_msg_loc { msg_id = MsgId, _ = '_' }, msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> @@ -1692,9 +1688,7 @@ verify_messages_in_mnesia(MsgIds) -> fun (MsgId) -> true = 0 < length(mnesia:dirty_index_match_object (rabbit_disk_queue, - #dq_msg_loc { msg_id = 
MsgId, - _ = '_' - }, + #dq_msg_loc { msg_id = MsgId, _ = '_' }, msg_id)) end, MsgIds). -- cgit v1.2.1 From 363bab6d2c2f0dd3c77e3ef7679a70aaff1c6b84 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 26 Aug 2009 06:27:55 +0100 Subject: cosmetic --- src/rabbit_disk_queue.erl | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index fea0092e..cba84ed7 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -993,11 +993,11 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, %% New message, lots to do {ok, TotalSize} = append_message(CurHdl, MsgId, msg_to_bin(Message), IsPersistent), - true = dets_ets_insert_new - (State, #message_store_entry - { msg_id = MsgId, ref_count = 1, file = CurName, - offset = CurOffset, total_size = TotalSize, - is_persistent = IsPersistent }), + true = dets_ets_insert_new( + State, #message_store_entry + { msg_id = MsgId, ref_count = 1, file = CurName, + offset = CurOffset, total_size = TotalSize, + is_persistent = IsPersistent }), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize + @@ -1414,9 +1414,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, %% Offset, BlockStart and BlockEnd are in the SourceFile Size = TotalSize + ?FILE_PACKING_ADJUSTMENT, %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert (State, StoreEntry #message_store_entry - { file = Destination, - offset = CurOffset }), + ok = dets_ets_insert(State, StoreEntry #message_store_entry + { file = Destination, + offset = CurOffset }), NextOffset = CurOffset + Size, if BlockStart =:= undefined -> %% base case, called only for the first list elem @@ -1653,12 +1653,12 @@ load_messages(Left, [File|Files], msg_id)) of 0 -> {VMAcc, VTSAcc}; RefCount -> - true = dets_ets_insert_new - (State, #message_store_entry - { msg_id = MsgId, ref_count = RefCount, - file = File, offset = Offset, - total_size = TotalSize, - is_persistent = IsPersistent }), + true = dets_ets_insert_new( + State, #message_store_entry + { msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize, + is_persistent = IsPersistent }), {[Obj | VMAcc], VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT } @@ -1686,10 +1686,10 @@ recover_crashed_compactions(Files, TmpFiles) -> verify_messages_in_mnesia(MsgIds) -> lists:foreach( fun (MsgId) -> - true = 0 < length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, _ = '_' }, - msg_id)) + true = 0 < length(mnesia:dirty_index_match_object( + rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, _ = '_' }, + msg_id)) end, MsgIds). grab_msg_id({MsgId, _IsPersistent, _TotalSize, _FileOffset}) -> -- cgit v1.2.1 From 2c76ed6180052b50312d62d2016bcdfbacfa54a0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 10:05:03 +0100 Subject: Application of Matthias's patch. Can't find any faults in it, and the tests all pass too. Code definitely has got shorter! 
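The heart of the patch is the on_disk/3 predicate, which folds the
duplicated disk/mixed clauses into a single decision: does this publish
also have to go to the disk queue? For reference, a sketch of its truth
table (illustrative assertions only, mirroring the three clauses in the
diff below; the arguments are Mode, IsDurable, IsPersistent):

    true  = on_disk(disk,  false, false),  %% disk mode: always on disk
    true  = on_disk(mixed, true,  true),   %% mixed: durable AND persistent
    false = on_disk(mixed, true,  false),  %% mixed: transient message
    false = on_disk(mixed, false, true).   %% mixed: non-durable queue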
--- src/rabbit_mixed_queue.erl | 119 ++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 73 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 87de6450..33cb38c4 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -292,22 +292,23 @@ maybe_prefetch(State = #mqstate { prefetcher = undefined, maybe_prefetch(State) -> State. -publish(Msg, State = #mqstate { mode = disk, queue = Q, length = Length, - msg_buf = MsgBuf }) -> - MsgBuf1 = inc_queue_length(MsgBuf, 1), - ok = rabbit_disk_queue:publish(Q, Msg, false), - {ok, gain_memory(size_of_message(Msg), - State #mqstate { msg_buf = MsgBuf1, - length = Length + 1 })}; +on_disk(disk, _IsDurable, _IsPersistent) -> true; +on_disk(mixed, true, true) -> true; +on_disk(mixed, _IsDurable, _IsPersistent) -> false. + publish(Msg = #basic_message { is_persistent = IsPersistent }, State = - #mqstate { queue = Q, mode = mixed, is_durable = IsDurable, + #mqstate { queue = Q, mode = Mode, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> - ok = case IsDurable andalso IsPersistent of - true -> rabbit_disk_queue:publish(Q, Msg, false); + ok = case on_disk(Mode, IsDurable, IsPersistent) of + true -> rabbit_disk_queue:publish(Q, Msg, false); false -> ok end, + NewMsgBuf = case Mode of + disk -> inc_queue_length(MsgBuf, 1); + mixed -> queue:in({Msg, false}, MsgBuf) + end, {ok, gain_memory(size_of_message(Msg), - State #mqstate { msg_buf = queue:in({Msg, false}, MsgBuf), + State #mqstate { msg_buf = NewMsgBuf, length = Length + 1 })}. %% Assumption here is that the queue is empty already (only called via @@ -403,80 +404,52 @@ ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> {ok, lose_memory(ASize, State)}. tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, - State = #mqstate { mode = Mode, is_durable = IsDurable }) - when Mode =:= disk orelse (IsDurable andalso IsPersistent) -> - ok = rabbit_disk_queue:tx_publish(Msg), - MsgSize = size_of_message(Msg), - {ok, gain_memory(MsgSize, State)}; -tx_publish(Msg, State = #mqstate { mode = mixed }) -> - %% this message will reappear in the tx_commit, so ignore for now - MsgSize = size_of_message(Msg), - {ok, gain_memory(MsgSize, State)}. - -only_msg_ids(Pubs) -> - lists:map(fun (Msg) -> {Msg #basic_message.guid, false} end, Pubs). - -tx_commit(Publishes, MsgsWithAcks, - State = #mqstate { mode = disk, queue = Q, length = Length, - msg_buf = MsgBuf }) -> - {RealAcks, ASize} = remove_noacks(MsgsWithAcks), - ok = if ([] == Publishes) andalso ([] == RealAcks) -> ok; - true -> rabbit_disk_queue:tx_commit(Q, only_msg_ids(Publishes), - RealAcks) + State = #mqstate { mode = Mode, is_durable = IsDurable }) -> + ok = case on_disk(Mode, IsDurable, IsPersistent) of + true -> rabbit_disk_queue:tx_publish(Msg); + false -> ok end, - Len = length(Publishes), - {ok, lose_memory(ASize, State #mqstate - { msg_buf = inc_queue_length(MsgBuf, Len), - length = Length + Len })}; + {ok, gain_memory(size_of_message(Msg), State)}. 
+ tx_commit(Publishes, MsgsWithAcks, - State = #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + State = #mqstate { mode = Mode, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, length = Length }) -> - {PersistentPubs, MsgBuf1} = - lists:foldl(fun (Msg = #basic_message { is_persistent = IsPersistent }, - {Acc, MsgBuf2}) -> - Acc1 = - case IsPersistent andalso IsDurable of - true -> [ {Msg #basic_message.guid, false} - | Acc]; - false -> Acc - end, - {Acc1, queue:in({Msg, false}, MsgBuf2)} - end, {[], MsgBuf}, Publishes), + PersistentPubs = + [{MsgId, false} || + #basic_message { guid = MsgId, + is_persistent = IsPersistent } <- Publishes, + on_disk(Mode, IsDurable, IsPersistent)], {RealAcks, ASize} = remove_noacks(MsgsWithAcks), - ok = case ([] == PersistentPubs) andalso ([] == RealAcks) of - true -> ok; - false -> rabbit_disk_queue:tx_commit( - Q, lists:reverse(PersistentPubs), RealAcks) + ok = case {PersistentPubs, RealAcks} of + {[], []} -> ok; + _ -> rabbit_disk_queue:tx_commit( + Q, PersistentPubs, RealAcks) end, - {ok, lose_memory(ASize, State #mqstate - { msg_buf = MsgBuf1, - length = Length + length(Publishes) })}. + Len = length(Publishes), + NewMsgBuf = case Mode of + disk -> inc_queue_length(MsgBuf, Len); + mixed -> ToAdd = [{Msg, false} || Msg <- Publishes], + queue:join(MsgBuf, queue:from_list(ToAdd)) + end, + {ok, lose_memory(ASize, State #mqstate { msg_buf = NewMsgBuf, + length = Length + Len })}. -tx_cancel(Publishes, State = #mqstate { mode = disk }) -> - {MsgIds, CSize} = - lists:foldl( - fun (Msg = #basic_message { guid = MsgId }, {MsgIdsAcc, CSizeAcc}) -> - {[MsgId | MsgIdsAcc], CSizeAcc + size_of_message(Msg)} - end, {[], 0}, Publishes), - ok = rabbit_disk_queue:tx_cancel(MsgIds), - {ok, lose_memory(CSize, State)}; tx_cancel(Publishes, - State = #mqstate { mode = mixed, is_durable = IsDurable }) -> + State = #mqstate { mode = Mode, is_durable = IsDurable }) -> {PersistentPubs, CSize} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, {Acc, CSizeAcc}) -> CSizeAcc1 = CSizeAcc + size_of_message(Msg), - {case IsPersistent of + {case on_disk(Mode, IsDurable, IsPersistent) of true -> [MsgId | Acc]; _ -> Acc end, CSizeAcc1} end, {[], 0}, Publishes), - ok = - if IsDurable -> - rabbit_disk_queue:tx_cancel(PersistentPubs); - true -> ok - end, + ok = case PersistentPubs of + [] -> ok; + _ -> rabbit_disk_queue:tx_cancel(PersistentPubs) + end, {ok, lose_memory(CSize, State)}. %% [{Msg, AckTag}] @@ -493,10 +466,10 @@ requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, when IsDurable andalso IsPersistent -> [{AckTag, true} | RQ]; ({Msg, noack}, RQ) -> - ok = case RQ == [] of - true -> ok; - false -> rabbit_disk_queue:requeue( - Q, lists:reverse(RQ)) + ok = case RQ of + [] -> ok; + _ -> rabbit_disk_queue:requeue( + Q, lists:reverse(RQ)) end, ok = rabbit_disk_queue:publish(Q, Msg, true), [] -- cgit v1.2.1 From f726df2cfb85a9cc3977bb1904b5f956c0d421d5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 13:19:03 +0100 Subject: cosmetic in MQ. Refactored purge and requeue in MQ. Spotted that read_ahead was back in DQ. Also got noisier about reporting misreads in DQ. 
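On the misread reporting: a failed read now surfaces as a structured
throw rather than a silent badmatch. A caller-side sketch (illustrative;
only the shape of the thrown term is taken from the diff below, and the
read_stored_message signature is sketched from its context):

    try
        read_stored_message(StoreEntry, State)
    catch
        throw:{error, {misread, Details}} ->
            %% Details = [{old_state, _}, {file, _}, {offset, _}, {read, _}]
            ok = error_logger:error_report({disk_queue_misread, Details}),
            throw({error, {misread, Details}})
    end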
--- src/rabbit_disk_queue.erl | 13 +++-- src/rabbit_mixed_queue.erl | 131 +++++++++++++++++++++------------------------ 2 files changed, 71 insertions(+), 73 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index cba84ed7..b13f7566 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -417,7 +417,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> file_size_limit = FileSizeLimit, read_file_hc_cache = rabbit_file_handle_cache:init( ReadFileHandlesLimit, - [read, raw, binary, read_ahead]), + [read, raw, binary]), on_sync_txns = [], commit_timer_ref = undefined, last_sync_offset = 0, @@ -885,8 +885,15 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, with_read_handle_at( File, Offset, fun(Hdl) -> - {ok, _} = Res = - read_message_from_disk(Hdl, TotalSize), + Res = case read_message_from_disk(Hdl, TotalSize) of + {ok, {_, _, _}} = Obj -> Obj; + {ok, Rest} -> + throw({error, + {misread, [{old_state, State}, + {file, File}, + {offset, Offset}, + {read, Rest}]}}) + end, {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, Res} end, State), Message = #basic_message {} = bin_to_msg(MsgBody), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 33cb38c4..ddc5aace 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -303,12 +303,12 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, State = true -> rabbit_disk_queue:publish(Q, Msg, false); false -> ok end, - NewMsgBuf = case Mode of - disk -> inc_queue_length(MsgBuf, 1); - mixed -> queue:in({Msg, false}, MsgBuf) - end, + MsgBuf1 = case Mode of + disk -> inc_queue_length(MsgBuf, 1); + mixed -> queue:in({Msg, false}, MsgBuf) + end, {ok, gain_memory(size_of_message(Msg), - State #mqstate { msg_buf = NewMsgBuf, + State #mqstate { msg_buf = MsgBuf1, length = Length + 1 })}. %% Assumption here is that the queue is empty already (only called via @@ -426,12 +426,12 @@ tx_commit(Publishes, MsgsWithAcks, Q, PersistentPubs, RealAcks) end, Len = length(Publishes), - NewMsgBuf = case Mode of - disk -> inc_queue_length(MsgBuf, Len); - mixed -> ToAdd = [{Msg, false} || Msg <- Publishes], - queue:join(MsgBuf, queue:from_list(ToAdd)) - end, - {ok, lose_memory(ASize, State #mqstate { msg_buf = NewMsgBuf, + MsgBuf1 = case Mode of + disk -> inc_queue_length(MsgBuf, Len); + mixed -> ToAdd = [{Msg, false} || Msg <- Publishes], + queue:join(MsgBuf, queue:from_list(ToAdd)) + end, + {ok, lose_memory(ASize, State #mqstate { msg_buf = MsgBuf1, length = Length + Len })}. tx_cancel(Publishes, @@ -453,67 +453,58 @@ tx_cancel(Publishes, {ok, lose_memory(CSize, State)}. %% [{Msg, AckTag}] -requeue(MessagesWithAckTags, State = #mqstate { mode = disk, queue = Q, - is_durable = IsDurable, - length = Length, - msg_buf = MsgBuf }) -> - %% here, we may have messages with no ack tags, because of the - %% fact they are not persistent, but nevertheless we want to - %% requeue them. This means publishing them delivered. 
- Requeue - = lists:foldl( - fun ({#basic_message { is_persistent = IsPersistent }, AckTag}, RQ) - when IsDurable andalso IsPersistent -> - [{AckTag, true} | RQ]; - ({Msg, noack}, RQ) -> - ok = case RQ of - [] -> ok; - _ -> rabbit_disk_queue:requeue( - Q, lists:reverse(RQ)) - end, - ok = rabbit_disk_queue:publish(Q, Msg, true), - [] - end, [], MessagesWithAckTags), - ok = rabbit_disk_queue:requeue(Q, lists:reverse(Requeue)), - Len = length(MessagesWithAckTags), - {ok, State #mqstate { msg_buf = inc_queue_length(MsgBuf, Len), - length = Length + Len }}; -requeue(MessagesWithAckTags, State = #mqstate { mode = mixed, queue = Q, - msg_buf = MsgBuf, - is_durable = IsDurable, - length = Length }) -> - {PersistentPubs, MsgBuf1} = - lists:foldl( - fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, - {Acc, MsgBuf2}) -> - Acc1 = - case IsDurable andalso IsPersistent of - true -> [{AckTag, true} | Acc]; - false -> Acc - end, - {Acc1, queue:in({Msg, true}, MsgBuf2)} - end, {[], MsgBuf}, MessagesWithAckTags), - ok = case PersistentPubs of +requeue(MsgsWithAckTags, + State = #mqstate { mode = Mode, queue = Q, msg_buf = MsgBuf, + is_durable = IsDurable, length = Length }) -> + RQ = lists:foldl( + fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, + RQAcc) -> + case IsDurable andalso IsPersistent of + true -> + [{AckTag, true} | RQAcc]; + false -> + case Mode of + mixed -> + RQAcc; + disk when noack =:= AckTag -> + ok = case RQAcc of + [] -> ok; + _ -> rabbit_disk_queue:requeue + (Q, lists:reverse(RQAcc)) + end, + ok = rabbit_disk_queue:publish(Q, Msg, true), + [] + end + end + end, [], MsgsWithAckTags), + ok = case RQ of [] -> ok; - _ -> rabbit_disk_queue:requeue(Q, lists:reverse(PersistentPubs)) + _ -> rabbit_disk_queue:requeue(Q, lists:reverse(RQ)) end, - {ok, State #mqstate {msg_buf = MsgBuf1, - length = Length + length(MessagesWithAckTags)}}. - -purge(State = #mqstate { queue = Q, mode = disk, length = Count, - memory_size = QSize }) -> - Count = rabbit_disk_queue:purge(Q), - {Count, lose_memory(QSize, State)}; -purge(State = #mqstate { queue = Q, mode = mixed, length = Length, - memory_size = QSize, prefetcher = Prefetcher }) -> - case Prefetcher of - undefined -> ok; - _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) - end, - rabbit_disk_queue:purge(Q), - {Length, lose_memory(QSize, State #mqstate { msg_buf = queue:new(), - length = 0, - prefetcher = undefined })}. + Len = length(MsgsWithAckTags), + MsgBuf1 = case Mode of + mixed -> ToAdd = [{Msg, true} || {Msg, _} <- MsgsWithAckTags], + queue:join(MsgBuf, queue:from_list(ToAdd)); + disk -> inc_queue_length(MsgBuf, Len) + end, + {ok, State #mqstate { msg_buf = MsgBuf1, length = Length + Len }}. + +purge(State = #mqstate { queue = Q, mode = Mode, length = Count, + prefetcher = Prefetcher, memory_size = QSize }) -> + PurgedFromDisk = rabbit_disk_queue:purge(Q), + Count = case Mode of + disk -> + PurgedFromDisk; + mixed -> + case Prefetcher of + undefined -> ok; + _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) + end, + Count + end, + {Count, lose_memory(QSize, State #mqstate { msg_buf = queue:new(), + length = 0, + prefetcher = undefined })}. delete_queue(State = #mqstate { queue = Q, memory_size = QSize, prefetcher = Prefetcher -- cgit v1.2.1 From 4bf9311cf0c2b63c93d0a513564bd9ae315ba456 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 14:01:02 +0100 Subject: Everything outstanding on MQ. +prefetcher:stop. 
Also give DQ more time to shutdown (well, give everyone more time to shut down...). And tx_cancel => tx_rollback in MQ and DQ --- src/rabbit.erl | 2 +- src/rabbit_amqqueue_process.erl | 15 +++----- src/rabbit_disk_queue.erl | 14 ++++---- src/rabbit_mixed_queue.erl | 79 +++++++++++++++++++++-------------------- src/rabbit_queue_prefetcher.erl | 9 +++-- src/rabbit_tests.erl | 2 +- 6 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 88c60eb9..665f10a2 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -243,7 +243,7 @@ print_banner() -> start_child(Mod) -> {ok,_} = supervisor:start_child(rabbit_sup, {Mod, {Mod, start_link, []}, - transient, 100, worker, [Mod]}), + transient, 1000, worker, [Mod]}), ok. ensure_working_log_handlers() -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 99951ae1..b4b06b16 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -122,12 +122,8 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> terminate(_Reason, State) -> %% FIXME: How do we cancel active subscriptions? QName = qname(State), - NewState = - lists:foldl(fun (Txn, State1) -> - rollback_transaction(Txn, State1) - end, State, all_tx()), - rabbit_mixed_queue:delete_queue(NewState #q.mixed_state), - stop_memory_timer(NewState), + rabbit_mixed_queue:delete_queue(State #q.mixed_state), + stop_memory_timer(State), ok = rabbit_amqqueue:internal_delete(QName). code_change(_OldVsn, State, _Extra) -> @@ -467,9 +463,6 @@ erase_tx(Txn) -> all_tx_record() -> [T || {{txn, _}, T} <- get()]. -all_tx() -> - [Txn || {{txn, Txn}, _} <- get()]. - record_pending_message(Txn, ChPid, Message) -> Tx = #tx{pending_messages = Pending} = lookup_tx(Txn), record_current_channel_tx(ChPid, Txn), @@ -504,8 +497,8 @@ commit_transaction(Txn, State) -> rollback_transaction(Txn, State) -> #tx { pending_messages = PendingMessages } = lookup_tx(Txn), - {ok, MS} = rabbit_mixed_queue:tx_cancel(PendingMessages, - State #q.mixed_state), + {ok, MS} = rabbit_mixed_queue:tx_rollback(PendingMessages, + State #q.mixed_state), erase_tx(Txn), State #q { mixed_state = MS }. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b13f7566..d9f318e0 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -40,7 +40,7 @@ -export([handle_pre_hibernate/1]). -export([publish/3, fetch/1, phantom_fetch/1, ack/2, tx_publish/1, tx_commit/3, - tx_cancel/1, requeue/2, purge/1, delete_queue/1, + tx_rollback/1, requeue/2, purge/1, delete_queue/1, delete_non_durable_queues/1, requeue_next_n/2, len/1, foldl/3, prefetch/1 ]). @@ -266,7 +266,7 @@ -spec(tx_publish/1 :: (message()) -> 'ok'). -spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean()}], [ack_tag()]) -> 'ok'). --spec(tx_cancel/1 :: ([msg_id()]) -> 'ok'). +-spec(tx_rollback/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{ack_tag(), boolean()}]) -> 'ok'). -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). @@ -313,8 +313,8 @@ tx_commit(Q, PubMsgIds, AckSeqIds) when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> gen_server2:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). -tx_cancel(MsgIds) when is_list(MsgIds) -> - gen_server2:cast(?SERVER, {tx_cancel, MsgIds}). +tx_rollback(MsgIds) when is_list(MsgIds) -> + gen_server2:cast(?SERVER, {tx_rollback, MsgIds}). 
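%% Usage sketch of the renamed API (illustrative; tx_sketch, Decision,
%% MsgA and MsgB are hypothetical): tx_publish/1 and tx_rollback/1 stay
%% asynchronous casts, while tx_commit/3 remains a synchronous call,
%% since the publisher must not proceed until the commit has reached the
%% disk queue.
tx_sketch(Q, MsgA = #basic_message { guid = IdA },
          MsgB = #basic_message { guid = IdB }, Decision) ->
    ok = rabbit_disk_queue:tx_publish(MsgA),
    ok = rabbit_disk_queue:tx_publish(MsgB),
    case Decision of
        commit   -> rabbit_disk_queue:tx_commit(
                      Q, [{IdA, false}, {IdB, false}], []);
        rollback -> rabbit_disk_queue:tx_rollback([IdA, IdB])
    end.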
requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> gen_server2:cast(?SERVER, {requeue, Q, MsgSeqIds}). @@ -508,8 +508,8 @@ handle_cast({ack, Q, MsgSeqIds}, State) -> handle_cast({tx_publish, Message}, State) -> {ok, State1} = internal_tx_publish(Message, State), noreply(State1); -handle_cast({tx_cancel, MsgIds}, State) -> - {ok, State1} = internal_tx_cancel(MsgIds, State), +handle_cast({tx_rollback, MsgIds}, State) -> + {ok, State1} = internal_tx_rollback(MsgIds, State), noreply(State1); handle_cast({requeue, Q, MsgSeqIds}, State) -> {ok, State1} = internal_requeue(Q, MsgSeqIds, State), @@ -1090,7 +1090,7 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}), {ok, {MsgId, WriteSeqId}, State1}. -internal_tx_cancel(MsgIds, State) -> +internal_tx_rollback(MsgIds, State) -> %% we don't need seq ids because we're not touching mnesia, %% because seqids were never assigned MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index ddc5aace..af4cd834 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -36,7 +36,7 @@ -export([init/2]). -export([publish/2, publish_delivered/2, fetch/1, ack/2, - tx_publish/2, tx_commit/3, tx_cancel/2, requeue/2, purge/1, + tx_publish/2, tx_commit/3, tx_rollback/2, requeue/2, purge/1, len/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). -export([set_storage_mode/3, storage_mode/1, @@ -69,7 +69,7 @@ memory_loss :: (non_neg_integer() | 'undefined'), prefetcher :: (pid() | 'undefined') }). --type(acktag() :: ( 'noack' | { non_neg_integer(), non_neg_integer() })). +-type(acktag() :: ( 'no_on_disk' | { non_neg_integer(), non_neg_integer() })). -type(okmqs() :: {'ok', mqstate()}). -spec(init/2 :: (queue_name(), boolean()) -> okmqs()). @@ -82,7 +82,7 @@ -spec(ack/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). -spec(tx_publish/2 :: (message(), mqstate()) -> okmqs()). -spec(tx_commit/3 :: ([message()], [acktag()], mqstate()) -> okmqs()). --spec(tx_cancel/2 :: ([message()], mqstate()) -> okmqs()). +-spec(tx_rollback/2 :: ([message()], mqstate()) -> okmqs()). -spec(requeue/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). -spec(purge/1 :: (mqstate()) -> okmqs()). @@ -167,7 +167,7 @@ set_storage_mode(mixed, TxnMessages, State = %% Remove txn messages from disk which are neither persistent and %% durable. This is necessary to avoid leaks. This is also pretty - %% much the inverse behaviour of our own tx_cancel/2 which is why + %% much the inverse behaviour of our own tx_rollback/2 which is why %% we're not using it. Cancel = lists:foldl( @@ -178,7 +178,7 @@ set_storage_mode(mixed, TxnMessages, State = end end, [], TxnMessages), ok = if Cancel == [] -> ok; - true -> rabbit_disk_queue:tx_cancel(Cancel) + true -> rabbit_disk_queue:tx_rollback(Cancel) end, garbage_collect(), {ok, State #mqstate { mode = mixed }}. @@ -326,7 +326,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, {MsgId, IsPersistent, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), {ok, AckTag, State1}; publish_delivered(Msg, State = #mqstate { length = 0 }) -> - {ok, noack, gain_memory(size_of_message(Msg), State)}. + {ok, not_on_disk, gain_memory(size_of_message(Msg), State)}. 
fetch(State = #mqstate { length = 0 }) -> {empty, State}; @@ -346,7 +346,7 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, = rabbit_disk_queue:phantom_fetch(Q), AckTag1; false -> - noack + not_on_disk end, {{Msg, IsDelivered, AckTag, Rem}, State1 #mqstate { msg_buf = MsgBuf1 }}; @@ -367,36 +367,37 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag1, Rem}, State1 #mqstate { msg_buf = MsgBuf2 }}; _ -> - case rabbit_queue_prefetcher:drain(Prefetcher) of - empty -> fetch(State #mqstate { prefetcher = undefined }); - {Fetched, Len, Status} -> - MsgBuf2 = dec_queue_length(MsgBuf, Len), - %% use State, not State1 as we've not dec'd length - fetch(State #mqstate - { msg_buf = queue:join(Fetched, MsgBuf2), - prefetcher = case Status of - finished -> undefined; - continuing -> Prefetcher - end }) - end + fetch(case rabbit_queue_prefetcher:drain(Prefetcher) of + empty -> State #mqstate { prefetcher = undefined }; + {Fetched, Len, Status} -> + MsgBuf2 = dec_queue_length(MsgBuf, Len), + %% use State, not State1 as we've not dec'd length + State #mqstate + { msg_buf = queue:join(Fetched, MsgBuf2), + prefetcher = case Status of + finished -> undefined; + continuing -> Prefetcher + end } + end) end. maybe_ack(_Q, true, true, AckTag) -> AckTag; maybe_ack(Q, _, _, AckTag) -> ok = rabbit_disk_queue:ack(Q, [AckTag]), - noack. + not_on_disk. -remove_noacks(MsgsWithAcks) -> +remove_diskless(MsgsWithAcks) -> lists:foldl( - fun ({Msg, noack}, {AccAckTags, AccSize}) -> - {AccAckTags, size_of_message(Msg) + AccSize}; - ({Msg, AckTag}, {AccAckTags, AccSize}) -> - {[AckTag | AccAckTags], size_of_message(Msg) + AccSize} + fun ({Msg, AckTag}, {AccAckTags, AccSize}) -> + {case AckTag of + not_on_disk -> AccAckTags; + _ -> [AckTag | AccAckTags] + end, size_of_message(Msg) + AccSize} end, {[], 0}, MsgsWithAcks). ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> - {AckTags, ASize} = remove_noacks(MsgsWithAcks), + {AckTags, ASize} = remove_diskless(MsgsWithAcks), ok = case AckTags of [] -> ok; _ -> rabbit_disk_queue:ack(Q, AckTags) @@ -419,7 +420,7 @@ tx_commit(Publishes, MsgsWithAcks, #basic_message { guid = MsgId, is_persistent = IsPersistent } <- Publishes, on_disk(Mode, IsDurable, IsPersistent)], - {RealAcks, ASize} = remove_noacks(MsgsWithAcks), + {RealAcks, ASize} = remove_diskless(MsgsWithAcks), ok = case {PersistentPubs, RealAcks} of {[], []} -> ok; _ -> rabbit_disk_queue:tx_commit( @@ -434,8 +435,8 @@ tx_commit(Publishes, MsgsWithAcks, {ok, lose_memory(ASize, State #mqstate { msg_buf = MsgBuf1, length = Length + Len })}. -tx_cancel(Publishes, - State = #mqstate { mode = Mode, is_durable = IsDurable }) -> +tx_rollback(Publishes, + State = #mqstate { mode = Mode, is_durable = IsDurable }) -> {PersistentPubs, CSize} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent, @@ -448,7 +449,7 @@ tx_cancel(Publishes, end, {[], 0}, Publishes), ok = case PersistentPubs of [] -> ok; - _ -> rabbit_disk_queue:tx_cancel(PersistentPubs) + _ -> rabbit_disk_queue:tx_rollback(PersistentPubs) end, {ok, lose_memory(CSize, State)}. 
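%% Illustrative values only (MsgOnDisk and MsgRamOnly are hypothetical):
%% remove_diskless/1 splits a mixed ack list into the tags the disk
%% queue must see and the combined size of all the messages, RAM-only
%% ones included. A message never written to disk carries the atom
%% not_on_disk in place of a real {SeqId, Offset}-style ack tag:
%%
%%   {[{0, 42}], Size} =
%%       remove_diskless([{MsgOnDisk, {0, 42}},
%%                        {MsgRamOnly, not_on_disk}]),
%%   %% Size =:= size_of_message(MsgOnDisk) + size_of_message(MsgRamOnly)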
@@ -466,7 +467,7 @@ requeue(MsgsWithAckTags, case Mode of mixed -> RQAcc; - disk when noack =:= AckTag -> + disk when not_on_disk =:= AckTag -> ok = case RQAcc of [] -> ok; _ -> rabbit_disk_queue:requeue @@ -496,10 +497,10 @@ purge(State = #mqstate { queue = Q, mode = Mode, length = Count, disk -> PurgedFromDisk; mixed -> - case Prefetcher of - undefined -> ok; - _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) - end, + ok = case Prefetcher of + undefined -> ok; + _ -> rabbit_queue_prefetcher:stop(Prefetcher) + end, Count end, {Count, lose_memory(QSize, State #mqstate { msg_buf = queue:new(), @@ -509,10 +510,10 @@ purge(State = #mqstate { queue = Q, mode = Mode, length = Count, delete_queue(State = #mqstate { queue = Q, memory_size = QSize, prefetcher = Prefetcher }) -> - case Prefetcher of - undefined -> ok; - _ -> rabbit_queue_prefetcher:drain_and_stop(Prefetcher) - end, + ok = case Prefetcher of + undefined -> ok; + _ -> rabbit_queue_prefetcher:stop(Prefetcher) + end, ok = rabbit_disk_queue:delete_queue(Q), {ok, lose_memory(QSize, State #mqstate { length = 0, msg_buf = queue:new(), prefetcher = undefined })}. diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index 6f276d86..ffa98d69 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -38,7 +38,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([publish/2, drain/1, drain_and_stop/1]). +-export([publish/2, drain/1, drain_and_stop/1, stop/1]). -include("rabbit.hrl"). @@ -191,6 +191,9 @@ drain(Prefetcher) -> drain_and_stop(Prefetcher) -> gen_server2:call(Prefetcher, drain_and_stop, infinity). +stop(Prefetcher) -> + gen_server2:call(Prefetcher, stop, infinity). + init([Q, Count, QPid]) -> %% link isn't enough because the signal will not appear if the %% queue exits normally. Thus have to use monitor. @@ -240,7 +243,9 @@ handle_call(drain_and_stop, _From, State = #pstate { buf_length = 0 }) -> {stop, normal, empty, State}; handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf, buf_length = Length }) -> - {stop, normal, {MsgBuf, Length}, State}. + {stop, normal, {MsgBuf, Length}, State}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. handle_cast(Msg, State) -> exit({unexpected_message_cast_to_prefetcher, Msg, State}). 
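The new stop/1 above follows the standard synchronous-shutdown idiom for a
gen_server: returning {stop, normal, ok, State} sends the ok reply just before
terminate/2 runs, so by the time the caller unblocks the server has committed
to shutting down. A minimal standalone equivalent, as a sketch using plain
gen_server rather than rabbit's gen_server2 (module and names are
illustrative):

    -module(stoppable).
    -behaviour(gen_server).
    -export([start_link/0, stop/1]).
    -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
             terminate/2, code_change/3]).

    start_link() -> gen_server:start_link(?MODULE, [], []).

    %% Synchronous: the caller blocks until the server accepts the stop.
    stop(Pid) -> gen_server:call(Pid, stop, infinity).

    init([]) -> {ok, no_state}.
    handle_call(stop, _From, State) -> {stop, normal, ok, State}.
    handle_cast(_Msg, State) -> {noreply, State}.
    handle_info(_Info, State) -> {noreply, State}.
    terminate(_Reason, _State) -> ok.
    code_change(_OldVsn, State, _Extra) -> {ok, State}.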
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ae4117aa..884adbf8 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1233,7 +1233,7 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - 0 = rabbit_mixed_queue:len(MS8), rabbit_mixed_queue:ack(AckTags, MS8); cancel -> - {ok, MS6} = rabbit_mixed_queue:tx_cancel(MsgsB, MS5), + {ok, MS6} = rabbit_mixed_queue:tx_rollback(MsgsB, MS5), Len0 = rabbit_mixed_queue:len(MS6), {AckTags, MS8} = lists:foldl( -- cgit v1.2.1 From 2a0749eda3f9dc4fd784b0fde43b29349407c5d9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 14:03:03 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index af4cd834..9b86522f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -367,11 +367,11 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, {{Msg, IsDelivered, AckTag1, Rem}, State1 #mqstate { msg_buf = MsgBuf2 }}; _ -> + %% use State, not State1 as we've not dec'd length fetch(case rabbit_queue_prefetcher:drain(Prefetcher) of empty -> State #mqstate { prefetcher = undefined }; {Fetched, Len, Status} -> MsgBuf2 = dec_queue_length(MsgBuf, Len), - %% use State, not State1 as we've not dec'd length State #mqstate { msg_buf = queue:join(Fetched, MsgBuf2), prefetcher = case Status of -- cgit v1.2.1 From f5f86f6818b8a6b3d7dc132114274bcb88b88e19 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 14:07:36 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9b86522f..bb0ac973 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -353,8 +353,8 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, {Msg = #basic_message { is_persistent = IsPersistent }, IsDelivered, AckTag} -> %% message has come via the prefetcher, thus it's been - %% delivered. If it's not persistent+durable, we should - %% ack it now + %% marked delivered. If it's not persistent+durable, we + %% should ack it now AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), {{Msg, IsDelivered, AckTag1, Rem}, State1 #mqstate { msg_buf = MsgBuf1 }}; -- cgit v1.2.1 From 045e90d857b685479d9163034cd09931ed16d2b1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 15:01:58 +0100 Subject: made sure that messages that go through the MQ have binary properties added. This allows us to measure their size. The effect of this is that all messages that come out of the MQ have binary properties, even if they went in without. This might be controversial. The only reason for doing this is that otherwise, we'd have to convert-to-measure twice, once on the way in, and once on the way out. If people feel strongly about this, please yell. --- src/rabbit_mixed_queue.erl | 45 +++++++++++++++++++++++++++++++-------------- src/rabbit_tests.erl | 14 ++++++++++---- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index bb0ac973..9e0eb13f 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -114,10 +114,21 @@ init(Queue, IsDurable) -> memory_loss = undefined, prefetcher = undefined }}. 
size_of_message( - #basic_message { content = #content { payload_fragments_rev = Payload }}) -> - lists:foldl(fun (Frag, SumAcc) -> - SumAcc + size(Frag) - end, 0, Payload). + #basic_message { content = #content { payload_fragments_rev = Payload, + properties_bin = PropsBin }}) + when is_binary(PropsBin) -> + size(PropsBin) + lists:foldl(fun (Frag, SumAcc) -> + SumAcc + size(Frag) + end, 0, Payload). + +ensure_binary_properties(Msg = #basic_message { + content = Content = #content { + properties = Props, + properties_bin = none }}) -> + Msg #basic_message { content = Content #content { + properties_bin = rabbit_framing:encode_properties(Props) }}; +ensure_binary_properties(Msg) -> + Msg. set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; @@ -299,15 +310,16 @@ on_disk(mixed, _IsDurable, _IsPersistent) -> false. publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = Mode, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> + Msg1 = ensure_binary_properties(Msg), ok = case on_disk(Mode, IsDurable, IsPersistent) of - true -> rabbit_disk_queue:publish(Q, Msg, false); + true -> rabbit_disk_queue:publish(Q, Msg1, false); false -> ok end, MsgBuf1 = case Mode of disk -> inc_queue_length(MsgBuf, 1); - mixed -> queue:in({Msg, false}, MsgBuf) + mixed -> queue:in({Msg1, false}, MsgBuf) end, - {ok, gain_memory(size_of_message(Msg), + {ok, gain_memory(size_of_message(Msg1), State #mqstate { msg_buf = MsgBuf1, length = Length + 1 })}. @@ -318,15 +330,17 @@ publish_delivered(Msg = #basic_message { guid = MsgId, State = #mqstate { is_durable = IsDurable, queue = Q, length = 0 }) when IsDurable andalso IsPersistent -> - ok = rabbit_disk_queue:publish(Q, Msg, true), - State1 = gain_memory(size_of_message(Msg), State), + Msg1 = ensure_binary_properties(Msg), + ok = rabbit_disk_queue:publish(Q, Msg1, true), + State1 = gain_memory(size_of_message(Msg1), State), %% must call phantom_fetch otherwise the msg remains at the head %% of the queue. This is synchronous, but unavoidable as we need %% the AckTag {MsgId, IsPersistent, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), {ok, AckTag, State1}; publish_delivered(Msg, State = #mqstate { length = 0 }) -> - {ok, not_on_disk, gain_memory(size_of_message(Msg), State)}. + Msg1 = ensure_binary_properties(Msg), + {ok, not_on_disk, gain_memory(size_of_message(Msg1), State)}. fetch(State = #mqstate { length = 0 }) -> {empty, State}; @@ -390,10 +404,11 @@ maybe_ack(Q, _, _, AckTag) -> remove_diskless(MsgsWithAcks) -> lists:foldl( fun ({Msg, AckTag}, {AccAckTags, AccSize}) -> + Msg1 = ensure_binary_properties(Msg), {case AckTag of not_on_disk -> AccAckTags; _ -> [AckTag | AccAckTags] - end, size_of_message(Msg) + AccSize} + end, size_of_message(Msg1) + AccSize} end, {[], 0}, MsgsWithAcks). ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> @@ -406,11 +421,12 @@ ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { mode = Mode, is_durable = IsDurable }) -> + Msg1 = ensure_binary_properties(Msg), ok = case on_disk(Mode, IsDurable, IsPersistent) of - true -> rabbit_disk_queue:tx_publish(Msg); + true -> rabbit_disk_queue:tx_publish(Msg1); false -> ok end, - {ok, gain_memory(size_of_message(Msg), State)}. + {ok, gain_memory(size_of_message(Msg1), State)}. 
tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = Mode, queue = Q, msg_buf = MsgBuf, @@ -441,7 +457,8 @@ tx_rollback(Publishes, lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, {Acc, CSizeAcc}) -> - CSizeAcc1 = CSizeAcc + size_of_message(Msg), + Msg1 = ensure_binary_properties(Msg), + CSizeAcc1 = CSizeAcc + size_of_message(Msg1), {case on_disk(Mode, IsDurable, IsPersistent) of true -> [MsgId | Acc]; _ -> Acc diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 884adbf8..44abdda4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -856,6 +856,10 @@ rdq_match_message( MsgId, MsgBody, Size) when size(MsgBody) =:= Size -> ok. +rdq_match_messages(#basic_message { guid = MsgId, content = #content { payload_fragments_rev = MsgBody }}, + #basic_message { guid = MsgId, content = #content { payload_fragments_rev = MsgBody }}) -> + ok. + rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> Startup = rdq_virgin(), rdq_start(), @@ -1226,9 +1230,10 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - lists:foldl( fun (Msg, {Acc, MS7}) -> Rem = Len1 - (Msg #basic_message.guid) - 1, - {{Msg, false, AckTag, Rem}, MS7a} = + {{Msg1, false, AckTag, Rem}, MS7a} = rabbit_mixed_queue:fetch(MS7), - {[{Msg, AckTag} | Acc], MS7a} + ok = rdq_match_messages(Msg, Msg1), + {[{Msg1, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA ++ MsgsB), 0 = rabbit_mixed_queue:len(MS8), rabbit_mixed_queue:ack(AckTags, MS8); @@ -1239,9 +1244,10 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - lists:foldl( fun (Msg, {Acc, MS7}) -> Rem = Len0 - (Msg #basic_message.guid) - 1, - {{Msg, false, AckTag, Rem}, MS7a} = + {{Msg1, false, AckTag, Rem}, MS7a} = rabbit_mixed_queue:fetch(MS7), - {[{Msg, AckTag} | Acc], MS7a} + ok = rdq_match_messages(Msg, Msg1), + {[{Msg1, AckTag} | Acc], MS7a} end, {[], MS6}, MsgsA), 0 = rabbit_mixed_queue:len(MS8), rabbit_mixed_queue:ack(AckTags, MS8) -- cgit v1.2.1 From 3b3b9340c5b34c525829f631e02c3b42d7d15561 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 15:28:42 +0100 Subject: Removal of pointless calls to run_message_queue --- src/rabbit_amqqueue_process.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b4b06b16..5d78b205 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -322,7 +322,7 @@ deliver_or_enqueue(Txn, ChPid, Msg, State) -> %% all these messages have already been delivered at least once and %% not ack'd, but need to be either redelivered or requeued deliver_or_requeue_n([], State) -> - run_message_queue(State); + State; deliver_or_requeue_n(MsgsWithAcks, State) -> Funs = { fun deliver_or_requeue_msgs_pred/2, fun deliver_or_requeue_msgs_deliver/3 }, @@ -332,7 +332,7 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> {ok, MS} = rabbit_mixed_queue:ack(AutoAcks, NewState #q.mixed_state), case OutstandingMsgs of - [] -> run_message_queue(NewState #q { mixed_state = MS }); + [] -> NewState #q { mixed_state = MS }; _ -> {ok, MS1} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), NewState #q { mixed_state = MS1 } end. 
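A note on the rdq_match_messages/2 test helper added above: it relies on
Erlang's non-linear patterns. MsgId and MsgBody are bound by the first
argument and must match in the second, so the function head itself is the
assertion. The same trick in miniature (the tuple shapes are illustrative):

    %% Both tuples must agree on Id and Body, or the call fails to match
    %% a clause - a cheap structural assertion for tests.
    MatchPair = fun ({msg, Id, Body}, {msg, Id, Body}) -> ok end.
    ok = MatchPair({msg, 1, <<"a">>}, {msg, 1, <<"a">>}).
    %% MatchPair({msg, 1, <<"a">>}, {msg, 2, <<"a">>}) raises an error.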
-- cgit v1.2.1 From 01f60fbdf1f45fa78072b0812151885efd0cb6b0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 15:52:11 +0100 Subject: cosmetic + assert major invariant of queue_process on reply and noreply --- src/rabbit_amqqueue_process.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5d78b205..406429ef 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -132,11 +132,16 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- reply(Reply, NewState) -> + assert_invariant(NewState), {reply, Reply, start_memory_timer(NewState), hibernate}. noreply(NewState) -> + assert_invariant(NewState), {noreply, start_memory_timer(NewState), hibernate}. +assert_invariant(#q { active_consumers = AC, mixed_state = MS }) -> + true = (queue:is_empty(AC) orelse rabbit_mixed_queue:is_empty(MS)). + start_memory_timer(State = #q { memory_report_timer = undefined }) -> {ok, TRef} = timer:send_after(?MINIMUM_MEMORY_REPORT_TIME_INTERVAL, report_memory), @@ -329,8 +334,7 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = deliver_msgs_to_consumers( Funs, {length(MsgsWithAcks), [], MsgsWithAcks}, State), - {ok, MS} = rabbit_mixed_queue:ack(AutoAcks, - NewState #q.mixed_state), + {ok, MS} = rabbit_mixed_queue:ack(AutoAcks, NewState #q.mixed_state), case OutstandingMsgs of [] -> NewState #q { mixed_state = MS }; _ -> {ok, MS1} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), -- cgit v1.2.1 From 6586f8ab5a88a3eb01a6a74482c1f75311e33912 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 16:07:11 +0100 Subject: pushed the encoding of the properties into the binary_generator. Plus associated types and changes to MQ. --- include/rabbit.hrl | 6 ++++++ src/rabbit_binary_generator.erl | 19 +++++++++++++++++++ src/rabbit_mixed_queue.erl | 11 +++-------- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 0ba31cb5..06564290 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -127,11 +127,17 @@ properties :: amqp_properties(), properties_bin :: 'none', payload_fragments_rev :: [binary()]}). +-type(unencoded_content() :: undecoded_content()). -type(decoded_content() :: #content{class_id :: amqp_class_id(), properties :: amqp_properties(), properties_bin :: maybe(binary()), payload_fragments_rev :: [binary()]}). +-type(encoded_content() :: + #content{class_id :: amqp_class_id(), + properties :: maybe(amqp_properties()), + properties_bin :: binary(), + payload_fragments_rev :: [binary()]}). -type(content() :: undecoded_content() | decoded_content()). -type(basic_message() :: #basic_message{exchange_name :: exchange_name(), diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index 6cfa9e6d..0b68f33f 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -46,6 +46,7 @@ build_heartbeat_frame/0]). -export([generate_table/1, encode_properties/2]). -export([check_empty_content_body_frame_size/0]). +-export([ensure_content_encoded/1, clear_encoded_content/1]). -import(lists). @@ -63,6 +64,8 @@ -spec(generate_table/1 :: (amqp_table()) -> binary()). -spec(encode_properties/2 :: ([amqp_property_type()], [any()]) -> binary()). -spec(check_empty_content_body_frame_size/0 :: () -> 'ok'). 
+-spec(ensure_content_encoded/1 :: (content()) -> encoded_content()). +-spec(clear_encoded_content/1 :: (content()) -> unencoded_content()). -endif. @@ -275,3 +278,19 @@ check_empty_content_body_frame_size() -> exit({incorrect_empty_content_body_frame_size, ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE}) end. + +ensure_content_encoded(Content = #content{properties_bin = PropsBin}) + when PropsBin =/= 'none' -> + Content; +ensure_content_encoded(Content = #content{properties = Props}) -> + Content #content{properties_bin = rabbit_framing:encode_properties(Props)}. + +clear_encoded_content(Content = #content{properties_bin = none}) -> + Content; +clear_encoded_content(Content = #content{properties = none}) -> + %% Only clear when we can rebuild the properties later in + %% accordance to the content record definition comment - maximum + %% one of properties and properties_bin can be 'none' + Content; +clear_encoded_content(Content = #content{}) -> + Content#content{properties_bin = none}. diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9e0eb13f..a80cadf3 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -121,14 +121,9 @@ size_of_message( SumAcc + size(Frag) end, 0, Payload). -ensure_binary_properties(Msg = #basic_message { - content = Content = #content { - properties = Props, - properties_bin = none }}) -> - Msg #basic_message { content = Content #content { - properties_bin = rabbit_framing:encode_properties(Props) }}; -ensure_binary_properties(Msg) -> - Msg. +ensure_binary_properties(Msg = #basic_message { content = Content }) -> + Msg #basic_message { content = rabbit_binary_generator: + ensure_content_encoded(Content) }. set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; -- cgit v1.2.1 From 9a41e5555466290fb71ccf25706f9896d99292f8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 16:18:23 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index a80cadf3..21e0eb31 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -122,8 +122,8 @@ size_of_message( end, 0, Payload). ensure_binary_properties(Msg = #basic_message { content = Content }) -> - Msg #basic_message { content = rabbit_binary_generator: - ensure_content_encoded(Content) }. + Msg #basic_message + { content = rabbit_binary_generator:ensure_content_encoded(Content) }. set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; -- cgit v1.2.1 From f867b097a19711d4a1a877c9505a81adf3665c10 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 16:19:21 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 21e0eb31..6fc5db61 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -122,8 +122,8 @@ size_of_message( end, 0, Payload). ensure_binary_properties(Msg = #basic_message { content = Content }) -> - Msg #basic_message - { content = rabbit_binary_generator:ensure_content_encoded(Content) }. + Msg #basic_message { + content = rabbit_binary_generator:ensure_content_encoded(Content) }. 
set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> {ok, State}; -- cgit v1.2.1 From 682aa7982bb6cb8e631d6f9c7423f22f75f65317 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 26 Aug 2009 18:54:44 +0100 Subject: cosmetic: place functions in appropriate sections of the file --- src/rabbit_mixed_queue.erl | 426 +++++++++++++++++++++++---------------------- 1 file changed, 218 insertions(+), 208 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 6fc5db61..d131eea1 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -56,6 +56,8 @@ -define(TO_DISK_MAX_FLUSH_SIZE, 100000). +%%---------------------------------------------------------------------------- + -ifdef(use_specs). -type(mode() :: ( 'disk' | 'mixed' )). @@ -85,14 +87,11 @@ -spec(tx_rollback/2 :: ([message()], mqstate()) -> okmqs()). -spec(requeue/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). -spec(purge/1 :: (mqstate()) -> okmqs()). - -spec(delete_queue/1 :: (mqstate()) -> {'ok', mqstate()}). - -spec(len/1 :: (mqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (mqstate()) -> boolean()). -spec(set_storage_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). - -spec(estimate_queue_memory_and_reset_counters/1 :: (mqstate()) -> {mqstate(), non_neg_integer(), non_neg_integer(), non_neg_integer()}). @@ -100,6 +99,8 @@ -endif. +%%---------------------------------------------------------------------------- + init(Queue, IsDurable) -> Len = rabbit_disk_queue:len(Queue), MsgBuf = inc_queue_length(queue:new(), Len), @@ -113,195 +114,6 @@ init(Queue, IsDurable) -> memory_size = Size, memory_gain = undefined, memory_loss = undefined, prefetcher = undefined }}. -size_of_message( - #basic_message { content = #content { payload_fragments_rev = Payload, - properties_bin = PropsBin }}) - when is_binary(PropsBin) -> - size(PropsBin) + lists:foldl(fun (Frag, SumAcc) -> - SumAcc + size(Frag) - end, 0, Payload). - -ensure_binary_properties(Msg = #basic_message { content = Content }) -> - Msg #basic_message { - content = rabbit_binary_generator:ensure_content_encoded(Content) }. - -set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> - {ok, State}; -set_storage_mode(disk, TxnMessages, State = - #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable, prefetcher = Prefetcher }) -> - State1 = State #mqstate { mode = disk }, - MsgBuf1 = - case Prefetcher of - undefined -> MsgBuf; - _ -> - case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of - empty -> MsgBuf; - {Fetched, Len} -> - MsgBuf2 = dec_queue_length(MsgBuf, Len), - queue:join(Fetched, MsgBuf2) - end - end, - %% We enqueue _everything_ here. This means that should a message - %% already be in the disk queue we must remove it and add it back - %% in. Fortunately, by using requeue, we avoid rewriting the - %% message on disk. - %% Note we also batch together messages on disk so that we minimise - %% the calls to requeue. - {ok, MsgBuf3} = - send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], [], queue:new()), - %% tx_publish txn messages. Some of these will have been already - %% published if they really are durable and persistent which is - %% why we can't just use our own tx_publish/2 function (would end - %% up publishing twice, so refcount would go wrong in disk_queue). 
- lists:foreach( - fun (Msg = #basic_message { is_persistent = IsPersistent }) -> - ok = case IsDurable andalso IsPersistent of - true -> ok; - _ -> rabbit_disk_queue:tx_publish(Msg) - end - end, TxnMessages), - garbage_collect(), - {ok, State1 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; -set_storage_mode(mixed, TxnMessages, State = - #mqstate { mode = disk, is_durable = IsDurable }) -> - %% The queue has a token just saying how many msgs are on disk - %% (this is already built for us when in disk mode). - %% Don't actually do anything to the disk - %% Don't start prefetcher just yet because the queue maybe busy - - %% wait for hibernate timeout in the amqqueue_process. - - %% Remove txn messages from disk which are neither persistent and - %% durable. This is necessary to avoid leaks. This is also pretty - %% much the inverse behaviour of our own tx_rollback/2 which is why - %% we're not using it. - Cancel = - lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> - case IsDurable andalso IsPersistent of - true -> Acc; - false -> [Msg #basic_message.guid | Acc] - end - end, [], TxnMessages), - ok = if Cancel == [] -> ok; - true -> rabbit_disk_queue:tx_rollback(Cancel) - end, - garbage_collect(), - {ok, State #mqstate { mode = mixed }}. - -send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, - Commit, Ack, MsgBuf) -> - case queue:out(Queue) of - {empty, _Queue} -> - ok = flush_messages_to_disk_queue(Q, Commit, Ack), - {[], []} = flush_requeue_to_disk_queue(Q, RequeueCount, [], []), - {ok, MsgBuf}; - {{value, {Msg = #basic_message { is_persistent = IsPersistent }, - IsDelivered}}, Queue1} -> - case IsDurable andalso IsPersistent of - true -> %% it's already in the Q - send_messages_to_disk( - IsDurable, Q, Queue1, PublishCount, RequeueCount + 1, - Commit, Ack, inc_queue_length(MsgBuf, 1)); - false -> - republish_message_to_disk_queue( - IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, - Ack, MsgBuf, Msg, IsDelivered) - end; - {{value, {Msg, IsDelivered, AckTag}}, Queue1} -> - %% these have come via the prefetcher, so are no longer in - %% the disk queue so they need to be republished - republish_message_to_disk_queue( - IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, - [AckTag | Ack], MsgBuf, Msg, IsDelivered); - {{value, {on_disk, Count}}, Queue1} -> - send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, - RequeueCount + Count, Commit, Ack, - inc_queue_length(MsgBuf, Count)) - end. - -republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount, - Commit, Ack, MsgBuf, Msg = - #basic_message { guid = MsgId }, IsDelivered) -> - {Commit1, Ack1} = flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack), - ok = rabbit_disk_queue:tx_publish(Msg), - {PublishCount1, Commit2, Ack2} = - case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of - true -> ok = flush_messages_to_disk_queue( - Q, [{MsgId, IsDelivered} | Commit1], Ack1), - {0, [], []}; - false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1], Ack1} - end, - send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0, - Commit2, Ack2, inc_queue_length(MsgBuf, 1)). - -flush_messages_to_disk_queue(_Q, [], []) -> - ok; -flush_messages_to_disk_queue(Q, Commit, Ack) -> - rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), Ack). 
- -flush_requeue_to_disk_queue(_Q, 0, Commit, Ack) -> - {Commit, Ack}; -flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> - ok = flush_messages_to_disk_queue(Q, Commit, Ack), - ok = rabbit_disk_queue:filesync(), - ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), - {[], []}. - -gain_memory(Inc, State = #mqstate { memory_size = QSize, - memory_gain = Gain }) -> - State #mqstate { memory_size = QSize + Inc, - memory_gain = Gain + Inc }. - -lose_memory(Dec, State = #mqstate { memory_size = QSize, - memory_loss = Loss }) -> - State #mqstate { memory_size = QSize - Dec, - memory_loss = Loss + Dec }. - -inc_queue_length(MsgBuf, 0) -> - MsgBuf; -inc_queue_length(MsgBuf, Count) -> - {NewCount, MsgBufTail} = - case queue:out_r(MsgBuf) of - {empty, MsgBuf1} -> {Count, MsgBuf1}; - {{value, {on_disk, Len}}, MsgBuf1} -> {Len + Count, MsgBuf1}; - {{value, _}, _MsgBuf1} -> {Count, MsgBuf} - end, - queue:in({on_disk, NewCount}, MsgBufTail). - -dec_queue_length(MsgBuf, Count) -> - case queue:out(MsgBuf) of - {{value, {on_disk, Len}}, MsgBuf1} -> - case Len of - Count -> - MsgBuf1; - _ when Len > Count -> - queue:in_r({on_disk, Len-Count}, MsgBuf1) - end; - _ -> MsgBuf - end. - -maybe_prefetch(State = #mqstate { prefetcher = undefined, - mode = mixed, - msg_buf = MsgBuf, - queue = Q }) -> - case queue:peek(MsgBuf) of - {value, {on_disk, Count}} -> - %% only prefetch for the next contiguous block on - %% disk. Beyond there, we either hit the end of the queue, - %% or the next msg is already in RAM, held by us, the - %% mixed queue - {ok, Prefetcher} = rabbit_queue_prefetcher:start_link(Q, Count), - State #mqstate { prefetcher = Prefetcher }; - _ -> State - end; -maybe_prefetch(State) -> - State. - -on_disk(disk, _IsDurable, _IsPersistent) -> true; -on_disk(mixed, true, true) -> true; -on_disk(mixed, _IsDurable, _IsPersistent) -> false. - publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = Mode, is_durable = IsDurable, msg_buf = MsgBuf, length = Length }) -> @@ -390,22 +202,6 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, end) end. -maybe_ack(_Q, true, true, AckTag) -> - AckTag; -maybe_ack(Q, _, _, AckTag) -> - ok = rabbit_disk_queue:ack(Q, [AckTag]), - not_on_disk. - -remove_diskless(MsgsWithAcks) -> - lists:foldl( - fun ({Msg, AckTag}, {AccAckTags, AccSize}) -> - Msg1 = ensure_binary_properties(Msg), - {case AckTag of - not_on_disk -> AccAckTags; - _ -> [AckTag | AccAckTags] - end, size_of_message(Msg1) + AccSize} - end, {[], 0}, MsgsWithAcks). - ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> {AckTags, ASize} = remove_diskless(MsgsWithAcks), ok = case AckTags of @@ -536,9 +332,223 @@ len(#mqstate { length = Length }) -> is_empty(#mqstate { length = Length }) -> 0 == Length. +%%---------------------------------------------------------------------------- +%% storage mode management +%%---------------------------------------------------------------------------- + +set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> + {ok, State}; +set_storage_mode(disk, TxnMessages, State = + #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, + is_durable = IsDurable, prefetcher = Prefetcher }) -> + State1 = State #mqstate { mode = disk }, + MsgBuf1 = + case Prefetcher of + undefined -> MsgBuf; + _ -> + case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of + empty -> MsgBuf; + {Fetched, Len} -> + MsgBuf2 = dec_queue_length(MsgBuf, Len), + queue:join(Fetched, MsgBuf2) + end + end, + %% We enqueue _everything_ here. 
This means that should a message + %% already be in the disk queue we must remove it and add it back + %% in. Fortunately, by using requeue, we avoid rewriting the + %% message on disk. + %% Note we also batch together messages on disk so that we minimise + %% the calls to requeue. + {ok, MsgBuf3} = + send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], [], queue:new()), + %% tx_publish txn messages. Some of these will have been already + %% published if they really are durable and persistent which is + %% why we can't just use our own tx_publish/2 function (would end + %% up publishing twice, so refcount would go wrong in disk_queue). + lists:foreach( + fun (Msg = #basic_message { is_persistent = IsPersistent }) -> + ok = case IsDurable andalso IsPersistent of + true -> ok; + _ -> rabbit_disk_queue:tx_publish(Msg) + end + end, TxnMessages), + garbage_collect(), + {ok, State1 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; +set_storage_mode(mixed, TxnMessages, State = + #mqstate { mode = disk, is_durable = IsDurable }) -> + %% The queue has a token just saying how many msgs are on disk + %% (this is already built for us when in disk mode). + %% Don't actually do anything to the disk + %% Don't start prefetcher just yet because the queue maybe busy - + %% wait for hibernate timeout in the amqqueue_process. + + %% Remove txn messages from disk which are neither persistent and + %% durable. This is necessary to avoid leaks. This is also pretty + %% much the inverse behaviour of our own tx_rollback/2 which is why + %% we're not using it. + Cancel = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> + case IsDurable andalso IsPersistent of + true -> Acc; + false -> [Msg #basic_message.guid | Acc] + end + end, [], TxnMessages), + ok = if Cancel == [] -> ok; + true -> rabbit_disk_queue:tx_rollback(Cancel) + end, + garbage_collect(), + {ok, State #mqstate { mode = mixed }}. + +send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, + Commit, Ack, MsgBuf) -> + case queue:out(Queue) of + {empty, _Queue} -> + ok = flush_messages_to_disk_queue(Q, Commit, Ack), + {[], []} = flush_requeue_to_disk_queue(Q, RequeueCount, [], []), + {ok, MsgBuf}; + {{value, {Msg = #basic_message { is_persistent = IsPersistent }, + IsDelivered}}, Queue1} -> + case IsDurable andalso IsPersistent of + true -> %% it's already in the Q + send_messages_to_disk( + IsDurable, Q, Queue1, PublishCount, RequeueCount + 1, + Commit, Ack, inc_queue_length(MsgBuf, 1)); + false -> + republish_message_to_disk_queue( + IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, + Ack, MsgBuf, Msg, IsDelivered) + end; + {{value, {Msg, IsDelivered, AckTag}}, Queue1} -> + %% these have come via the prefetcher, so are no longer in + %% the disk queue so they need to be republished + republish_message_to_disk_queue( + IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, + [AckTag | Ack], MsgBuf, Msg, IsDelivered); + {{value, {on_disk, Count}}, Queue1} -> + send_messages_to_disk(IsDurable, Q, Queue1, PublishCount, + RequeueCount + Count, Commit, Ack, + inc_queue_length(MsgBuf, Count)) + end. 
+ +republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount, + Commit, Ack, MsgBuf, Msg = + #basic_message { guid = MsgId }, IsDelivered) -> + {Commit1, Ack1} = flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack), + ok = rabbit_disk_queue:tx_publish(Msg), + {PublishCount1, Commit2, Ack2} = + case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of + true -> ok = flush_messages_to_disk_queue( + Q, [{MsgId, IsDelivered} | Commit1], Ack1), + {0, [], []}; + false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1], Ack1} + end, + send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0, + Commit2, Ack2, inc_queue_length(MsgBuf, 1)). + +flush_messages_to_disk_queue(_Q, [], []) -> + ok; +flush_messages_to_disk_queue(Q, Commit, Ack) -> + rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), Ack). + +flush_requeue_to_disk_queue(_Q, 0, Commit, Ack) -> + {Commit, Ack}; +flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> + ok = flush_messages_to_disk_queue(Q, Commit, Ack), + ok = rabbit_disk_queue:filesync(), + ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), + {[], []}. + estimate_queue_memory_and_reset_counters(State = #mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> {State #mqstate { memory_gain = 0, memory_loss = 0 }, 4 * Size, Gain, Loss}. storage_mode(#mqstate { mode = Mode }) -> Mode. + +%%---------------------------------------------------------------------------- +%% helpers +%%---------------------------------------------------------------------------- + +size_of_message( + #basic_message { content = #content { payload_fragments_rev = Payload, + properties_bin = PropsBin }}) + when is_binary(PropsBin) -> + size(PropsBin) + lists:foldl(fun (Frag, SumAcc) -> + SumAcc + size(Frag) + end, 0, Payload). + +ensure_binary_properties(Msg = #basic_message { content = Content }) -> + Msg #basic_message { + content = rabbit_binary_generator:ensure_content_encoded(Content) }. + +gain_memory(Inc, State = #mqstate { memory_size = QSize, + memory_gain = Gain }) -> + State #mqstate { memory_size = QSize + Inc, + memory_gain = Gain + Inc }. + +lose_memory(Dec, State = #mqstate { memory_size = QSize, + memory_loss = Loss }) -> + State #mqstate { memory_size = QSize - Dec, + memory_loss = Loss + Dec }. + +inc_queue_length(MsgBuf, 0) -> + MsgBuf; +inc_queue_length(MsgBuf, Count) -> + {NewCount, MsgBufTail} = + case queue:out_r(MsgBuf) of + {empty, MsgBuf1} -> {Count, MsgBuf1}; + {{value, {on_disk, Len}}, MsgBuf1} -> {Len + Count, MsgBuf1}; + {{value, _}, _MsgBuf1} -> {Count, MsgBuf} + end, + queue:in({on_disk, NewCount}, MsgBufTail). + +dec_queue_length(MsgBuf, Count) -> + case queue:out(MsgBuf) of + {{value, {on_disk, Len}}, MsgBuf1} -> + case Len of + Count -> + MsgBuf1; + _ when Len > Count -> + queue:in_r({on_disk, Len-Count}, MsgBuf1) + end; + _ -> MsgBuf + end. + +maybe_prefetch(State = #mqstate { prefetcher = undefined, + mode = mixed, + msg_buf = MsgBuf, + queue = Q }) -> + case queue:peek(MsgBuf) of + {value, {on_disk, Count}} -> + %% only prefetch for the next contiguous block on + %% disk. Beyond there, we either hit the end of the queue, + %% or the next msg is already in RAM, held by us, the + %% mixed queue + {ok, Prefetcher} = rabbit_queue_prefetcher:start_link(Q, Count), + State #mqstate { prefetcher = Prefetcher }; + _ -> State + end; +maybe_prefetch(State) -> + State. + +maybe_ack(_Q, true, true, AckTag) -> + AckTag; +maybe_ack(Q, _, _, AckTag) -> + ok = rabbit_disk_queue:ack(Q, [AckTag]), + not_on_disk. 
+ +remove_diskless(MsgsWithAcks) -> + lists:foldl( + fun ({Msg, AckTag}, {AccAckTags, AccSize}) -> + Msg1 = ensure_binary_properties(Msg), + {case AckTag of + not_on_disk -> AccAckTags; + _ -> [AckTag | AccAckTags] + end, size_of_message(Msg1) + AccSize} + end, {[], 0}, MsgsWithAcks). + +on_disk(disk, _IsDurable, _IsPersistent) -> true; +on_disk(mixed, true, true) -> true; +on_disk(mixed, _IsDurable, _IsPersistent) -> false. + -- cgit v1.2.1 From 67c249a79afee73ebbd9bc52fcfeb662359a79f7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 23:12:55 +0100 Subject: A comment and a small amount of refactoring. Nothing outrageously exciting. --- src/rabbit_mixed_queue.erl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index d131eea1..1e7d3287 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -365,6 +365,9 @@ set_storage_mode(disk, TxnMessages, State = %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end %% up publishing twice, so refcount would go wrong in disk_queue). + %% The order of msgs within a txn is determined only at tx_commit + %% time, so it doesn't matter if we're publishing msgs to the disk + %% queue in a different order from that which we received them in. lists:foreach( fun (Msg = #basic_message { is_persistent = IsPersistent }) -> ok = case IsDurable andalso IsPersistent of @@ -382,20 +385,17 @@ set_storage_mode(mixed, TxnMessages, State = %% Don't start prefetcher just yet because the queue maybe busy - %% wait for hibernate timeout in the amqqueue_process. - %% Remove txn messages from disk which are neither persistent and - %% durable. This is necessary to avoid leaks. This is also pretty - %% much the inverse behaviour of our own tx_rollback/2 which is why - %% we're not using it. + %% Remove txn messages from disk which are not (persistent and + %% durable). This is necessary to avoid leaks. This is also pretty + %% much the inverse behaviour of our own tx_rollback/2 which is + %% why we're not using that. Cancel = - lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent }, Acc) -> - case IsDurable andalso IsPersistent of - true -> Acc; - false -> [Msg #basic_message.guid | Acc] - end - end, [], TxnMessages), - ok = if Cancel == [] -> ok; - true -> rabbit_disk_queue:tx_rollback(Cancel) + [ MsgId || #basic_message { is_persistent = IsPersistent, + guid = MsgId } <- TxnMessages, + not (IsDurable andalso IsPersistent) ], + ok = case Cancel of + [] -> ok; + _ -> rabbit_disk_queue:tx_rollback(Cancel) end, garbage_collect(), {ok, State #mqstate { mode = mixed }}. -- cgit v1.2.1 From f06d9bc745d5a1be216692ce658f0f69fb7a03c3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 26 Aug 2009 23:15:13 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 1e7d3287..788b4b5a 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -389,9 +389,8 @@ set_storage_mode(mixed, TxnMessages, State = %% durable). This is necessary to avoid leaks. This is also pretty %% much the inverse behaviour of our own tx_rollback/2 which is %% why we're not using that. 
-    Cancel =
-        [ MsgId || #basic_message { is_persistent = IsPersistent,
-                    guid = MsgId } <- TxnMessages,
+    Cancel = [ MsgId || #basic_message { is_persistent = IsPersistent,
+                                         guid = MsgId } <- TxnMessages,
                not (IsDurable andalso IsPersistent) ],
     ok = case Cancel of
              [] -> ok;
-- cgit v1.2.1

From 72caba05a95e1736e6a0cd9034e6a73b6804152f Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Thu, 27 Aug 2009 05:57:00 +0100
Subject: tiny bit of refactoring

---
 src/rabbit_mixed_queue.erl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 788b4b5a..ae5f771f 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -425,9 +425,9 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount,
                   IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit,
                   [AckTag | Ack], MsgBuf, Msg, IsDelivered);
         {{value, {on_disk, Count}}, Queue1} ->
-            send_messages_to_disk(IsDurable, Q, Queue1, PublishCount,
-                                  RequeueCount + Count, Commit, Ack,
-                                  inc_queue_length(MsgBuf, Count))
+            send_messages_to_disk(
+              IsDurable, Q, Queue1, PublishCount, RequeueCount + Count,
+              Commit, Ack, inc_queue_length(MsgBuf, Count))
     end.

 republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount,
                                 #basic_message { guid = MsgId }, IsDelivered) ->
     {Commit1, Ack1} = flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack),
     ok = rabbit_disk_queue:tx_publish(Msg),
-    {PublishCount1, Commit2, Ack2} =
+    Commit2 = [{MsgId, IsDelivered} | Commit1],
+    {PublishCount1, Commit3, Ack2} =
         case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of
-            true -> ok = flush_messages_to_disk_queue(
-                           Q, [{MsgId, IsDelivered} | Commit1], Ack1),
+            true -> ok = flush_messages_to_disk_queue(Q, Commit2, Ack1),
                     {0, [], []};
-            false -> {PublishCount + 1, [{MsgId, IsDelivered} | Commit1], Ack1}
+            false -> {PublishCount + 1, Commit2, Ack1}
         end,
     send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0,
-                          Commit2, Ack2, inc_queue_length(MsgBuf, 1)).
+                          Commit3, Ack2, inc_queue_length(MsgBuf, 1)).

 flush_messages_to_disk_queue(_Q, [], []) ->
     ok;
 flush_messages_to_disk_queue(Q, Commit, Ack) ->
     rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), Ack).
-- cgit v1.2.1

From 3e59b16c6e68b09b65a11ce779880a9d454ed4cb Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 27 Aug 2009 11:03:22 +0100
Subject: As Matthias spotted, if we crash in the middle of a mixed -> disk transition then we potentially have messages in the wrong order. To fix this, we push a marker message into the queue before the transition. When the transition completes, we find that that marker message is at the head of the queue. If we are unlucky enough to crash during the transition then on recovery, we have to foldl through the entire queue anyway, so we keep our eyes open for the marker message, and should we see it, we simply move all that is before the marker message to the end of the queue, and then remove the marker. By avoiding putting any numbers into the queue, we neatly sidestep the issue of the disk_queue deleting all non-persistent messages on startup.

This has been tested by merging into 21444, then, erlang client:

a) Conn = amqp_connection:start_network(#amqp_params{}),
   Chan = amqp_connection:open_channel(Conn).

b) [begin Q1 = list_to_binary(integer_to_list(R)),
          #'queue.declare_ok'{queue = Q1} =
              amqp_channel:call(Chan, #'queue.declare'{queue=Q1, durable=true})
   end || R <- lists:seq(1,100) ].
c) [begin Q1 = list_to_binary(integer_to_list(R)),
          ok = amqp_channel:call(Chan, #'basic.publish'{routing_key = Q1},
                   #amqp_msg{props = (amqp_util:basic_properties())
                                         #'P_basic'{delivery_mode=2},
                             payload = << 0 : 1024 >>}),
          ok = amqp_channel:call(Chan, #'basic.publish'{routing_key = Q1},
                   #amqp_msg{props = (amqp_util:basic_properties()),
                             payload = << 1 : 1024>>})
   end || _ <- lists:seq(1,1000), R <- lists:seq(1,100) ].

   Then, when that lot's done, get hold of the pid of rabbit and prepare a
   kill -9 $rabbitpid

d) In another shell, do:
   for t in $(seq 1 100); do ./scripts/rabbitmqctl pin_queue_to_disk $t ; done

e) When you get to about 50, kill rabbit.

f) Then start up with just make run-node

g) In another shell make start-cover

h) In Rabbit's erlang shell rabbit:start().

i) When all started up, in the other shell make stop-cover.

j) Check the lines hit in mixed_queue:init/2.
---
 src/rabbit_mixed_queue.erl | 45 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index ae5f771f..fcd966e9 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -104,13 +104,26 @@ init(Queue, IsDurable) ->
     Len = rabbit_disk_queue:len(Queue),
     MsgBuf = inc_queue_length(queue:new(), Len),
-    Size = rabbit_disk_queue:foldl(
+    {Size, MarkerFound, MarkerCount} = rabbit_disk_queue:foldl(
            fun (Msg = #basic_message { is_persistent = true },
-                _AckTag, _IsDelivered, Acc) ->
-                   Acc + size_of_message(Msg)
-           end, 0, Queue),
+                _AckTag, _IsDelivered, {SizeAcc, MFound, MCount}) ->
+                   SizeAcc1 = SizeAcc + size_of_message(Msg),
+                   case {MFound, is_magic_marker_message(Msg)} of
+                       {false, false} -> {SizeAcc1, false, MCount + 1};
+                       {false, true}  -> {SizeAcc1, true, MCount};
+                       {true, false}  -> {SizeAcc1, true, MCount}
+                   end
+           end, {0, false, 0}, Queue),
+    Len1 = case MarkerFound of
+               false -> Len;
+               true ->
+                   ok = rabbit_disk_queue:requeue_next_n(Queue, MarkerCount),
+                   Len2 = Len - 1,
+                   {ok, Len2} = fetch_ack_magic_marker_message(Queue),
+                   Len2
+           end,
     {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue,
-                    is_durable = IsDurable, length = Len,
+                    is_durable = IsDurable, length = Len1,
                     memory_size = Size, memory_gain = undefined,
                     memory_loss = undefined, prefetcher = undefined }}.
@@ -339,7 +352,7 @@ is_empty(#mqstate { length = Length }) ->
 set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) ->
     {ok, State};
 set_storage_mode(disk, TxnMessages, State =
-                 #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf,
+                 #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, length = Length,
                             is_durable = IsDurable, prefetcher = Prefetcher }) ->
     State1 = State #mqstate { mode = disk },
     MsgBuf1 =
@@ -359,8 +372,10 @@ set_storage_mode(disk, TxnMessages, State =
     %% message on disk.
     %% Note we also batch together messages on disk so that we minimise
     %% the calls to requeue.
+    ok = publish_magic_marker_message(Q),
     {ok, MsgBuf3} =
         send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], [], queue:new()),
+    {ok, Length} = fetch_ack_magic_marker_message(Q),
     %% tx_publish txn messages. Some of these will have been already
     %% published if they really are durable and persistent which is
     %% why we can't just use our own tx_publish/2 function (would end
     %% up publishing twice, so refcount would go wrong in disk_queue).
@@ -551,3 +566,21 @@
 on_disk(disk, _IsDurable, _IsPersistent) -> true;
 on_disk(mixed, true, true) -> true;
 on_disk(mixed, _IsDurable, _IsPersistent) -> false.
+publish_magic_marker_message(Q) ->
+    Msg = rabbit_basic:message(
+            none, internal, [], <<>>, rabbit_guid:guid(), true),
+    ok = rabbit_disk_queue:publish(Q, ensure_binary_properties(Msg), false).
+
+fetch_ack_magic_marker_message(Q) ->
+    {#basic_message { exchange_name = none, routing_key = internal,
+                      is_persistent = true },
+     false, AckTag, Length} = rabbit_disk_queue:fetch(Q),
+    ok = rabbit_disk_queue:ack(Q, [AckTag]),
+    {ok, Length}.
+
+is_magic_marker_message(
+  #basic_message { exchange_name = none, routing_key = internal,
+                   is_persistent = true }) ->
+    true;
+is_magic_marker_message(_) ->
+    false.
-- cgit v1.2.1

From a10b20770e1a668ef3ba0e7c3e23758890cfd97a Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 27 Aug 2009 11:08:43 +0100
Subject: Tiny cosmetic, but also, we shouldn't include the size of the marker message in the initial size calculation.

---
 src/rabbit_mixed_queue.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index fcd966e9..355fb1fc 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -110,8 +110,8 @@ init(Queue, IsDurable) ->
                    SizeAcc1 = SizeAcc + size_of_message(Msg),
                    case {MFound, is_magic_marker_message(Msg)} of
                        {false, false} -> {SizeAcc1, false, MCount + 1};
-                       {false, true}  -> {SizeAcc1, true, MCount};
-                       {true, false}  -> {SizeAcc1, true, MCount}
+                       {false, true}  -> {SizeAcc, true, MCount};
+                       {true, false}  -> {SizeAcc1, true, MCount}
                    end
            end, {0, false, 0}, Queue),
     Len1 = case MarkerFound of
-- cgit v1.2.1

From a46b9cac011e7f36bc4cca05a4e72aa2f5f8e98c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 27 Aug 2009 11:11:18 +0100
Subject: and of course the run length encoding should not include the marker message.

---
 src/rabbit_mixed_queue.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 355fb1fc..0aa1b542 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -103,7 +103,6 @@ init(Queue, IsDurable) ->
     Len = rabbit_disk_queue:len(Queue),
-    MsgBuf = inc_queue_length(queue:new(), Len),
     {Size, MarkerFound, MarkerCount} = rabbit_disk_queue:foldl(
            fun (Msg = #basic_message { is_persistent = true },
                 _AckTag, _IsDelivered, {SizeAcc, MFound, MCount}) ->
@@ -122,6 +121,7 @@
                    {ok, Len2} = fetch_ack_magic_marker_message(Queue),
                    Len2
           end,
+    MsgBuf = inc_queue_length(queue:new(), Len1),
     {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue,
                     is_durable = IsDurable, length = Len1,
                     memory_size = Size, memory_gain = undefined,
-- cgit v1.2.1

From feef6f7e8ed8dee591620c6fb84f61514501a4f3 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 27 Aug 2009 11:55:09 +0100
Subject: Reworked handle_ch_down so that we detect early whether or not we should auto_delete the queue and only do rollback and requeue if we know that the queue isn't going to be deleted.
ALSO minor refactoring in MQ with magic_marker
---
 src/rabbit_amqqueue_process.erl | 42 ++++++++++++++++++++++----------------------
 src/rabbit_mixed_queue.erl      |  5 ++---
 2 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 406429ef..adf84c0e 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -399,27 +399,27 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) ->
                 unacked_messages = UAM} ->
             erlang:demonitor(MonitorRef),
             erase({ch, ChPid}),
-            State1 =
-                case Txn of
-                    none -> State;
-                    _ -> rollback_transaction(Txn, State)
-                end,
-            State2 =
-                deliver_or_requeue_n(
-                  [MsgWithAck ||
-                      {_MsgId, MsgWithAck} <- dict:to_list(UAM)],
-                  State1 #q {
-                    exclusive_consumer = case Holder of
-                                             {ChPid, _} -> none;
-                                             Other -> Other
-                                         end,
-                    active_consumers = remove_consumers(
-                                         ChPid, State1#q.active_consumers),
-                    blocked_consumers = remove_consumers(
-                                          ChPid, State1#q.blocked_consumers)}),
-            case should_auto_delete(State2) of
-                false -> noreply(State2);
-                true -> {stop, normal, State2}
+            State1 = State#q{
+                       exclusive_consumer = case Holder of
+                                                {ChPid, _} -> none;
+                                                Other -> Other
+                                            end,
+                       active_consumers = remove_consumers(
+                                            ChPid, State#q.active_consumers),
+                       blocked_consumers = remove_consumers(
+                                             ChPid, State#q.blocked_consumers)},
+            case should_auto_delete(State1) of
+                true ->
+                    {stop, normal, State1};
+                false ->
+                    State2 = case Txn of
+                                 none -> State1;
+                                 _ -> rollback_transaction(Txn, State1)
+                             end,
+                    noreply(
+                      deliver_or_requeue_n(
+                        [MsgWithAck ||
+                            {_MsgId, MsgWithAck} <- dict:to_list(UAM)], State2))
             end
     end.
diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index 0aa1b542..c6f71fa6 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -572,9 +572,8 @@ publish_magic_marker_message(Q) ->
     ok = rabbit_disk_queue:publish(Q, ensure_binary_properties(Msg), false).

 fetch_ack_magic_marker_message(Q) ->
-    {#basic_message { exchange_name = none, routing_key = internal,
-                      is_persistent = true },
-     false, AckTag, Length} = rabbit_disk_queue:fetch(Q),
+    {Msg, false, AckTag, Length} = rabbit_disk_queue:fetch(Q),
+    true = is_magic_marker_message(Msg),
     ok = rabbit_disk_queue:ack(Q, [AckTag]),
     {ok, Length}.
-- cgit v1.2.1

From 3500aef03021d3d59a8fcc82db786a75734c5c1d Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 27 Aug 2009 12:07:56 +0100
Subject: documentation

---
 src/rabbit_mixed_queue.erl | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl
index c6f71fa6..f0dcea15 100644
--- a/src/rabbit_mixed_queue.erl
+++ b/src/rabbit_mixed_queue.erl
@@ -366,12 +366,16 @@ set_storage_mode(disk, TxnMessages, State =
                     queue:join(Fetched, MsgBuf2)
             end
         end,
-    %% We enqueue _everything_ here. This means that should a message
-    %% already be in the disk queue we must remove it and add it back
-    %% in. Fortunately, by using requeue, we avoid rewriting the
-    %% message on disk.
-    %% Note we also batch together messages on disk so that we minimise
-    %% the calls to requeue.
+    %% (Re)enqueue _everything_ here. Note that due to batching going
+    %% on (see comments above send_messages_to_disk), if we crash
+    %% during this transition, we could have messages in the wrong
+    %% order on disk. Thus we publish a magic_marker_message which,
+    %% when this transition is complete, will be back at the head of
+    %% the queue. Should we die, on startup, during the foldl over the
+    %% queue, we detect the marker message and requeue all the
+    %% messages in front of it, to the back of the queue, thus
+    %% correcting the order. The result is that everything ends up
+    %% back in the same order, but will have new sequence IDs.
     ok = publish_magic_marker_message(Q),
     {ok, MsgBuf3} =
         send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], [], queue:new()),
@@ -414,6 +418,16 @@ set_storage_mode(mixed, TxnMessages, State =
     garbage_collect(),
     {ok, State #mqstate { mode = mixed }}.

+%% (Re)enqueue _everything_ here. Messages which are not on disk will
+%% be tx_published, messages that are on disk will be requeued to the
+%% end of the queue. This is done in batches, where a batch consists
+%% of a number of tx_publishes, a tx_commit and then a call to
+%% requeue_next_n. We do not want to fetch messages off disk only to
+%% republish them later. Note in the tx_commit, we ack messages which
+%% are being _re_published. These are messages that have been fetched
+%% by the prefetcher.
+%% Batches are limited in size to make sure that the resultant mnesia
+%% transaction on tx_commit does not get too big, memory wise.
 send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount,
                       Commit, Ack, MsgBuf) ->
     case queue:out(Queue) of
         {empty, _Queue} ->
             ok = flush_messages_to_disk_queue(Q, Commit, Ack),
             {[], []} = flush_requeue_to_disk_queue(Q, RequeueCount, [], []),
             {ok, MsgBuf};
         {{value, {Msg = #basic_message { is_persistent = IsPersistent },
                   IsDelivered}}, Queue1} ->
             case IsDurable andalso IsPersistent of
                 true -> %% it's already in the Q
                     send_messages_to_disk(
                       IsDurable, Q, Queue1, PublishCount, RequeueCount + 1,
                       Commit, Ack, inc_queue_length(MsgBuf, 1));
                 false ->
                     republish_message_to_disk_queue(
                       IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit,
                       Ack, MsgBuf, Msg, IsDelivered)
             end;
         {{value, {Msg, IsDelivered, AckTag}}, Queue1} ->
-            %% these have come via the prefetcher, so are no longer in
-            %% the disk queue so they need to be republished
+            %% These have come via the prefetcher, so are no longer in
+            %% the disk queue (yes, they've not been ack'd yet, but
+            %% the head of the queue has passed these messages). We
+            %% need to requeue them, which we sneakily achieve by
+            %% tx_publishing them, and then in the tx_commit, ack the
+            %% old copy.
republish_message_to_disk_queue( IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, [AckTag | Ack], MsgBuf, Msg, IsDelivered); -- cgit v1.2.1 From 75869cc77cfcd973d95d9bf4a4d44e6087986323 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 12:22:02 +0100 Subject: cosmetic - MarkerCount => MarkerPreludeCount --- src/rabbit_mixed_queue.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index f0dcea15..9cdca261 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -103,20 +103,21 @@ init(Queue, IsDurable) -> Len = rabbit_disk_queue:len(Queue), - {Size, MarkerFound, MarkerCount} = rabbit_disk_queue:foldl( + {Size, MarkerFound, MarkerPreludeCount} = rabbit_disk_queue:foldl( fun (Msg = #basic_message { is_persistent = true }, - _AckTag, _IsDelivered, {SizeAcc, MFound, MCount}) -> + _AckTag, _IsDelivered, {SizeAcc, MFound, MPCount}) -> SizeAcc1 = SizeAcc + size_of_message(Msg), case {MFound, is_magic_marker_message(Msg)} of - {false, false} -> {SizeAcc1, false, MCount + 1}; - {false, true} -> {SizeAcc, true, MCount}; - {true, false} -> {SizeAcc1, true, MCount} + {false, false} -> {SizeAcc1, false, MPCount + 1}; + {false, true} -> {SizeAcc, true, MPCount}; + {true, false} -> {SizeAcc1, true, MPCount} end end, {0, false, 0}, Queue), Len1 = case MarkerFound of false -> Len; true -> - ok = rabbit_disk_queue:requeue_next_n(Queue, MarkerCount), + ok = rabbit_disk_queue:requeue_next_n(Queue, + MarkerPreludeCount), Len2 = Len - 1, {ok, Len2} = fetch_ack_magic_marker_message(Queue), Len2 -- cgit v1.2.1 From a3eb3fc762c7160c3be3b40c6bc54e404f445ab7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 12:25:17 +0100 Subject: cosmetic --- src/rabbit_mixed_queue.erl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 9cdca261..bf69dbca 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -103,16 +103,17 @@ init(Queue, IsDurable) -> Len = rabbit_disk_queue:len(Queue), - {Size, MarkerFound, MarkerPreludeCount} = rabbit_disk_queue:foldl( - fun (Msg = #basic_message { is_persistent = true }, - _AckTag, _IsDelivered, {SizeAcc, MFound, MPCount}) -> - SizeAcc1 = SizeAcc + size_of_message(Msg), - case {MFound, is_magic_marker_message(Msg)} of - {false, false} -> {SizeAcc1, false, MPCount + 1}; - {false, true} -> {SizeAcc, true, MPCount}; - {true, false} -> {SizeAcc1, true, MPCount} - end - end, {0, false, 0}, Queue), + {Size, MarkerFound, MarkerPreludeCount} = + rabbit_disk_queue:foldl( + fun (Msg = #basic_message { is_persistent = true }, + _AckTag, _IsDelivered, {SizeAcc, MFound, MPCount}) -> + SizeAcc1 = SizeAcc + size_of_message(Msg), + case {MFound, is_magic_marker_message(Msg)} of + {false, false} -> {SizeAcc1, false, MPCount + 1}; + {false, true} -> {SizeAcc, true, MPCount}; + {true, false} -> {SizeAcc1, true, MPCount} + end + end, {0, false, 0}, Queue), Len1 = case MarkerFound of false -> Len; true -> -- cgit v1.2.1 From 9ecd6957e922d07a9f60f3bd72de2b3d473acf6b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 15:21:50 +0100 Subject: filesync is unnecessary because tx_commit really is synchronous. 
--- src/rabbit_mixed_queue.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index bf69dbca..f0df7777 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -489,7 +489,6 @@ flush_requeue_to_disk_queue(_Q, 0, Commit, Ack) -> {Commit, Ack}; flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> ok = flush_messages_to_disk_queue(Q, Commit, Ack), - ok = rabbit_disk_queue:filesync(), ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), {[], []}. -- cgit v1.2.1 From 8a21b95e92584e30356007f905d74c0ae0a885ca Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 15:56:36 +0100 Subject: QA-corrections to prefetcher. --- src/rabbit_mixed_queue.erl | 8 ++--- src/rabbit_queue_prefetcher.erl | 67 ++++++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index f0df7777..251c2046 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -206,8 +206,8 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, %% use State, not State1 as we've not dec'd length fetch(case rabbit_queue_prefetcher:drain(Prefetcher) of empty -> State #mqstate { prefetcher = undefined }; - {Fetched, Len, Status} -> - MsgBuf2 = dec_queue_length(MsgBuf, Len), + {Fetched, Status} -> + MsgBuf2 = dec_queue_length(MsgBuf, queue:len(Fetched)), State #mqstate { msg_buf = queue:join(Fetched, MsgBuf2), prefetcher = case Status of @@ -363,8 +363,8 @@ set_storage_mode(disk, TxnMessages, State = _ -> case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of empty -> MsgBuf; - {Fetched, Len} -> - MsgBuf2 = dec_queue_length(MsgBuf, Len), + Fetched -> + MsgBuf2 = dec_queue_length(MsgBuf, queue:len(Fetched)), queue:join(Fetched, MsgBuf2) end end, diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index ffa98d69..eddb613c 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -47,9 +47,7 @@ -record(pstate, { msg_buf, - buf_length, target_count, - fetched_count, queue, queue_mref }). @@ -176,6 +174,17 @@ %% redelivered bit set false really are guaranteed to have not been %% delivered already. +-ifdef(use_specs). + +-spec(start_link/2 :: (queue_name(), non_neg_integer()) -> + ({'ok', pid()} | 'ignore' | {'error', any()})). +-spec(publish/2 :: (pid(), message()) -> 'ok'). +-spec(drain/1 :: (pid()) -> ('empty' | {queue(), ('finished' | 'continuing')})). +-spec(drain_and_stop/1 :: (pid()) -> ('empty' | queue())). +-spec(stop/1 :: (pid()) -> 'ok'). + +-endif. + start_link(Queue, Count) -> gen_server2:start_link(?MODULE, [Queue, Count, self()], []). @@ -194,14 +203,12 @@ drain_and_stop(Prefetcher) -> stop(Prefetcher) -> gen_server2:call(Prefetcher, stop, infinity). -init([Q, Count, QPid]) -> +init([Q, Count, QPid]) when Count > 0 andalso is_pid(QPid) -> %% link isn't enough because the signal will not appear if the %% queue exits normally. Thus have to use monitor. 
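    %% (Editorial aside: with a link, and without trapping exits, a
    %% peer's 'normal' exit produces no signal at all, whereas a monitor
    %% delivers a 'DOWN' message for every termination, normal or
    %% otherwise -- hence the monitor here.)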
MRef = erlang:monitor(process, QPid), State = #pstate { msg_buf = queue:new(), - buf_length = 0, target_count = Count, - fetched_count = 0, queue = Q, queue_mref = MRef }, @@ -211,39 +218,37 @@ init([Q, Count, QPid]) -> handle_call({publish, {Msg = #basic_message {}, IsDelivered, AckTag, _Remaining}}, - DiskQueue, - State = #pstate { fetched_count = Fetched, target_count = Target, - msg_buf = MsgBuf, buf_length = Length, queue = Q - }) -> + DiskQueue, State = #pstate { + target_count = Target, msg_buf = MsgBuf, queue = Q}) -> gen_server2:reply(DiskQueue, ok), - Timeout = if Fetched + 1 == Target -> hibernate; - true -> ok = rabbit_disk_queue:prefetch(Q), - infinity + Timeout = case Target of + 1 -> hibernate; + _ -> ok = rabbit_disk_queue:prefetch(Q), + infinity end, MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), - {noreply, State #pstate { fetched_count = Fetched + 1, - buf_length = Length + 1, - msg_buf = MsgBuf1 }, Timeout}; + {noreply, State #pstate { target_count = Target - 1, msg_buf = MsgBuf1 }, + Timeout}; handle_call(publish_empty, _From, State) -> %% Very odd. This could happen if the queue is deleted or purged %% and the mixed queue fails to shut us down. {reply, ok, State, hibernate}; -handle_call(drain, _From, State = #pstate { buf_length = 0 }) -> - {stop, normal, empty, State}; -handle_call(drain, _From, State = #pstate { fetched_count = Count, - target_count = Count, - msg_buf = MsgBuf, - buf_length = Length }) -> - {stop, normal, {MsgBuf, Length, finished}, State}; -handle_call(drain, _From, State = #pstate { msg_buf = MsgBuf, - buf_length = Length }) -> - {reply, {MsgBuf, Length, continuing}, - State #pstate { msg_buf = queue:new(), buf_length = 0 }, infinity}; -handle_call(drain_and_stop, _From, State = #pstate { buf_length = 0 }) -> - {stop, normal, empty, State}; -handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf, - buf_length = Length }) -> - {stop, normal, {MsgBuf, Length}, State}; +handle_call(drain, _From, State = #pstate { target_count = 0, + msg_buf = MsgBuf }) -> + Res = case queue:is_empty(MsgBuf) of + true -> empty; + false -> {MsgBuf, finished} + end, + {stop, normal, Res, State}; +handle_call(drain, _From, State = #pstate { msg_buf = MsgBuf }) -> + {reply, {MsgBuf, continuing}, State #pstate { msg_buf = queue:new() }, + infinity}; +handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf }) -> + Res = case queue:is_empty(MsgBuf) of + true -> empty; + false -> MsgBuf + end, + {stop, normal, Res, State}; handle_call(stop, _From, State) -> {stop, normal, ok, State}. -- cgit v1.2.1 From 9b35e14c132381ea0370eae78ce1cc218b7b9cbe Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 16:06:39 +0100 Subject: cosmetics --- src/rabbit_queue_prefetcher.erl | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index eddb613c..3b1c219d 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -52,6 +52,10 @@ queue_mref }). +%%---------------------------------------------------------------------------- +%% Novel +%%---------------------------------------------------------------------------- + %% The design of the prefetcher is based on the following: %% %% a) It must issue low-priority (-ve) requests to the disk queue for @@ -117,12 +121,12 @@ %% %% Now at some point, the mixed_queue will come along and will call %% prefetcher:drain() - normal priority call. 
The prefetcher then -%% replies with its internal queue and the length of that queue. If -%% the prefetch target was reached, the prefetcher stops normally at -%% this point. If it hasn't been reached, then the prefetcher -%% continues to hang around (it almost certainly has issued a -%% disk_queue:prefetch(Q) cast and is waiting for a reply from the -%% disk_queue). +%% replies with its internal queue and a flag saying if the prefetcher +%% has finished or is continuing; if the prefetch target was reached, +%% the prefetcher stops normally at this point. If it hasn't been +%% reached, then the prefetcher continues to hang around (it almost +%% certainly has issued a disk_queue:prefetch(Q) cast and is waiting +%% for a reply from the disk_queue). %% %% If the mixed_queue calls prefetcher:drain() and the prefetcher's %% internal queue is empty then the prefetcher replies with 'empty', @@ -174,17 +178,21 @@ %% redelivered bit set false really are guaranteed to have not been %% delivered already. +%%---------------------------------------------------------------------------- + -ifdef(use_specs). -spec(start_link/2 :: (queue_name(), non_neg_integer()) -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(publish/2 :: (pid(), message()) -> 'ok'). +-spec(publish/2 :: (pid(), (message()| 'empty')) -> 'ok'). -spec(drain/1 :: (pid()) -> ('empty' | {queue(), ('finished' | 'continuing')})). -spec(drain_and_stop/1 :: (pid()) -> ('empty' | queue())). -spec(stop/1 :: (pid()) -> 'ok'). -endif. +%%---------------------------------------------------------------------------- + start_link(Queue, Count) -> gen_server2:start_link(?MODULE, [Queue, Count, self()], []). @@ -203,6 +211,8 @@ drain_and_stop(Prefetcher) -> stop(Prefetcher) -> gen_server2:call(Prefetcher, stop, infinity). +%%---------------------------------------------------------------------------- + init([Q, Count, QPid]) when Count > 0 andalso is_pid(QPid) -> %% link isn't enough because the signal will not appear if the %% queue exits normally. Thus have to use monitor. -- cgit v1.2.1 From 16df4f9a9d65e6b18db2ea192c1a7e2a7599bd96 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 18:12:12 +0100 Subject: All QA comments relating to queue_mode_manager --- src/rabbit.erl | 2 +- src/rabbit_amqqueue.erl | 2 +- src/rabbit_amqqueue_process.erl | 10 +- src/rabbit_disk_queue.erl | 12 +- src/rabbit_memory_manager.erl | 460 ++++++++++++++++++++++++++++++++++++++ src/rabbit_queue_mode_manager.erl | 454 ------------------------------------- 6 files changed, 475 insertions(+), 465 deletions(-) create mode 100644 src/rabbit_memory_manager.erl delete mode 100644 src/rabbit_queue_mode_manager.erl diff --git a/src/rabbit.erl b/src/rabbit.erl index 665f10a2..8962b12e 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -140,7 +140,7 @@ start(normal, []) -> {ok, MemoryAlarms} = application:get_env(memory_alarms), ok = rabbit_alarm:start(MemoryAlarms), - ok = start_child(rabbit_queue_mode_manager), + ok = start_child(rabbit_memory_manager), ok = rabbit_binary_generator: check_empty_content_body_frame_size(), diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 51b2e8f5..ad0a0f0c 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -102,7 +102,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(set_storage_mode/2 :: (pid(), ('disk' | 'mixed')) -> 'ok'). 
+-spec(set_storage_mode/2 :: (pid(), ('oppressed' | 'liberated')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), bool()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index adf84c0e..72325414 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -101,7 +101,7 @@ start_link(Q) -> init(Q = #amqqueue { name = QName, durable = Durable }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - ok = rabbit_queue_mode_manager:register + ok = rabbit_memory_manager:register (self(), false, rabbit_amqqueue, set_storage_mode, [self()]), {ok, MS} = rabbit_mixed_queue:init(QName, Durable), State = #q{q = Q, @@ -551,7 +551,7 @@ i(Item, _) -> report_memory(Hib, State = #q { mixed_state = MS }) -> {MS1, MSize, Gain, Loss} = rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), - rabbit_queue_mode_manager:report_memory(self(), MSize, Gain, Loss, Hib), + rabbit_memory_manager:report_memory(self(), MSize, Gain, Loss, Hib), State #q { mixed_state = MS1 }. %--------------------------------------------------------------------------- @@ -820,7 +820,11 @@ handle_cast({set_storage_mode, Mode}, State = #q { mixed_state = MS }) -> PendingMessages = lists:flatten([Pending || #tx { pending_messages = Pending} <- all_tx_record()]), - {ok, MS1} = rabbit_mixed_queue:set_storage_mode(Mode, PendingMessages, MS), + Mode1 = case Mode of + liberated -> mixed; + oppressed -> disk + end, + {ok, MS1} = rabbit_mixed_queue:set_storage_mode(Mode1, PendingMessages, MS), noreply(State #q { mixed_state = MS1 }). handle_info(report_memory, State) -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d9f318e0..18b250c5 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -281,7 +281,7 @@ -spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(filesync/0 :: () -> 'ok'). -spec(cache_info/0 :: () -> [{atom(), term()}]). --spec(set_mode/1 :: ('disk' | 'mixed') -> 'ok'). +-spec(set_mode/1 :: ('oppressed' | 'liberated') -> 'ok'). -endif. @@ -371,7 +371,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. process_flag(trap_exit, true), - ok = rabbit_queue_mode_manager:register + ok = rabbit_memory_manager:register (self(), true, rabbit_disk_queue, set_mode, []), Node = node(), ok = @@ -448,7 +448,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% grant us to ram_disk mode. We have to start in ram_disk mode %% because we can't find values for mnesia_bytes_per_record or %% ets_bytes_per_record otherwise. - ok = rabbit_queue_mode_manager:report_memory(self(), 0, false), + ok = rabbit_memory_manager:report_memory(self(), 0, false), ok = report_memory(false, State2), {ok, start_memory_timer(State2), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
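As an aside, the registration contract used above can be sketched as
follows. This is illustrative only: mm_client is a hypothetical module,
but register/5, report_memory/3 and the appended-Mode callback convention
are those of the memory manager introduced later in this patch.

    -module(mm_client).
    -export([start/0, set_mode/1]).

    start() ->
        %% second argument 'false' = we may be oppressed to free tokens;
        %% the manager calls back ?MODULE:set_mode(Mode), the new mode
        %% being appended to the Args list we supply here
        ok = rabbit_memory_manager:register(self(), false,
                                            ?MODULE, set_mode, []),
        %% report current usage in bytes; the third argument says
        %% whether we are hibernating
        ok = rabbit_memory_manager:report_memory(self(), 1 bsl 20, false).

    set_mode(liberated) -> ok;  %% enough tokens: keep state in RAM
    set_mode(oppressed) -> ok.  %% token pressure: push state out to disk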
@@ -522,8 +522,8 @@ handle_cast({delete_queue, Q}, State) -> noreply(State1); handle_cast({set_mode, Mode}, State) -> noreply((case Mode of - disk -> fun to_disk_only_mode/1; - mixed -> fun to_ram_disk_mode/1 + oppressed -> fun to_disk_only_mode/1; + liberated -> fun to_ram_disk_mode/1 end)(State)); handle_cast({prefetch, Q, From}, State) -> {ok, Result, State1} = @@ -609,7 +609,7 @@ start_memory_timer(State) -> report_memory(Hibernating, State) -> Bytes = memory_use(State), - rabbit_queue_mode_manager:report_memory(self(), trunc(2.5 * Bytes), + rabbit_memory_manager:report_memory(self(), trunc(2.5 * Bytes), Hibernating). memory_use(#dqstate { operation_mode = ram_disk, diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl new file mode 100644 index 00000000..055e4795 --- /dev/null +++ b/src/rabbit_memory_manager.erl @@ -0,0 +1,460 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_memory_manager). + +-behaviour(gen_server2). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([register/5, report_memory/3, report_memory/5, info/0, + conserve_memory/2]). + +-define(TOTAL_TOKENS, 10000000). +-define(ACTIVITY_THRESHOLD, 25). + +-define(SERVER, ?MODULE). + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> + ({'ok', pid()} | 'ignore' | {'error', any()})). +-spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). +-spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). +-spec(report_memory/5 :: (pid(), non_neg_integer(), + (non_neg_integer() | 'undefined'), + (non_neg_integer() | 'undefined'), bool()) -> + 'ok'). +-spec(info/0 :: () -> [{atom(), any()}]). +-spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). + +-endif. + +-record(state, { available_tokens, + liberated_processes, + callbacks, + tokens_per_byte, + lowrate, + hibernate, + unoppressable, + alarmed + }). + +%% Token-credit based memory management + +%% Start off by working out the amount of memory available in the +%% system (RAM). Then, work out how many tokens each byte corresponds +%% to. This is the tokens_per_byte field. 
When a process registers, it +%% must provide an M-F-A triple to a function that needs one further +%% argument, which is the new mode. This will either be 'liberated' or +%% 'oppressed'. +%% +%% Processes then report their own memory usage, in bytes, and the +%% manager takes care of the rest. +%% +%% There are a finite number of tokens in the system. These are +%% allocated to processes as the processes report their memory +%% usage. We keep track of processes which have hibernated, and +%% processes that are doing only a low rate of work (reported as a low +%% gain or loss in memory between memory reports). When a process +%% reports memory use which can't be satisfied by the available +%% tokens, we try and oppress processes first from the hibernated +%% group, and then from the lowrate group. The hibernated group is a +%% simple queue, and so is implicitly sorted by the order in which +%% processes were added to the queue. This means that when removing +%% from the queue, we evict the sleepiest (and most passive) pid +%% first. The lowrate group is a priority queue, where the priority is +%% the truncated log (base e) of the amount of memory allocated. Thus +%% when we remove from the queue, we first remove the queue from the +%% highest bucket. +%% +%% If the reported memory use still can't be satisfied after +%% oppressing everyone from those two groups (and note that we check +%% first whether or not oppressing them would make available enough +%% tokens to satisfy the reported use rather than just oppressing all +%% those processes and then going "whoops, didn't help after all"), +%% then we oppress the reporting process. When a process registers, it +%% can declare itself "unoppressable". If a process is unoppressable +%% then it will not be sent to disk as a result of other processes +%% needing more tokens. However, if it itself needs additional tokens +%% which aren't available then it is still oppressed as before. This +%% feature is only used by the disk_queue, because if the disk queue +%% is not being used, and hibernates, and then memory pressure gets +%% tight, the disk_queue would typically be one of the first processes +%% to be oppressed (sent to disk_only mode), which cripples +%% performance. Thus by setting it unoppressable, it is only possible +%% for the disk_queue to be oppressed when it is active and +%% attempting to increase its memory allocation. +%% +%% If a process has been oppressed, it continues making memory +%% reports, as if it was liberated. As soon as a reported amount of +%% memory can be satisfied (and this can include oppressing other +%% processes in the way described above), it will be liberated. We do +%% not keep any information about oppressed processes. +%% +%% Note that the lowrate and hibernate groups can get very out of +%% date. This is fine, and somewhat unavoidable given the absence of +%% useful APIs for queues. Thus we allow them to get out of date +%% (processes will be left in there when they change groups, +%% duplicates can appear, dead processes are not pruned etc etc etc), +%% and when we go through the groups, summing up their allocated +%% tokens, we tidy up at that point. +%% +%% A liberated process, which is reporting a smaller amount of RAM +%% than its last report will remain liberated. A liberated process +%% that is busy but consuming an unchanging amount of RAM will never +%% be oppressed. + +%% Specific notes as applied to queues and the disk_queue: +%% +%% The disk_queue is managed in the same way as queues. 
This means
+%% that a queue that has gone back to mixed mode after being in disk
+%% mode now has its messages counted twice as they are counted both in
+%% the report made by the queue (even though they may not yet be in
+%% RAM (though see the prefetcher)) and also by the disk_queue. Thus
+%% the amount of available RAM must be higher when going disk -> mixed
+%% than when going mixed -> disk. This is fairly sensible as it
+%% reduces the risk of any oscillations occurring.
+%%
+%% The queue process deliberately reports 4 times its estimated RAM
+%% usage, and the disk_queue 2.5 times. In practice, this seems to
+%% work well. Note that we are deliberately running out of tokens a
+%% little early because the mixed -> disk transition can transiently
+%% eat a lot of memory and take some time (flushing a few million
+%% messages to disk is never going to be instantaneous).
+
+start_link() ->
+    gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+register(Pid, Unoppressable, Module, Function, Args) ->
+    gen_server2:cast(?SERVER, {register, Pid, Unoppressable,
+                               Module, Function, Args}).
+
+report_memory(Pid, Memory, Hibernating) ->
+    report_memory(Pid, Memory, undefined, undefined, Hibernating).
+
+report_memory(Pid, Memory, Gain, Loss, Hibernating) ->
+    gen_server2:cast(?SERVER,
+                     {report_memory, Pid, Memory, Gain, Loss, Hibernating}).
+
+info() ->
+    gen_server2:call(?SERVER, info).
+
+conserve_memory(_Pid, Conserve) ->
+    gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}).
+
+init([]) ->
+    process_flag(trap_exit, true),
+    rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}),
+    {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(),
+    MemAvail = MemTotal - MemUsed,
+    TPB = if MemAvail == 0 -> 0;
+             true -> ?TOTAL_TOKENS / MemAvail
+          end,
+    {ok, #state { available_tokens = ?TOTAL_TOKENS,
+                  liberated_processes = dict:new(),
+                  callbacks = dict:new(),
+                  tokens_per_byte = TPB,
+                  lowrate = priority_queue:new(),
+                  hibernate = queue:new(),
+                  unoppressable = sets:new(),
+                  alarmed = false
+                 }}.
+
+handle_call(info, _From, State) ->
+    State1 = #state { available_tokens = Avail,
+                      liberated_processes = Libre,
+                      lowrate = Lazy,
+                      hibernate = Sleepy,
+                      unoppressable = Unoppressable } =
+        free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying
+    {reply, [{ available_tokens, Avail },
+             { liberated_processes, dict:to_list(Libre) },
+             { lowrate_queues, priority_queue:to_list(Lazy) },
+             { hibernated_queues, queue:to_list(Sleepy) },
+             { unoppressable_queues, sets:to_list(Unoppressable) }], State1}.
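%% A worked example of the token arithmetic above (editorial
%% illustration; tokens_needed/2 is a hypothetical helper, not part of
%% the module, but it mirrors the Req computation in handle_cast below):
%%
%%   tokens_needed(MemAvail, ReportedBytes) ->
%%       TPB = ?TOTAL_TOKENS / MemAvail,
%%       rabbit_misc:ceil(TPB * ReportedBytes).
%%
%% With 1GB free at startup, tokens_needed(1 bsl 30, 50 bsl 20) is
%% ceil(10000000 / 1073741824 * 52428800) = 488282, i.e. a process
%% reporting 50MB of use claims just under 5% of the token pool.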
+ + +handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, + State = #state { liberated_processes = Libre, + available_tokens = Avail, + callbacks = Callbacks, + tokens_per_byte = TPB, + alarmed = Alarmed }) -> + Req = rabbit_misc:ceil(TPB * Memory), + LowRate = case {BytesGained, BytesLost} of + {undefined, _} -> false; + {_, undefined} -> false; + {G, L} -> G < ?ACTIVITY_THRESHOLD andalso + L < ?ACTIVITY_THRESHOLD + end, + LibreActivity = if Hibernating -> hibernate; + LowRate -> lowrate; + true -> active + end, + {StateN = #state { lowrate = Lazy, hibernate = Sleepy }, ActivityNew} = + case find_process(Pid, Libre) of + {libre, {OAlloc, _OActivity}} -> + Avail1 = Avail + OAlloc, + State1 = #state { available_tokens = Avail2, + liberated_processes = Libre1 } + = free_upto(Pid, Req, + State #state { available_tokens = Avail1 }), + case Req > Avail2 of + true -> %% nowt we can do, oppress the process + ok = set_process_mode(Callbacks, Pid, oppressed), + {State1 #state { liberated_processes = + dict:erase(Pid, Libre1) }, oppressed}; + false -> %% keep liberated + {State1 #state + { liberated_processes = + dict:store(Pid, {Req, LibreActivity}, Libre1), + available_tokens = Avail2 - Req }, + LibreActivity} + end; + oppressed -> + case Alarmed of + true -> + {State, oppressed}; + false -> + State1 = #state { available_tokens = Avail1, + liberated_processes = Libre1 } = + free_upto(Pid, Req, State), + case Req > Avail1 orelse Hibernating orelse LowRate of + true -> + %% not enough space, or no compelling + %% reason, so stay oppressed + {State1, oppressed}; + false -> %% can liberate the process + set_process_mode(Callbacks, Pid, liberated), + {State1 #state { + liberated_processes = + dict:store(Pid, {Req, LibreActivity}, Libre1), + available_tokens = Avail1 - Req }, + LibreActivity} + end + end + end, + StateN1 = + case ActivityNew of + active -> StateN; + oppressed -> StateN; + lowrate -> + StateN #state { lowrate = add_to_lowrate(Pid, Req, Lazy) }; + hibernate -> + StateN #state { hibernate = queue:in(Pid, Sleepy) } + end, + {noreply, StateN1}; + +handle_cast({register, Pid, IsUnoppressable, Module, Function, Args}, + State = #state { callbacks = Callbacks, + unoppressable = Unoppressable }) -> + _MRef = erlang:monitor(process, Pid), + Unoppressable1 = case IsUnoppressable of + true -> sets:add_element(Pid, Unoppressable); + false -> Unoppressable + end, + {noreply, State #state { callbacks = dict:store + (Pid, {Module, Function, Args}, Callbacks), + unoppressable = Unoppressable1 + }}; + +handle_cast({conserve_memory, Conserve}, State) -> + {noreply, State #state { alarmed = Conserve }}. + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #state { available_tokens = Avail, + liberated_processes = Libre }) -> + State1 = case find_process(Pid, Libre) of + oppressed -> + State; + {libre, {Alloc, _Activity}} -> + State #state { available_tokens = Avail + Alloc, + liberated_processes = dict:erase(Pid, Libre) } + end, + {noreply, State1}; +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +add_to_lowrate(Pid, Alloc, Lazy) -> + Bucket = if Alloc == 0 -> 0; %% can't take log(0) + true -> trunc(math:log(Alloc)) %% log base e + end, + priority_queue:in({Pid, Bucket, Alloc}, Bucket, Lazy). 
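%% Editorial illustration of the bucketing above (example values):
%% trunc(math:log(150)) = 5, trunc(math:log(1000)) = 6 and
%% trunc(math:log(20000)) = 9. Allocations that differ by a factor of at
%% least e (~2.718) are guaranteed to land in different buckets, and
%% because the lazy queue is drained from the highest bucket first, the
%% largest allocations are the first to be evicted.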
+ +find_process(Pid, Libre) -> + case dict:find(Pid, Libre) of + {ok, Value} -> {libre, Value}; + error -> oppressed + end. + +set_process_mode(Callbacks, Pid, Mode) -> + {Module, Function, Args} = dict:fetch(Pid, Callbacks), + erlang:apply(Module, Function, Args ++ [Mode]). + +tidy_and_sum_lazy(IgnorePids, Lazy, Libre) -> + tidy_and_sum(lowrate, Libre, + fun (Lazy1) -> + case priority_queue:out(Lazy1) of + {empty, Lazy2} -> + {empty, Lazy2}; + {{value, {Pid, _Bucket, _Alloc}}, Lazy2} -> + {{value, Pid}, Lazy2} + end + end, fun add_to_lowrate/3, IgnorePids, Lazy, + priority_queue:new(), 0). + +tidy_and_sum_sleepy(IgnorePids, Sleepy, Libre) -> + tidy_and_sum(hibernate, Libre, fun queue:out/1, + fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, + IgnorePids, Sleepy, queue:new(), 0). + +tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, DupCheckSet, + CataInit, AnaInit, AllocAcc) -> + case Catamorphism(CataInit) of + {empty, _CataInit} -> {AnaInit, AllocAcc}; + {{value, Pid}, CataInit1} -> + {DupCheckSet1, AnaInit1, AllocAcc1} = + case sets:is_element(Pid, DupCheckSet) of + true -> + {DupCheckSet, AnaInit, AllocAcc}; + false -> + case find_process(Pid, Libre) of + {libre, {Alloc, AtomExpected}} -> + {sets:add_element(Pid, DupCheckSet), + Anamorphism(Pid, Alloc, AnaInit), + Alloc + AllocAcc}; + _ -> + {DupCheckSet, AnaInit, AllocAcc} + end + end, + tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, + DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) + end. + +free_upto_lazy(IgnorePids, Callbacks, Lazy, Libre, Req) -> + free_from( + Callbacks, + fun(_Libre, Lazy1, LazyAcc) -> + case priority_queue:out(Lazy1) of + {empty, _Lazy2} -> + empty; + {{value, V = {Pid, Bucket, Alloc}}, Lazy2} -> + case sets:is_element(Pid, IgnorePids) of + true -> {skip, Lazy2, + priority_queue:in(V, Bucket, LazyAcc)}; + false -> {value, Lazy2, Pid, Alloc} + end + end + end, fun priority_queue:join/2, Libre, Lazy, priority_queue:new(), Req). + +free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Libre, Req) -> + free_from(Callbacks, + fun(Libre1, Sleepy1, SleepyAcc) -> + case queue:out(Sleepy1) of + {empty, _Sleepy2} -> + empty; + {{value, Pid}, Sleepy2} -> + case sets:is_element(Pid, IgnorePids) of + true -> {skip, Sleepy2, + queue:in(Pid, SleepyAcc)}; + false -> {Alloc, hibernate} = + dict:fetch(Pid, Libre1), + {value, Sleepy2, Pid, Alloc} + end + end + end, fun queue:join/2, Libre, Sleepy, queue:new(), Req). + +free_from(Callbacks, Hylomorphism, BaseCase, Libre, CataInit, AnaInit, Req) -> + case Hylomorphism(Libre, CataInit, AnaInit) of + empty -> + {AnaInit, Libre, Req}; + {skip, CataInit1, AnaInit1} -> + free_from(Callbacks, Hylomorphism, BaseCase, Libre, CataInit1, + AnaInit1, Req); + {value, CataInit1, Pid, Alloc} -> + Libre1 = dict:erase(Pid, Libre), + ok = set_process_mode(Callbacks, Pid, oppressed), + case Req > Alloc of + true -> free_from(Callbacks, Hylomorphism, BaseCase, Libre1, + CataInit1, AnaInit, Req - Alloc); + false -> {BaseCase(CataInit1, AnaInit), Libre1, Req - Alloc} + end + end. 
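%% Editorial trace of the machinery above (invented figures): suppose
%% the sleepy queue holds [P1, P2, P3] with allocations of 100, 300 and
%% 50 tokens, and 200 further tokens are needed. free_upto_sleepy
%% oppresses P1 first (the longest-hibernated pid), still needs 100,
%% then oppresses P2 and stops with ReqRem = 100 - 300 = -200; the
%% caller computes Req - ReqRem = 400 tokens freed, and P3 is never
%% disturbed because the requirement went non-positive before the
%% queue ran out.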
+ +free_upto(Pid, Req, State = #state { available_tokens = Avail, + liberated_processes = Libre, + callbacks = Callbacks, + lowrate = Lazy, + hibernate = Sleepy, + unoppressable = Unoppressable }) + when Req > Avail -> + Unoppressable1 = sets:add_element(Pid, Unoppressable), + {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Libre), + case Req > Avail + SleepySum of + true -> %% not enough in sleepy, have a look in lazy too + {Lazy1, LazySum} = tidy_and_sum_lazy(Unoppressable1, Lazy, Libre), + case Req > Avail + SleepySum + LazySum of + true -> %% can't free enough, just return tidied state + State #state { lowrate = Lazy1, hibernate = Sleepy1 }; + false -> %% need to free all of sleepy, and some of lazy + {Sleepy2, Libre1, ReqRem} = + free_upto_sleepy(Unoppressable1, Callbacks, + Sleepy1, Libre, Req), + {Lazy2, Libre2, ReqRem1} = + free_upto_lazy(Unoppressable1, Callbacks, + Lazy1, Libre1, ReqRem), + %% ReqRem1 will be <= 0 because it's + %% likely we'll have freed more than we + %% need, thus Req - ReqRem1 is total freed + State #state { available_tokens = Avail + (Req - ReqRem1), + liberated_processes = Libre2, lowrate = Lazy2, + hibernate = Sleepy2 } + end; + false -> %% enough available in sleepy, don't touch lazy + {Sleepy2, Libre1, ReqRem} = + free_upto_sleepy(Unoppressable1, Callbacks, Sleepy1, Libre, Req), + State #state { available_tokens = Avail + (Req - ReqRem), + liberated_processes = Libre1, hibernate = Sleepy2 } + end; +free_upto(_Pid, _Req, State) -> + State. diff --git a/src/rabbit_queue_mode_manager.erl b/src/rabbit_queue_mode_manager.erl deleted file mode 100644 index a2fab615..00000000 --- a/src/rabbit_queue_mode_manager.erl +++ /dev/null @@ -1,454 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_queue_mode_manager). - --behaviour(gen_server2). - --export([start_link/0]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([register/5, report_memory/3, report_memory/5, info/0, - conserve_memory/2]). - --define(TOTAL_TOKENS, 10000000). --define(ACTIVITY_THRESHOLD, 25). - --define(SERVER, ?MODULE). - --ifdef(use_specs). - --spec(start_link/0 :: () -> - ({'ok', pid()} | 'ignore' | {'error', any()})). 
--spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). --spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). --spec(report_memory/5 :: (pid(), non_neg_integer(), - (non_neg_integer() | 'undefined'), - (non_neg_integer() | 'undefined'), bool()) -> - 'ok'). --spec(info/0 :: () -> [{atom(), any()}]). --spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). - --endif. - --record(state, { available_tokens, - mixed_queues, - callbacks, - tokens_per_byte, - lowrate, - hibernate, - unevictable, - alarmed - }). - -%% Token-credit based memory management - -%% Start off by working out the amount of memory available in the -%% system (RAM). Then, work out how many tokens each byte corresponds -%% to. This is the tokens_per_byte field. When a process registers, it -%% must provide an M-F-A triple to a function that needs one further -%% argument, which is the new mode. This will either be 'mixed' or -%% 'disk'. -%% -%% Processes then report their own memory usage, in bytes, and the -%% manager takes care of the rest. -%% -%% There are a finite number of tokens in the system. These are -%% allocated to processes as they are requested. We keep track of -%% processes which have hibernated, and processes that are doing only -%% a low rate of work. When a request for memory can't be satisfied, -%% we try and evict processes first from the hibernated group, and -%% then from the lowrate group. The hibernated group is a simple -%% queue, and so is implicitly sorted by the order in which processes -%% were added to the queue. This means that when removing from the -%% queue, we hibernate the sleepiest pid first. The lowrate group is a -%% priority queue, where the priority is the truncated log (base e) of -%% the amount of memory allocated. Thus when we remove from the queue, -%% we first remove the queue from the highest bucket. -%% -%% If the request still can't be satisfied after evicting to disk -%% everyone from those two groups (and note that we check first -%% whether or not freeing them would make available enough tokens to -%% satisfy the request rather than just sending all those queues to -%% disk and then going "whoops, didn't help after all"), then we send -%% the requesting process to disk. When a queue registers, it can -%% declare itself "unevictable". If a queue is unevictable then it -%% will not be sent to disk as a result of other processes requesting -%% more memory. However, if it itself is requesting more memory and -%% that request can't be satisfied then it is still sent to disk as -%% before. This feature is only used by the disk_queue, because if the -%% disk queue is not being used, and hibernates, and then memory -%% pressure gets tight, the disk_queue would typically be one of the -%% first processes to get sent to disk, which cripples -%% performance. Thus by setting it unevictable, it is only possible -%% for the disk_queue to be sent to disk when it is active and -%% attempting to increase its memory allocation. -%% -%% If a process has been sent to disk, it continues making -%% requests. As soon as a request can be satisfied (and this can -%% include sending other processes to disk in the way described -%% above), it will be told to come back into mixed mode. We do not -%% keep any information about queues in disk mode. -%% -%% Note that the lowrate and hibernate groups can get very out of -%% date. This is fine, and somewhat unavoidable given the absence of -%% useful APIs for queues. 
Thus we allow them to get out of date -%% (processes will be left in there when they change groups, -%% duplicates can appear, dead processes are not pruned etc etc etc), -%% and when we go through the groups, summing up their amount of -%% memory, we tidy up at that point. -%% -%% A process which is not evicted to disk, and is requesting a smaller -%% amount of RAM than its last request will always be satisfied. A -%% mixed-mode process that is busy but consuming an unchanging amount -%% of RAM will never be sent to disk. The disk_queue is also managed -%% in the same way. This means that a queue that has gone back to -%% being mixed after being in disk mode now has its messages counted -%% twice as they are counted both in the request made by the queue -%% (even though they may not yet be in RAM (though see the -%% prefetcher)) and also by the disk_queue. Thus the amount of -%% available RAM must be higher when going disk -> mixed than when -%% going mixed -> disk. This is fairly sensible as it reduces the risk -%% of any oscillations occurring. -%% -%% The queue process deliberately reports 4 times its estimated RAM -%% usage, and the disk_queue 2.5 times. In practise, this seems to -%% work well. Note that we are deliberately running out of tokes a -%% little early because of the fact that the mixed -> disk transition -%% can transiently eat a lot of memory and take some time (flushing a -%% few million messages to disk is never going to be instantaneous). - -start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). - -register(Pid, Unevictable, Module, Function, Args) -> - gen_server2:cast(?SERVER, {register, Pid, Unevictable, - Module, Function, Args}). - -report_memory(Pid, Memory, Hibernating) -> - report_memory(Pid, Memory, undefined, undefined, Hibernating). - -report_memory(Pid, Memory, Gain, Loss, Hibernating) -> - gen_server2:cast(?SERVER, - {report_memory, Pid, Memory, Gain, Loss, Hibernating}). - -info() -> - gen_server2:call(?SERVER, info). - -conserve_memory(_Pid, Conserve) -> - gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}). - -init([]) -> - process_flag(trap_exit, true), - rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), - {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), - MemAvail = MemTotal - MemUsed, - TPB = if MemAvail == 0 -> 0; - true -> ?TOTAL_TOKENS / MemAvail - end, - {ok, #state { available_tokens = ?TOTAL_TOKENS, - mixed_queues = dict:new(), - callbacks = dict:new(), - tokens_per_byte = TPB, - lowrate = priority_queue:new(), - hibernate = queue:new(), - unevictable = sets:new(), - alarmed = false - }}. - -handle_call(info, _From, State) -> - State1 = #state { available_tokens = Avail, - mixed_queues = Mixed, - lowrate = Lazy, - hibernate = Sleepy, - unevictable = Unevictable } = - free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying - {reply, [{ available_tokens, Avail }, - { mixed_queues, dict:to_list(Mixed) }, - { lowrate_queues, priority_queue:to_list(Lazy) }, - { hibernated_queues, queue:to_list(Sleepy) }, - { unevictable_queues, sets:to_list(Unevictable) }], State1}. 
- - -handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, - State = #state { mixed_queues = Mixed, - available_tokens = Avail, - callbacks = Callbacks, - tokens_per_byte = TPB, - alarmed = Alarmed }) -> - Req = rabbit_misc:ceil(TPB * Memory), - LowRate = case {BytesGained, BytesLost} of - {undefined, _} -> false; - {_, undefined} -> false; - {G, L} -> G < ?ACTIVITY_THRESHOLD andalso - L < ?ACTIVITY_THRESHOLD - end, - MixedActivity = if Hibernating -> hibernate; - LowRate -> lowrate; - true -> active - end, - {StateN = #state { lowrate = Lazy, hibernate = Sleepy }, ActivityNew} = - case find_queue(Pid, Mixed) of - {mixed, {OAlloc, _OActivity}} -> - Avail1 = Avail + OAlloc, - State1 = - #state { available_tokens = Avail2, mixed_queues = Mixed1 } - = free_upto(Pid, Req, - State #state { available_tokens = Avail1 }), - case Req > Avail2 of - true -> %% nowt we can do, send to disk - ok = set_queue_mode(Callbacks, Pid, disk), - {State1 #state { mixed_queues = - dict:erase(Pid, Mixed1) }, disk}; - false -> %% keep mixed - {State1 #state - { mixed_queues = - dict:store(Pid, {Req, MixedActivity}, Mixed1), - available_tokens = Avail2 - Req }, - MixedActivity} - end; - disk -> - case Alarmed of - true -> - {State, disk}; - false -> - State1 = #state { available_tokens = Avail1, - mixed_queues = Mixed1 } = - free_upto(Pid, Req, State), - case Req > Avail1 orelse Hibernating orelse LowRate of - true -> - %% not enough space, or no compelling - %% reason, so stay as disk - {State1, disk}; - false -> %% can go to mixed mode - set_queue_mode(Callbacks, Pid, mixed), - {State1 #state { - mixed_queues = - dict:store(Pid, {Req, MixedActivity}, Mixed1), - available_tokens = Avail1 - Req }, - MixedActivity} - end - end - end, - StateN1 = - case ActivityNew of - active -> StateN; - disk -> StateN; - lowrate -> - StateN #state { lowrate = add_to_lowrate(Pid, Req, Lazy) }; - hibernate -> - StateN #state { hibernate = queue:in(Pid, Sleepy) } - end, - {noreply, StateN1}; - -handle_cast({register, Pid, IsUnevictable, Module, Function, Args}, - State = #state { callbacks = Callbacks, - unevictable = Unevictable }) -> - _MRef = erlang:monitor(process, Pid), - Unevictable1 = case IsUnevictable of - true -> sets:add_element(Pid, Unevictable); - false -> Unevictable - end, - {noreply, State #state { callbacks = dict:store - (Pid, {Module, Function, Args}, Callbacks), - unevictable = Unevictable1 - }}; - -handle_cast({conserve_memory, Conserve}, State) -> - {noreply, State #state { alarmed = Conserve }}. - -handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #state { available_tokens = Avail, - mixed_queues = Mixed }) -> - State1 = case find_queue(Pid, Mixed) of - disk -> - State; - {mixed, {Alloc, _Activity}} -> - State #state { available_tokens = Avail + Alloc, - mixed_queues = dict:erase(Pid, Mixed) } - end, - {noreply, State1}; -handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}; -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, State) -> - State. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -add_to_lowrate(Pid, Alloc, Lazy) -> - Bucket = if Alloc == 0 -> 0; %% can't take log(0) - true -> trunc(math:log(Alloc)) %% log base e - end, - priority_queue:in({Pid, Bucket, Alloc}, Bucket, Lazy). - -find_queue(Pid, Mixed) -> - case dict:find(Pid, Mixed) of - {ok, Value} -> {mixed, Value}; - error -> disk - end. 
- -set_queue_mode(Callbacks, Pid, Mode) -> - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - erlang:apply(Module, Function, Args ++ [Mode]). - -tidy_and_sum_lazy(IgnorePids, Lazy, Mixed) -> - tidy_and_sum(lowrate, Mixed, - fun (Lazy1) -> - case priority_queue:out(Lazy1) of - {empty, Lazy2} -> - {empty, Lazy2}; - {{value, {Pid, _Bucket, _Alloc}}, Lazy2} -> - {{value, Pid}, Lazy2} - end - end, fun add_to_lowrate/3, IgnorePids, Lazy, - priority_queue:new(), 0). - -tidy_and_sum_sleepy(IgnorePids, Sleepy, Mixed) -> - tidy_and_sum(hibernate, Mixed, fun queue:out/1, - fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, - IgnorePids, Sleepy, queue:new(), 0). - -tidy_and_sum(AtomExpected, Mixed, Catamorphism, Anamorphism, DupCheckSet, - CataInit, AnaInit, AllocAcc) -> - case Catamorphism(CataInit) of - {empty, _CataInit} -> {AnaInit, AllocAcc}; - {{value, Pid}, CataInit1} -> - {DupCheckSet1, AnaInit1, AllocAcc1} = - case sets:is_element(Pid, DupCheckSet) of - true -> - {DupCheckSet, AnaInit, AllocAcc}; - false -> - case find_queue(Pid, Mixed) of - {mixed, {Alloc, AtomExpected}} -> - {sets:add_element(Pid, DupCheckSet), - Anamorphism(Pid, Alloc, AnaInit), - Alloc + AllocAcc}; - _ -> - {DupCheckSet, AnaInit, AllocAcc} - end - end, - tidy_and_sum(AtomExpected, Mixed, Catamorphism, Anamorphism, - DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) - end. - -free_upto_lazy(IgnorePids, Callbacks, Lazy, Mixed, Req) -> - free_from( - Callbacks, - fun(_Mixed, Lazy1, LazyAcc) -> - case priority_queue:out(Lazy1) of - {empty, _Lazy2} -> - empty; - {{value, V = {Pid, Bucket, Alloc}}, Lazy2} -> - case sets:is_element(Pid, IgnorePids) of - true -> {skip, Lazy2, - priority_queue:in(V, Bucket, LazyAcc)}; - false -> {value, Lazy2, Pid, Alloc} - end - end - end, fun priority_queue:join/2, Mixed, Lazy, priority_queue:new(), Req). - -free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Mixed, Req) -> - free_from(Callbacks, - fun(Mixed1, Sleepy1, SleepyAcc) -> - case queue:out(Sleepy1) of - {empty, _Sleepy2} -> - empty; - {{value, Pid}, Sleepy2} -> - case sets:is_element(Pid, IgnorePids) of - true -> {skip, Sleepy2, - queue:in(Pid, SleepyAcc)}; - false -> {Alloc, hibernate} = - dict:fetch(Pid, Mixed1), - {value, Sleepy2, Pid, Alloc} - end - end - end, fun queue:join/2, Mixed, Sleepy, queue:new(), Req). - -free_from(Callbacks, Hylomorphism, BaseCase, Mixed, CataInit, AnaInit, Req) -> - case Hylomorphism(Mixed, CataInit, AnaInit) of - empty -> - {AnaInit, Mixed, Req}; - {skip, CataInit1, AnaInit1} -> - free_from(Callbacks, Hylomorphism, BaseCase, Mixed, CataInit1, - AnaInit1, Req); - {value, CataInit1, Pid, Alloc} -> - Mixed1 = dict:erase(Pid, Mixed), - ok = set_queue_mode(Callbacks, Pid, disk), - case Req > Alloc of - true -> free_from(Callbacks, Hylomorphism, BaseCase, Mixed1, - CataInit1, AnaInit, Req - Alloc); - false -> {BaseCase(CataInit1, AnaInit), Mixed1, Req - Alloc} - end - end. 
- -free_upto(Pid, Req, State = #state { available_tokens = Avail, - mixed_queues = Mixed, - callbacks = Callbacks, - lowrate = Lazy, - hibernate = Sleepy, - unevictable = Unevictable }) - when Req > Avail -> - Unevictable1 = sets:add_element(Pid, Unevictable), - {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unevictable1, Sleepy, Mixed), - case Req > Avail + SleepySum of - true -> %% not enough in sleepy, have a look in lazy too - {Lazy1, LazySum} = tidy_and_sum_lazy(Unevictable1, Lazy, Mixed), - case Req > Avail + SleepySum + LazySum of - true -> %% can't free enough, just return tidied state - State #state { lowrate = Lazy1, hibernate = Sleepy1 }; - false -> %% need to free all of sleepy, and some of lazy - {Sleepy2, Mixed1, ReqRem} = - free_upto_sleepy(Unevictable1, Callbacks, - Sleepy1, Mixed, Req), - {Lazy2, Mixed2, ReqRem1} = - free_upto_lazy(Unevictable1, Callbacks, - Lazy1, Mixed1, ReqRem), - %% ReqRem1 will be <= 0 because it's - %% likely we'll have freed more than we - %% need, thus Req - ReqRem1 is total freed - State #state { available_tokens = Avail + (Req - ReqRem1), - mixed_queues = Mixed2, lowrate = Lazy2, - hibernate = Sleepy2 } - end; - false -> %% enough available in sleepy, don't touch lazy - {Sleepy2, Mixed1, ReqRem} = - free_upto_sleepy(Unevictable1, Callbacks, Sleepy1, Mixed, Req), - State #state { available_tokens = Avail + (Req - ReqRem), - mixed_queues = Mixed1, hibernate = Sleepy2 } - end; -free_upto(_Pid, _Req, State) -> - State. -- cgit v1.2.1 From 22813580a1f9a035815ee2ba702f2e204c893c7c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 18:32:09 +0100 Subject: cosmetics --- src/rabbit_memory_manager.erl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index 055e4795..9ed6bc46 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -199,12 +199,11 @@ handle_call(info, _From, State) -> hibernate = Sleepy, unoppressable = Unoppressable } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying - {reply, [{ available_tokens, Avail }, - { liberated_processes, dict:to_list(Libre) }, - { lowrate_queues, priority_queue:to_list(Lazy) }, - { hibernated_queues, queue:to_list(Sleepy) }, - { unoppressable_queues, sets:to_list(Unoppressable) }], State1}. - + {reply, [{ available_tokens, Avail }, + { liberated_processes, dict:to_list(Libre) }, + { lowrate_processes, priority_queue:to_list(Lazy)}, + { hibernated_processes, queue:to_list(Sleepy) }, + { unoppressable_processes, sets:to_list(Unoppressable) }], State1}. 
handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State = #state { liberated_processes = Libre, -- cgit v1.2.1 From 95592c3fc7848608a41c61f325808f8319a0adcf Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 27 Aug 2009 19:20:23 +0100 Subject: cosmetic --- src/rabbit_memory_manager.erl | 55 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index 9ed6bc46..ab5b545a 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -182,22 +182,22 @@ init([]) -> TPB = if MemAvail == 0 -> 0; true -> ?TOTAL_TOKENS / MemAvail end, - {ok, #state { available_tokens = ?TOTAL_TOKENS, + {ok, #state { available_tokens = ?TOTAL_TOKENS, liberated_processes = dict:new(), - callbacks = dict:new(), - tokens_per_byte = TPB, - lowrate = priority_queue:new(), - hibernate = queue:new(), - unoppressable = sets:new(), - alarmed = false + callbacks = dict:new(), + tokens_per_byte = TPB, + lowrate = priority_queue:new(), + hibernate = queue:new(), + unoppressable = sets:new(), + alarmed = false }}. handle_call(info, _From, State) -> - State1 = #state { available_tokens = Avail, + State1 = #state { available_tokens = Avail, liberated_processes = Libre, - lowrate = Lazy, - hibernate = Sleepy, - unoppressable = Unoppressable } = + lowrate = Lazy, + hibernate = Sleepy, + unoppressable = Unoppressable } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying {reply, [{ available_tokens, Avail }, { liberated_processes, dict:to_list(Libre) }, @@ -207,10 +207,10 @@ handle_call(info, _From, State) -> handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State = #state { liberated_processes = Libre, - available_tokens = Avail, - callbacks = Callbacks, - tokens_per_byte = TPB, - alarmed = Alarmed }) -> + available_tokens = Avail, + callbacks = Callbacks, + tokens_per_byte = TPB, + alarmed = Alarmed }) -> Req = rabbit_misc:ceil(TPB * Memory), LowRate = case {BytesGained, BytesLost} of {undefined, _} -> false; @@ -259,7 +259,8 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, set_process_mode(Callbacks, Pid, liberated), {State1 #state { liberated_processes = - dict:store(Pid, {Req, LibreActivity}, Libre1), + dict:store(Pid, {Req, LibreActivity}, + Libre1), available_tokens = Avail1 - Req }, LibreActivity} end @@ -300,7 +301,8 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, State; {libre, {Alloc, _Activity}} -> State #state { available_tokens = Avail + Alloc, - liberated_processes = dict:erase(Pid, Libre) } + liberated_processes = + dict:erase(Pid, Libre) } end, {noreply, State1}; handle_info({'EXIT', _Pid, Reason}, State) -> @@ -420,12 +422,12 @@ free_from(Callbacks, Hylomorphism, BaseCase, Libre, CataInit, AnaInit, Req) -> end end. 
-free_upto(Pid, Req, State = #state { available_tokens = Avail, +free_upto(Pid, Req, State = #state { available_tokens = Avail, liberated_processes = Libre, - callbacks = Callbacks, - lowrate = Lazy, - hibernate = Sleepy, - unoppressable = Unoppressable }) + callbacks = Callbacks, + lowrate = Lazy, + hibernate = Sleepy, + unoppressable = Unoppressable }) when Req > Avail -> Unoppressable1 = sets:add_element(Pid, Unoppressable), {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Libre), @@ -446,14 +448,17 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, %% likely we'll have freed more than we %% need, thus Req - ReqRem1 is total freed State #state { available_tokens = Avail + (Req - ReqRem1), - liberated_processes = Libre2, lowrate = Lazy2, + liberated_processes = Libre2, + lowrate = Lazy2, hibernate = Sleepy2 } end; false -> %% enough available in sleepy, don't touch lazy {Sleepy2, Libre1, ReqRem} = - free_upto_sleepy(Unoppressable1, Callbacks, Sleepy1, Libre, Req), + free_upto_sleepy(Unoppressable1, Callbacks, + Sleepy1, Libre, Req), State #state { available_tokens = Avail + (Req - ReqRem), - liberated_processes = Libre1, hibernate = Sleepy2 } + liberated_processes = Libre1, + hibernate = Sleepy2 } end; free_upto(_Pid, _Req, State) -> State. -- cgit v1.2.1 From 20d1e61c1b559a6f35dd420e667b21839829b4c4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 20:23:36 +0100 Subject: Hacked out the lowrate stuff from memory_manager, and associated adjustments elsewhere. --- src/rabbit_amqqueue_process.erl | 5 +- src/rabbit_memory_manager.erl | 129 ++++++++-------------------------------- src/rabbit_mixed_queue.erl | 31 ++++------ src/rabbit_tests.erl | 12 ++-- 4 files changed, 43 insertions(+), 134 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 72325414..916a2410 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -549,9 +549,8 @@ i(Item, _) -> throw({bad_argument, Item}). report_memory(Hib, State = #q { mixed_state = MS }) -> - {MS1, MSize, Gain, Loss} = - rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), - rabbit_memory_manager:report_memory(self(), MSize, Gain, Loss, Hib), + {MS1, MSize} = rabbit_mixed_queue:estimate_queue_memory(MS), + rabbit_memory_manager:report_memory(self(), MSize, Hib), State #q { mixed_state = MS1 }. %--------------------------------------------------------------------------- diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index ab5b545a..44582dc4 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -38,11 +38,9 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([register/5, report_memory/3, report_memory/5, info/0, - conserve_memory/2]). +-export([register/5, report_memory/3, info/0, conserve_memory/2]). -define(TOTAL_TOKENS, 10000000). --define(ACTIVITY_THRESHOLD, 25). -define(SERVER, ?MODULE). @@ -52,10 +50,6 @@ ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). -spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). --spec(report_memory/5 :: (pid(), non_neg_integer(), - (non_neg_integer() | 'undefined'), - (non_neg_integer() | 'undefined'), bool()) -> - 'ok'). -spec(info/0 :: () -> [{atom(), any()}]). -spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). 
@@ -65,7 +59,6 @@ liberated_processes, callbacks, tokens_per_byte, - lowrate, hibernate, unoppressable, alarmed @@ -85,19 +78,13 @@ %% %% There are a finite number of tokens in the system. These are %% allocated to processes as the processes report their memory -%% usage. We keep track of processes which have hibernated, and -%% processes that are doing only a low rate of work (reported as a low -%% gain or loss in memory between memory reports). When a process -%% reports memory use which can't be satisfied by the available -%% tokens, we try and oppress processes first from the hibernated -%% group, and then from the lowrate group. The hibernated group is a -%% simple queue, and so is implicitly sorted by the order in which -%% processes were added to the queue. This means that when removing -%% from the queue, we evict the sleepiest (and most passive) pid -%% first. The lowrate group is a priority queue, where the priority is -%% the truncated log (base e) of the amount of memory allocated. Thus -%% when we remove from the queue, we first remove the queue from the -%% highest bucket. +%% usage. We keep track of processes which have hibernated. When a +%% process reports memory use which can't be satisfied by the +%% available tokens, we try and oppress processes first from the +%% hibernated group. The hibernated group is a simple queue, and so is +%% implicitly sorted by the order in which processes were added to the +%% queue. This means that when removing from the queue, we evict the +%% sleepiest (and most passive) pid first. %% %% If the reported memory use still can't be satisfied after %% oppressing everyone from those two groups (and note that we check @@ -123,13 +110,13 @@ %% processes in the way described above), it will be liberated. We do %% not keep any information about oppressed processes. %% -%% Note that the lowrate and hibernate groups can get very out of -%% date. This is fine, and somewhat unavoidable given the absence of -%% useful APIs for queues. Thus we allow them to get out of date -%% (processes will be left in there when they change groups, -%% duplicates can appear, dead processes are not pruned etc etc etc), -%% and when we go through the groups, summing up their allocated -%% tokens, we tidy up at that point. +%% Note that the hibernate group can get very out of date. This is +%% fine, and somewhat unavoidable given the absence of useful APIs for +%% queues. Thus we allow them to get out of date (processes will be +%% left in there when they change groups, duplicates can appear, dead +%% processes are not pruned etc etc etc), and when we go through the +%% groups, summing up their allocated tokens, we tidy up at that +%% point. %% %% A liberated process, which is reporting a smaller amount of RAM %% than its last report will remain liberated. A liberated process @@ -162,11 +149,7 @@ register(Pid, Unoppressable, Module, Function, Args) -> Module, Function, Args}). report_memory(Pid, Memory, Hibernating) -> - report_memory(Pid, Memory, undefined, undefined, Hibernating). - -report_memory(Pid, Memory, Gain, Loss, Hibernating) -> - gen_server2:cast(?SERVER, - {report_memory, Pid, Memory, Gain, Loss, Hibernating}). + gen_server2:cast(?SERVER, {report_memory, Pid, Memory, Hibernating}). info() -> gen_server2:call(?SERVER, info). 
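With the lowrate group gone, info/0 returns a correspondingly smaller
report. An invented but representative shell session follows (the field
names are those of the handler as amended below; the pids and figures
are made up):

    > rabbit_memory_manager:info().
    [{available_tokens,        9500000},
     {liberated_processes,     [{<0.42.0>,{488282,active}}]},
     {hibernated_processes,    [<0.51.0>]},
     {unoppressable_processes, [<0.38.0>]}]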
@@ -186,7 +169,6 @@ init([]) -> liberated_processes = dict:new(), callbacks = dict:new(), tokens_per_byte = TPB, - lowrate = priority_queue:new(), hibernate = queue:new(), unoppressable = sets:new(), alarmed = false @@ -195,34 +177,25 @@ init([]) -> handle_call(info, _From, State) -> State1 = #state { available_tokens = Avail, liberated_processes = Libre, - lowrate = Lazy, hibernate = Sleepy, unoppressable = Unoppressable } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying {reply, [{ available_tokens, Avail }, { liberated_processes, dict:to_list(Libre) }, - { lowrate_processes, priority_queue:to_list(Lazy)}, { hibernated_processes, queue:to_list(Sleepy) }, { unoppressable_processes, sets:to_list(Unoppressable) }], State1}. -handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, +handle_cast({report_memory, Pid, Memory, Hibernating}, State = #state { liberated_processes = Libre, available_tokens = Avail, callbacks = Callbacks, tokens_per_byte = TPB, alarmed = Alarmed }) -> Req = rabbit_misc:ceil(TPB * Memory), - LowRate = case {BytesGained, BytesLost} of - {undefined, _} -> false; - {_, undefined} -> false; - {G, L} -> G < ?ACTIVITY_THRESHOLD andalso - L < ?ACTIVITY_THRESHOLD - end, LibreActivity = if Hibernating -> hibernate; - LowRate -> lowrate; true -> active end, - {StateN = #state { lowrate = Lazy, hibernate = Sleepy }, ActivityNew} = + {StateN = #state { hibernate = Sleepy }, ActivityNew} = case find_process(Pid, Libre) of {libre, {OAlloc, _OActivity}} -> Avail1 = Avail + OAlloc, @@ -250,7 +223,7 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, State1 = #state { available_tokens = Avail1, liberated_processes = Libre1 } = free_upto(Pid, Req, State), - case Req > Avail1 orelse Hibernating orelse LowRate of + case Req > Avail1 orelse Hibernating of true -> %% not enough space, or no compelling %% reason, so stay oppressed @@ -270,8 +243,6 @@ handle_cast({report_memory, Pid, Memory, BytesGained, BytesLost, Hibernating}, case ActivityNew of active -> StateN; oppressed -> StateN; - lowrate -> - StateN #state { lowrate = add_to_lowrate(Pid, Req, Lazy) }; hibernate -> StateN #state { hibernate = queue:in(Pid, Sleepy) } end, @@ -316,12 +287,6 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -add_to_lowrate(Pid, Alloc, Lazy) -> - Bucket = if Alloc == 0 -> 0; %% can't take log(0) - true -> trunc(math:log(Alloc)) %% log base e - end, - priority_queue:in({Pid, Bucket, Alloc}, Bucket, Lazy). - find_process(Pid, Libre) -> case dict:find(Pid, Libre) of {ok, Value} -> {libre, Value}; @@ -332,18 +297,6 @@ set_process_mode(Callbacks, Pid, Mode) -> {Module, Function, Args} = dict:fetch(Pid, Callbacks), erlang:apply(Module, Function, Args ++ [Mode]). -tidy_and_sum_lazy(IgnorePids, Lazy, Libre) -> - tidy_and_sum(lowrate, Libre, - fun (Lazy1) -> - case priority_queue:out(Lazy1) of - {empty, Lazy2} -> - {empty, Lazy2}; - {{value, {Pid, _Bucket, _Alloc}}, Lazy2} -> - {{value, Pid}, Lazy2} - end - end, fun add_to_lowrate/3, IgnorePids, Lazy, - priority_queue:new(), 0). - tidy_and_sum_sleepy(IgnorePids, Sleepy, Libre) -> tidy_and_sum(hibernate, Libre, fun queue:out/1, fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, @@ -372,22 +325,6 @@ tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, DupCheckSet, DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) end. 
-free_upto_lazy(IgnorePids, Callbacks, Lazy, Libre, Req) -> - free_from( - Callbacks, - fun(_Libre, Lazy1, LazyAcc) -> - case priority_queue:out(Lazy1) of - {empty, _Lazy2} -> - empty; - {{value, V = {Pid, Bucket, Alloc}}, Lazy2} -> - case sets:is_element(Pid, IgnorePids) of - true -> {skip, Lazy2, - priority_queue:in(V, Bucket, LazyAcc)}; - false -> {value, Lazy2, Pid, Alloc} - end - end - end, fun priority_queue:join/2, Libre, Lazy, priority_queue:new(), Req). - free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Libre, Req) -> free_from(Callbacks, fun(Libre1, Sleepy1, SleepyAcc) -> @@ -425,34 +362,18 @@ free_from(Callbacks, Hylomorphism, BaseCase, Libre, CataInit, AnaInit, Req) -> free_upto(Pid, Req, State = #state { available_tokens = Avail, liberated_processes = Libre, callbacks = Callbacks, - lowrate = Lazy, hibernate = Sleepy, unoppressable = Unoppressable }) when Req > Avail -> Unoppressable1 = sets:add_element(Pid, Unoppressable), {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Libre), case Req > Avail + SleepySum of - true -> %% not enough in sleepy, have a look in lazy too - {Lazy1, LazySum} = tidy_and_sum_lazy(Unoppressable1, Lazy, Libre), - case Req > Avail + SleepySum + LazySum of - true -> %% can't free enough, just return tidied state - State #state { lowrate = Lazy1, hibernate = Sleepy1 }; - false -> %% need to free all of sleepy, and some of lazy - {Sleepy2, Libre1, ReqRem} = - free_upto_sleepy(Unoppressable1, Callbacks, - Sleepy1, Libre, Req), - {Lazy2, Libre2, ReqRem1} = - free_upto_lazy(Unoppressable1, Callbacks, - Lazy1, Libre1, ReqRem), - %% ReqRem1 will be <= 0 because it's - %% likely we'll have freed more than we - %% need, thus Req - ReqRem1 is total freed - State #state { available_tokens = Avail + (Req - ReqRem1), - liberated_processes = Libre2, - lowrate = Lazy2, - hibernate = Sleepy2 } - end; - false -> %% enough available in sleepy, don't touch lazy + true -> %% not enough in sleepy, just return tidied state + State #state { hibernate = Sleepy1 }; + false -> + %% ReqRem1 will be <= 0 because it's likely we'll have + %% freed more than we need, thus Req - ReqRem1 is total + %% freed {Sleepy2, Libre1, ReqRem} = free_upto_sleepy(Unoppressable1, Callbacks, Sleepy1, Libre, Req), diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 251c2046..bbec524b 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -40,7 +40,7 @@ len/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). -export([set_storage_mode/3, storage_mode/1, - estimate_queue_memory_and_reset_counters/1]). + estimate_queue_memory/1]). -record(mqstate, { mode, msg_buf, @@ -48,8 +48,6 @@ is_durable, length, memory_size, - memory_gain, - memory_loss, prefetcher } ). @@ -67,8 +65,6 @@ is_durable :: boolean(), length :: non_neg_integer(), memory_size :: (non_neg_integer() | 'undefined'), - memory_gain :: (non_neg_integer() | 'undefined'), - memory_loss :: (non_neg_integer() | 'undefined'), prefetcher :: (pid() | 'undefined') }). -type(acktag() :: ( 'no_on_disk' | { non_neg_integer(), non_neg_integer() })). @@ -92,9 +88,8 @@ -spec(is_empty/1 :: (mqstate()) -> boolean()). -spec(set_storage_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). --spec(estimate_queue_memory_and_reset_counters/1 :: (mqstate()) -> - {mqstate(), non_neg_integer(), non_neg_integer(), - non_neg_integer()}). +-spec(estimate_queue_memory/1 :: (mqstate()) -> + {mqstate(), non_neg_integer()}). -spec(storage_mode/1 :: (mqstate()) -> mode()). -endif. 
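%% [Editor's note] Hedged usage sketch of the narrowed API above:
%% callers now destructure a {State, Size} pair instead of the old
%% four-tuple carrying gain/loss counters. The wrapper function and
%% the hard-coded 'false' hibernation flag are illustrative only.
report_queue_memory(MS) ->
    {MS1, MSize} = rabbit_mixed_queue:estimate_queue_memory(MS),
    ok = rabbit_memory_manager:report_memory(self(), MSize, false),
    MS1.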
@@ -126,8 +121,7 @@ init(Queue, IsDurable) -> MsgBuf = inc_queue_length(queue:new(), Len1), {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, is_durable = IsDurable, length = Len1, - memory_size = Size, memory_gain = undefined, - memory_loss = undefined, prefetcher = undefined }}. + memory_size = Size, prefetcher = undefined }}. publish(Msg = #basic_message { is_persistent = IsPersistent }, State = #mqstate { queue = Q, mode = Mode, is_durable = IsDurable, @@ -492,9 +486,8 @@ flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), {[], []}. -estimate_queue_memory_and_reset_counters(State = - #mqstate { memory_size = Size, memory_gain = Gain, memory_loss = Loss }) -> - {State #mqstate { memory_gain = 0, memory_loss = 0 }, 4 * Size, Gain, Loss}. +estimate_queue_memory(State = #mqstate { memory_size = Size }) -> + {State, 4 * Size}. storage_mode(#mqstate { mode = Mode }) -> Mode. @@ -515,15 +508,11 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> Msg #basic_message { content = rabbit_binary_generator:ensure_content_encoded(Content) }. -gain_memory(Inc, State = #mqstate { memory_size = QSize, - memory_gain = Gain }) -> - State #mqstate { memory_size = QSize + Inc, - memory_gain = Gain + Inc }. +gain_memory(Inc, State = #mqstate { memory_size = QSize }) -> + State #mqstate { memory_size = QSize + Inc }. -lose_memory(Dec, State = #mqstate { memory_size = QSize, - memory_loss = Loss }) -> - State #mqstate { memory_size = QSize - Dec, - memory_loss = Loss + Dec }. +lose_memory(Dec, State = #mqstate { memory_size = QSize }) -> + State #mqstate { memory_size = QSize - Dec }. inc_queue_length(MsgBuf, 0) -> MsgBuf; diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 44abdda4..039e9aa4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1081,8 +1081,8 @@ rdq_test_purge() -> rdq_new_mixed_queue(Q, Durable, Disk) -> {ok, MS} = rabbit_mixed_queue:init(Q, Durable), - {MS1, _, _, _} = - rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS), + {MS1, _} = + rabbit_mixed_queue:estimate_queue_memory(MS), case Disk of true -> {ok, MS2} = rabbit_mixed_queue:set_storage_mode(disk, [], MS1), MS2; @@ -1115,15 +1115,15 @@ rdq_test_mixed_queue_modes() -> end, MS4, lists:seq(1,10)), 30 = rabbit_mixed_queue:len(MS6), io:format("Published a mixture of messages; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS6)]), + [rabbit_mixed_queue:estimate_queue_memory(MS6)]), {ok, MS7} = rabbit_mixed_queue:set_storage_mode(disk, [], MS6), 30 = rabbit_mixed_queue:len(MS7), io:format("Converted to disk only mode; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS7)]), + [rabbit_mixed_queue:estimate_queue_memory(MS7)]), {ok, MS8} = rabbit_mixed_queue:set_storage_mode(mixed, [], MS7), 30 = rabbit_mixed_queue:len(MS8), io:format("Converted to mixed mode; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS8)]), + [rabbit_mixed_queue:estimate_queue_memory(MS8)]), MS10 = lists:foldl( fun (N, MS9) -> @@ -1162,7 +1162,7 @@ rdq_test_mixed_queue_modes() -> rdq_start(), MS17 = rdq_new_mixed_queue(q, true, false), 0 = rabbit_mixed_queue:len(MS17), - {MS17,0,0,0} = rabbit_mixed_queue:estimate_queue_memory_and_reset_counters(MS17), + {MS17,0} = rabbit_mixed_queue:estimate_queue_memory(MS17), io:format("Recovered queue~n"), rdq_stop(), passed. 
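%% [Editor's note] Distilled from the test changes above into a
%% standalone sketch: a freshly initialised mixed queue should report
%% a length of 0 and, under the new two-element return, an estimated
%% memory of 0. The function name is illustrative.
check_fresh_queue(Q, Durable) ->
    {ok, MS} = rabbit_mixed_queue:init(Q, Durable),
    0 = rabbit_mixed_queue:len(MS),
    {MS, 0} = rabbit_mixed_queue:estimate_queue_memory(MS),
    passed.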
-- cgit v1.2.1 From 9d7e2db5513a236858a374bdb037f8d25ea35892 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 Aug 2009 23:44:39 +0100 Subject: Lots of things: 1) disk_queue: remove prealloc - fs's that support holes do it really fast and it buys us nothing. fs's that don't support holes get crippled. 2) disk_queue: make sure we fsync before closing. This is crucial. Posix is perfectly happy to close without flushing data to disk. 3) disk_queue: bug in recover from crash during compaction (*still* not tested), where we were opening with just write, not read+write, hence would have stomped over existing content. 4) memory manager: track oppressed processes and only liberate if they're reporting a memory usage 5% different from what they last had. 5) minor cosmetics to amqqueue_process. 6) start up guid earlier. Necessary because it is actually needed in recovery of queues. I'm not happy with memory manager, but am utterly knackered and need to sleep. All tests pass. --- src/rabbit.erl | 8 +-- src/rabbit_amqqueue_process.erl | 17 +++--- src/rabbit_disk_queue.erl | 63 +++++++++++----------- src/rabbit_memory_manager.erl | 112 ++++++++++++++++++++-------------------- 4 files changed, 96 insertions(+), 104 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 8962b12e..f665ad92 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -148,6 +148,10 @@ start(normal, []) -> ok = start_child(rabbit_router), ok = start_child(rabbit_node_monitor) end}, + {"guid generator", + fun () -> + ok = start_child(rabbit_guid) + end}, {"disk queue", fun () -> ok = start_child(rabbit_disk_queue) @@ -162,10 +166,6 @@ start(normal, []) -> ok = rabbit_disk_queue:delete_non_durable_queues( DurableQueueNames) end}, - {"guid generator", - fun () -> - ok = start_child(rabbit_guid) - end}, {"builtin applications", fun () -> {ok, DefaultVHost} = application:get_env(default_vhost), diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 916a2410..3538b6fb 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -114,8 +114,7 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> blocked_consumers = queue:new(), memory_report_timer = undefined }, - %% first thing we must do is report_memory which will clear out - %% the 'undefined' values in gain and loss in mixed_queue state + %% first thing we must do is report_memory. {ok, start_memory_timer(State), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -721,8 +720,7 @@ handle_call({delete, IfUnused, IfEmpty}, _From, handle_call(purge, _From, State) -> {Count, MS} = rabbit_mixed_queue:purge(State #q.mixed_state), - reply({ok, Count}, - State #q { mixed_state = MS }); + reply({ok, Count}, State #q { mixed_state = MS }); handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, exclusive_consumer = Holder}) -> @@ -738,10 +736,9 @@ handle_call({claim_queue, ReaderPid}, _From, %% pid... reply(locked, State); ok -> - reply(ok, State #q { owner = - {ReaderPid, - erlang:monitor(process, ReaderPid)} }) - + reply(ok, + State#q{ owner = {ReaderPid, erlang:monitor( + process, ReaderPid)} }) end; {ReaderPid, _MonitorRef} -> reply(ok, State); @@ -827,8 +824,8 @@ handle_cast({set_storage_mode, Mode}, State = #q { mixed_state = MS }) -> noreply(State #q { mixed_state = MS1 }). handle_info(report_memory, State) -> - %% deliberately don't call noreply/2 as we don't want to restart the timer. 
- %% By unsetting the timer, we force a report on the next normal message + %% deliberately don't call noreply/2/3 as we don't want to start the timer. + %% By unsetting the timer, we force a report on the next normal message. {noreply, State #q { memory_report_timer = undefined }, hibernate}; handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 18b250c5..3e38be8e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -53,8 +53,8 @@ -include("rabbit.hrl"). -define(WRITE_OK_SIZE_BITS, 8). --define(WRITE_OK_TRANSIENT, 255). --define(WRITE_OK_PERSISTENT, 254). +-define(WRITE_OK_TRANSIENT, 255). +-define(WRITE_OK_PERSISTENT, 254). -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). @@ -68,6 +68,7 @@ -define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs -define(BATCH_SIZE, 10000). -define(CACHE_MAX_SIZE, 10485760). +-define(WRITE_HANDLE_OPEN_MODE, [append, raw, binary, delayed_write]). -define(SERVER, ?MODULE). @@ -431,22 +432,14 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = load_from_disk(State), - Path = form_filename(CurrentName), - Exists = case file:read_file_info(Path) of - {error,enoent} -> false; - {ok, _} -> true - end, %% read is only needed so that we can seek - {ok, FileHdl} = file:open(Path, [read, write, raw, binary, delayed_write]), - case Exists of - true -> {ok, Offset} = file:position(FileHdl, {bof, Offset}); - false -> %% new file, so preallocate - ok = preallocate(FileHdl, FileSizeLimit, Offset) - end, + {ok, FileHdl} = file:open(form_filename(CurrentName), + [read, write, raw, binary, delayed_write]), + {ok, Offset} = file:position(FileHdl, {bof, Offset}), State2 = State1 #dqstate { current_file_handle = FileHdl }, %% by reporting a memory use of 0, we guarantee the manager will - %% grant us to ram_disk mode. We have to start in ram_disk mode - %% because we can't find values for mnesia_bytes_per_record or + %% not oppress us. We have to start in ram_disk mode because we + %% can't find values for mnesia_bytes_per_record or %% ets_bytes_per_record otherwise. ok = rabbit_memory_manager:report_memory(self(), 0, false), ok = report_memory(false, State2), @@ -1231,7 +1224,6 @@ maybe_roll_to_new_file(Offset, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = file:open(form_filename(NextName), [write, raw, binary, delayed_write]), - ok = preallocate(NextHdl, FileSizeLimit, 0), true = ets:update_element(FileSummary, CurName, {5, NextName}),%% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), State2 = State1 #dqstate { current_file_name = NextName, @@ -1244,12 +1236,6 @@ maybe_roll_to_new_file(Offset, maybe_roll_to_new_file(_, State) -> {ok, State}. -preallocate(Hdl, FileSizeLimit, FinalPos) -> - {ok, FileSizeLimit} = file:position(Hdl, {bof, FileSizeLimit}), - ok = file:truncate(Hdl), - {ok, FinalPos} = file:position(Hdl, {bof, FinalPos}), - ok. - %% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- compact(FilesSet, State) -> @@ -1330,6 +1316,12 @@ sort_msg_locations_by_offset(Dir, List) -> Comp(OffA, OffB) end, List). +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file:position(Hdl, {bof, FileSizeLimit}), + ok = file:truncate(Hdl), + {ok, FinalPos} = file:position(Hdl, {bof, FinalPos}), + ok. 
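%% [Editor's note] Context for the relocated helper above: this
%% commit drops preallocation from the write path (per its message,
%% filesystems that support holes make it pointless and those that
%% don't are crippled by it). The helper grows a file by seeking to
%% the size limit and truncating there, then rewinds. A usage sketch,
%% with a hypothetical file name:
%%
%%   {ok, Hdl} = file:open("sketch.rdq", [read, write, raw, binary]),
%%   ok = preallocate(Hdl, 1048576, 0),  %% grow to 1MiB, seek to 0
%%   ok = file:close(Hdl).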
+ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), ok = file:truncate(FileHdl), @@ -1339,11 +1331,11 @@ combine_files({Source, SourceValid, _SourceContiguousTop, _SourceLeft, _SourceRight}, {Destination, DestinationValid, DestinationContiguousTop, _DestinationLeft, _DestinationRight}, - State1) -> - State = close_file(Source, close_file(Destination, State1)), + State) -> + State1 = close_file(Source, close_file(Destination, State)), {ok, SourceHdl} = file:open(form_filename(Source), - [read, write, raw, binary, read_ahead, delayed_write]), + [read, raw, binary, read_ahead]), {ok, DestinationHdl} = file:open(form_filename(Destination), [read, write, raw, binary, read_ahead, delayed_write]), @@ -1378,11 +1370,11 @@ combine_files({Source, SourceValid, _SourceContiguousTop, %% enforce it anyway end, sort_msg_locations_by_offset( asc, dets_ets_match_object( - State, #message_store_entry + State1, #message_store_entry { file = Destination, _ = '_' }))), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, - DestinationHdl, TmpHdl, Destination, State), + DestinationHdl, TmpHdl, Destination, State1), TmpSize = DestinationValid - DestinationContiguousTop, %% so now Tmp contains everything we need to salvage from %% Destination, and MsgLocationDets has been updated to @@ -1399,16 +1391,16 @@ combine_files({Source, SourceValid, _SourceContiguousTop, end, SourceWorkList = sort_msg_locations_by_offset( - asc, dets_ets_match_object(State, #message_store_entry + asc, dets_ets_match_object(State1, #message_store_entry { file = Source, _ = '_' })), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, - SourceHdl, DestinationHdl, Destination, State), + SourceHdl, DestinationHdl, Destination, State1), %% tidy up ok = file:sync(DestinationHdl), ok = file:close(SourceHdl), ok = file:close(DestinationHdl), ok = file:delete(form_filename(Source)), - State. + State1. copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, State) -> @@ -1748,7 +1740,7 @@ recover_crashed_compactions1(Files, TmpFile) -> %% note this also catches the case when the tmp file %% is empty ok = file:delete(TmpFile); - _False -> + false -> %% we're in case 4 above. Check that everything in the %% main file is a valid message in mnesia verify_messages_in_mnesia(MsgIds), @@ -1760,8 +1752,10 @@ recover_crashed_compactions1(Files, TmpFile) -> true = lists:all(fun (MsgId) -> not (lists:member(MsgId, MsgIdsTmp)) end, MsgIds), - {ok, MainHdl} = file:open(form_filename(NonTmpRelatedFile), - [write, raw, binary, delayed_write]), + %% must open with read flag, otherwise will stomp over contents + {ok, MainHdl} = + file:open(form_filename(NonTmpRelatedFile), + [read, write, raw, binary, delayed_write]), {ok, Top} = file:position(MainHdl, Top), %% wipe out any rubbish at the end of the file ok = file:truncate(MainHdl), @@ -1780,6 +1774,7 @@ recover_crashed_compactions1(Files, TmpFile) -> {ok, TmpHdl} = file:open(form_filename(TmpFile), [read, raw, binary, read_ahead]), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), + ok = file:sync(MainHdl), ok = file:close(MainHdl), ok = file:close(TmpHdl), ok = file:delete(TmpFile), @@ -1862,7 +1857,7 @@ read_message_from_disk(FileHdl, TotalSize) -> end. 
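%% [Editor's note] The pattern the recovery fix above enforces, shown
%% in isolation as a sketch: POSIX gives no guarantee that close
%% flushes data to disk, so a durable write must sync first.
safe_close(Hdl) ->
    ok = file:sync(Hdl),   %% force the data out first
    ok = file:close(Hdl).  %% only then release the handle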
scan_file_for_valid_messages(File) -> - case file:open(form_filename(File), [raw, binary, read]) of + case file:open(form_filename(File), [raw, binary, read, read_ahead]) of {ok, Hdl} -> Valid = scan_file_for_valid_messages(Hdl, 0, []), %% if something really bad's happened, the close could fail, but ignore diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index 44582dc4..29216d77 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -56,7 +56,7 @@ -endif. -record(state, { available_tokens, - liberated_processes, + processes, callbacks, tokens_per_byte, hibernate, @@ -166,7 +166,7 @@ init([]) -> true -> ?TOTAL_TOKENS / MemAvail end, {ok, #state { available_tokens = ?TOTAL_TOKENS, - liberated_processes = dict:new(), + processes = dict:new(), callbacks = dict:new(), tokens_per_byte = TPB, hibernate = queue:new(), @@ -176,64 +176,65 @@ init([]) -> handle_call(info, _From, State) -> State1 = #state { available_tokens = Avail, - liberated_processes = Libre, + processes = Procs, hibernate = Sleepy, unoppressable = Unoppressable } = free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying {reply, [{ available_tokens, Avail }, - { liberated_processes, dict:to_list(Libre) }, + { processes, dict:to_list(Procs) }, { hibernated_processes, queue:to_list(Sleepy) }, { unoppressable_processes, sets:to_list(Unoppressable) }], State1}. handle_cast({report_memory, Pid, Memory, Hibernating}, - State = #state { liberated_processes = Libre, + State = #state { processes = Procs, available_tokens = Avail, callbacks = Callbacks, tokens_per_byte = TPB, alarmed = Alarmed }) -> Req = rabbit_misc:ceil(TPB * Memory), LibreActivity = if Hibernating -> hibernate; - true -> active - end, + true -> active + end, {StateN = #state { hibernate = Sleepy }, ActivityNew} = - case find_process(Pid, Libre) of + case find_process(Pid, Procs) of {libre, {OAlloc, _OActivity}} -> Avail1 = Avail + OAlloc, State1 = #state { available_tokens = Avail2, - liberated_processes = Libre1 } + processes = Procs1 } = free_upto(Pid, Req, State #state { available_tokens = Avail1 }), case Req > Avail2 of true -> %% nowt we can do, oppress the process ok = set_process_mode(Callbacks, Pid, oppressed), - {State1 #state { liberated_processes = - dict:erase(Pid, Libre1) }, oppressed}; + {State1 #state { processes = + dict:store(Pid, {Req, oppressed}, + Procs1) }, oppressed}; false -> %% keep liberated {State1 #state - { liberated_processes = - dict:store(Pid, {Req, LibreActivity}, Libre1), + { processes = + dict:store(Pid, {Req, LibreActivity}, Procs1), available_tokens = Avail2 - Req }, LibreActivity} end; - oppressed -> - case Alarmed of + {oppressed, OrigReq} -> + case Alarmed orelse Hibernating orelse + (Req > OrigReq * 0.95 andalso Req < OrigReq * 1.05) of true -> {State, oppressed}; false -> State1 = #state { available_tokens = Avail1, - liberated_processes = Libre1 } = + processes = Procs1 } = free_upto(Pid, Req, State), - case Req > Avail1 orelse Hibernating of + case Req > Avail1 of true -> - %% not enough space, or no compelling - %% reason, so stay oppressed + %% not enough space, so stay oppressed {State1, oppressed}; false -> %% can liberate the process set_process_mode(Callbacks, Pid, liberated), {State1 #state { - liberated_processes = + processes = dict:store(Pid, {Req, LibreActivity}, - Libre1), + Procs1), available_tokens = Avail1 - Req }, LibreActivity} end @@ -266,16 +267,14 @@ handle_cast({conserve_memory, Conserve}, State) -> handle_info({'DOWN', _MRef, process, Pid, 
_Reason}, State = #state { available_tokens = Avail, - liberated_processes = Libre }) -> - State1 = case find_process(Pid, Libre) of - oppressed -> - State; - {libre, {Alloc, _Activity}} -> - State #state { available_tokens = Avail + Alloc, - liberated_processes = - dict:erase(Pid, Libre) } - end, - {noreply, State1}; + processes = Procs }) -> + State1 = State #state { processes = dict:erase(Pid, Procs) }, + {noreply, case find_process(Pid, Procs) of + {oppressed, _OrigReq} -> + State1; + {libre, {Alloc, _Activity}} -> + State1 #state { available_tokens = Avail + Alloc } + end}; handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; handle_info(_Info, State) -> @@ -287,22 +286,23 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -find_process(Pid, Libre) -> - case dict:find(Pid, Libre) of - {ok, Value} -> {libre, Value}; - error -> oppressed +find_process(Pid, Procs) -> + case dict:find(Pid, Procs) of + {ok, {OrigReq, oppressed}} -> {oppressed, OrigReq}; + {ok, Value = {_Alloc, _Activity}} -> {libre, Value}; + error -> {oppressed, -9999} end. set_process_mode(Callbacks, Pid, Mode) -> {Module, Function, Args} = dict:fetch(Pid, Callbacks), erlang:apply(Module, Function, Args ++ [Mode]). -tidy_and_sum_sleepy(IgnorePids, Sleepy, Libre) -> - tidy_and_sum(hibernate, Libre, fun queue:out/1, +tidy_and_sum_sleepy(IgnorePids, Sleepy, Procs) -> + tidy_and_sum(hibernate, Procs, fun queue:out/1, fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, IgnorePids, Sleepy, queue:new(), 0). -tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, DupCheckSet, +tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, DupCheckSet, CataInit, AnaInit, AllocAcc) -> case Catamorphism(CataInit) of {empty, _CataInit} -> {AnaInit, AllocAcc}; @@ -312,7 +312,7 @@ tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, DupCheckSet, true -> {DupCheckSet, AnaInit, AllocAcc}; false -> - case find_process(Pid, Libre) of + case find_process(Pid, Procs) of {libre, {Alloc, AtomExpected}} -> {sets:add_element(Pid, DupCheckSet), Anamorphism(Pid, Alloc, AnaInit), @@ -321,13 +321,13 @@ tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, DupCheckSet, {DupCheckSet, AnaInit, AllocAcc} end end, - tidy_and_sum(AtomExpected, Libre, Catamorphism, Anamorphism, + tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) end. -free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Libre, Req) -> +free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req) -> free_from(Callbacks, - fun(Libre1, Sleepy1, SleepyAcc) -> + fun(Procs1, Sleepy1, SleepyAcc) -> case queue:out(Sleepy1) of {empty, _Sleepy2} -> empty; @@ -336,37 +336,37 @@ free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Libre, Req) -> true -> {skip, Sleepy2, queue:in(Pid, SleepyAcc)}; false -> {Alloc, hibernate} = - dict:fetch(Pid, Libre1), + dict:fetch(Pid, Procs1), {value, Sleepy2, Pid, Alloc} end end - end, fun queue:join/2, Libre, Sleepy, queue:new(), Req). + end, fun queue:join/2, Procs, Sleepy, queue:new(), Req). 
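%% [Editor's note] Sketch of the bookkeeping switch running through
%% these hunks: the single 'processes' dict now keeps oppressed
%% entries as {OrigReq, oppressed} alongside liberated ones instead
%% of forgetting oppressed pids, mirroring find_process/2 above. The
%% function name and the 'unknown' atom are illustrative.
classify(Pid, Procs) ->
    case dict:find(Pid, Procs) of
        {ok, {OrigReq, oppressed}} -> {oppressed, OrigReq};
        {ok, {Alloc, Activity}}    -> {libre, Alloc, Activity};
        error                      -> unknown
    end.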
-free_from(Callbacks, Hylomorphism, BaseCase, Libre, CataInit, AnaInit, Req) -> - case Hylomorphism(Libre, CataInit, AnaInit) of +free_from(Callbacks, Hylomorphism, BaseCase, Procs, CataInit, AnaInit, Req) -> + case Hylomorphism(Procs, CataInit, AnaInit) of empty -> - {AnaInit, Libre, Req}; + {AnaInit, Procs, Req}; {skip, CataInit1, AnaInit1} -> - free_from(Callbacks, Hylomorphism, BaseCase, Libre, CataInit1, + free_from(Callbacks, Hylomorphism, BaseCase, Procs, CataInit1, AnaInit1, Req); {value, CataInit1, Pid, Alloc} -> - Libre1 = dict:erase(Pid, Libre), + Procs1 = dict:store(Pid, {Alloc, oppressed}, Procs), ok = set_process_mode(Callbacks, Pid, oppressed), case Req > Alloc of - true -> free_from(Callbacks, Hylomorphism, BaseCase, Libre1, + true -> free_from(Callbacks, Hylomorphism, BaseCase, Procs1, CataInit1, AnaInit, Req - Alloc); - false -> {BaseCase(CataInit1, AnaInit), Libre1, Req - Alloc} + false -> {BaseCase(CataInit1, AnaInit), Procs1, Req - Alloc} end end. free_upto(Pid, Req, State = #state { available_tokens = Avail, - liberated_processes = Libre, + processes = Procs, callbacks = Callbacks, hibernate = Sleepy, unoppressable = Unoppressable }) when Req > Avail -> Unoppressable1 = sets:add_element(Pid, Unoppressable), - {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Libre), + {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Procs), case Req > Avail + SleepySum of true -> %% not enough in sleepy, just return tidied state State #state { hibernate = Sleepy1 }; @@ -374,11 +374,11 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, %% ReqRem1 will be <= 0 because it's likely we'll have %% freed more than we need, thus Req - ReqRem1 is total %% freed - {Sleepy2, Libre1, ReqRem} = + {Sleepy2, Procs1, ReqRem} = free_upto_sleepy(Unoppressable1, Callbacks, - Sleepy1, Libre, Req), + Sleepy1, Procs, Req), State #state { available_tokens = Avail + (Req - ReqRem), - liberated_processes = Libre1, + processes = Procs1, hibernate = Sleepy2 } end; free_upto(_Pid, _Req, State) -> -- cgit v1.2.1 From 5a99c6ff7f9e2b62e1891a72836bb97d035548ab Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 Aug 2009 05:21:03 +0100 Subject: less noisy startup --- src/rabbit.erl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 71b10913..773b4d04 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -146,14 +146,8 @@ start(normal, []) -> ok = rabbit_amqqueue:start(), ok = start_child(rabbit_router), - ok = start_child(rabbit_node_monitor) - end}, - {"guid generator", - fun () -> - ok = start_child(rabbit_guid) - end}, - {"disk queue", - fun () -> + ok = start_child(rabbit_node_monitor), + ok = start_child(rabbit_guid), ok = start_child(rabbit_disk_queue) end}, {"recovery", -- cgit v1.2.1 From 7a41b42cb445e329febe3b779dd8c6a30b83f157 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 28 Aug 2009 11:15:22 +0100 Subject: cosmetic --- src/rabbit_amqqueue_process.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 3538b6fb..7c19ea72 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -824,7 +824,7 @@ handle_cast({set_storage_mode, Mode}, State = #q { mixed_state = MS }) -> noreply(State #q { mixed_state = MS1 }). handle_info(report_memory, State) -> - %% deliberately don't call noreply/2/3 as we don't want to start the timer. 
+ %% deliberately don't call noreply/1 as we don't want to start the timer. %% By unsetting the timer, we force a report on the next normal message. {noreply, State #q { memory_report_timer = undefined }, hibernate}; -- cgit v1.2.1 From c6616de02ed1361b6ca25eff0a45ebed6cccaa92 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 Aug 2009 12:11:31 +0100 Subject: reference bug21489 --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3e38be8e..78505af7 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -398,7 +398,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ]), %% it would be better to have this as private, but dets:from_ets/2 - %% seems to blow up if it is set private + %% seems to blow up if it is set private - see bug21489 MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, 2}]), InitName = "0" ++ ?FILE_EXTENSION, -- cgit v1.2.1 From c3c2d9668bbe3fafda644f32a2cfb14d010c6de2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 28 Aug 2009 12:31:33 +0100 Subject: Changes to memory_manager. Watch for change in overall amount of free tokens, not requested amount --- src/rabbit_memory_manager.erl | 82 +++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index 29216d77..b7640d7a 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -41,6 +41,7 @@ -export([register/5, report_memory/3, info/0, conserve_memory/2]). -define(TOTAL_TOKENS, 10000000). +-define(THRESHOLD_MULTIPLIER, 1.05). -define(SERVER, ?MODULE). @@ -197,7 +198,7 @@ handle_cast({report_memory, Pid, Memory, Hibernating}, end, {StateN = #state { hibernate = Sleepy }, ActivityNew} = case find_process(Pid, Procs) of - {libre, {OAlloc, _OActivity}} -> + {libre, OAlloc, _OActivity} -> Avail1 = Avail + OAlloc, State1 = #state { available_tokens = Avail2, processes = Procs1 } @@ -205,20 +206,22 @@ handle_cast({report_memory, Pid, Memory, Hibernating}, State #state { available_tokens = Avail1 }), case Req > Avail2 of true -> %% nowt we can do, oppress the process - ok = set_process_mode(Callbacks, Pid, oppressed), - {State1 #state { processes = - dict:store(Pid, {Req, oppressed}, - Procs1) }, oppressed}; + Procs2 = + set_process_mode(Procs1, Callbacks, Pid, oppressed, + {oppressed, Avail2}), + {State1 #state { processes = Procs2 }, oppressed}; false -> %% keep liberated {State1 #state { processes = - dict:store(Pid, {Req, LibreActivity}, Procs1), + dict:store(Pid, {libre, Req, LibreActivity}, Procs1), available_tokens = Avail2 - Req }, LibreActivity} end; - {oppressed, OrigReq} -> + {oppressed, OrigAvail} -> case Alarmed orelse Hibernating orelse - (Req > OrigReq * 0.95 andalso Req < OrigReq * 1.05) of + (Avail > (OrigAvail / ?THRESHOLD_MULTIPLIER) andalso + Avail < (OrigAvail * ?THRESHOLD_MULTIPLIER)) + of true -> {State, oppressed}; false -> @@ -230,11 +233,11 @@ handle_cast({report_memory, Pid, Memory, Hibernating}, %% not enough space, so stay oppressed {State1, oppressed}; false -> %% can liberate the process - set_process_mode(Callbacks, Pid, liberated), + Procs2 = set_process_mode( + Procs1, Callbacks, Pid, liberated, + {libre, Req, LibreActivity}), {State1 #state { - processes = - dict:store(Pid, {Req, LibreActivity}, - Procs1), + processes = Procs2, available_tokens = Avail1 - Req }, LibreActivity} end @@ -254,9 +257,9 @@ 
handle_cast({register, Pid, IsUnoppressable, Module, Function, Args}, unoppressable = Unoppressable }) -> _MRef = erlang:monitor(process, Pid), Unoppressable1 = case IsUnoppressable of - true -> sets:add_element(Pid, Unoppressable); - false -> Unoppressable - end, + true -> sets:add_element(Pid, Unoppressable); + false -> Unoppressable + end, {noreply, State #state { callbacks = dict:store (Pid, {Module, Function, Args}, Callbacks), unoppressable = Unoppressable1 @@ -267,12 +270,14 @@ handle_cast({conserve_memory, Conserve}, State) -> handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state { available_tokens = Avail, - processes = Procs }) -> - State1 = State #state { processes = dict:erase(Pid, Procs) }, + processes = Procs, + callbacks = Callbacks }) -> + State1 = State #state { processes = dict:erase(Pid, Procs), + callbacks = dict:erase(Pid, Callbacks) }, {noreply, case find_process(Pid, Procs) of {oppressed, _OrigReq} -> State1; - {libre, {Alloc, _Activity}} -> + {libre, Alloc, _Activity} -> State1 #state { available_tokens = Avail + Alloc } end}; handle_info({'EXIT', _Pid, Reason}, State) -> @@ -288,14 +293,14 @@ code_change(_OldVsn, State, _Extra) -> find_process(Pid, Procs) -> case dict:find(Pid, Procs) of - {ok, {OrigReq, oppressed}} -> {oppressed, OrigReq}; - {ok, Value = {_Alloc, _Activity}} -> {libre, Value}; - error -> {oppressed, -9999} + {ok, Value} -> Value; + error -> {oppressed, 0} end. -set_process_mode(Callbacks, Pid, Mode) -> +set_process_mode(Procs, Callbacks, Pid, Mode, Record) -> {Module, Function, Args} = dict:fetch(Pid, Callbacks), - erlang:apply(Module, Function, Args ++ [Mode]). + ok = erlang:apply(Module, Function, Args ++ [Mode]), + dict:store(Pid, Record, Procs). tidy_and_sum_sleepy(IgnorePids, Sleepy, Procs) -> tidy_and_sum(hibernate, Procs, fun queue:out/1, @@ -313,7 +318,7 @@ tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, DupCheckSet, {DupCheckSet, AnaInit, AllocAcc}; false -> case find_process(Pid, Procs) of - {libre, {Alloc, AtomExpected}} -> + {libre, Alloc, AtomExpected} -> {sets:add_element(Pid, DupCheckSet), Anamorphism(Pid, Alloc, AnaInit), Alloc + AllocAcc}; @@ -325,7 +330,7 @@ tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, DupCheckSet, DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) end. -free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req) -> +free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req, Avail) -> free_from(Callbacks, fun(Procs1, Sleepy1, SleepyAcc) -> case queue:out(Sleepy1) of @@ -335,35 +340,36 @@ free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req) -> case sets:is_element(Pid, IgnorePids) of true -> {skip, Sleepy2, queue:in(Pid, SleepyAcc)}; - false -> {Alloc, hibernate} = + false -> {libre, Alloc, hibernate} = dict:fetch(Pid, Procs1), {value, Sleepy2, Pid, Alloc} end end - end, fun queue:join/2, Procs, Sleepy, queue:new(), Req). + end, fun queue:join/2, Procs, Sleepy, queue:new(), Req, Avail). 
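%% [Editor's note] The liberation guard this commit introduces, in
%% isolation: an oppressed process is only reconsidered once the pool
%% of available tokens has moved by more than ?THRESHOLD_MULTIPLIER
%% (5%) in either direction since it was oppressed, which stops
%% processes endlessly oppressing each other. Sketch with the macro
%% inlined:
avail_changed_enough(Avail, OrigAvail) ->
    Multiplier = 1.05,
    not (Avail > OrigAvail / Multiplier andalso
         Avail < OrigAvail * Multiplier).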
-free_from(Callbacks, Hylomorphism, BaseCase, Procs, CataInit, AnaInit, Req) -> +free_from( + Callbacks, Hylomorphism, BaseCase, Procs, CataInit, AnaInit, Req, Avail) -> case Hylomorphism(Procs, CataInit, AnaInit) of empty -> {AnaInit, Procs, Req}; {skip, CataInit1, AnaInit1} -> free_from(Callbacks, Hylomorphism, BaseCase, Procs, CataInit1, - AnaInit1, Req); + AnaInit1, Req, Avail); {value, CataInit1, Pid, Alloc} -> - Procs1 = dict:store(Pid, {Alloc, oppressed}, Procs), - ok = set_process_mode(Callbacks, Pid, oppressed), + Procs1 = set_process_mode( + Procs, Callbacks, Pid, oppressed, {oppressed, Avail}), case Req > Alloc of true -> free_from(Callbacks, Hylomorphism, BaseCase, Procs1, - CataInit1, AnaInit, Req - Alloc); + CataInit1, AnaInit, Req - Alloc, Avail); false -> {BaseCase(CataInit1, AnaInit), Procs1, Req - Alloc} end end. -free_upto(Pid, Req, State = #state { available_tokens = Avail, - processes = Procs, - callbacks = Callbacks, - hibernate = Sleepy, - unoppressable = Unoppressable }) +free_upto(Pid, Req, State = #state { available_tokens = Avail, + processes = Procs, + callbacks = Callbacks, + hibernate = Sleepy, + unoppressable = Unoppressable }) when Req > Avail -> Unoppressable1 = sets:add_element(Pid, Unoppressable), {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Procs), @@ -376,7 +382,7 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, %% freed {Sleepy2, Procs1, ReqRem} = free_upto_sleepy(Unoppressable1, Callbacks, - Sleepy1, Procs, Req), + Sleepy1, Procs, Req, Avail), State #state { available_tokens = Avail + (Req - ReqRem), processes = Procs1, hibernate = Sleepy2 } -- cgit v1.2.1 From 8fc927c5ee0244dddc50a8e038055eceab1adcec Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 28 Aug 2009 12:44:35 +0100 Subject: cosmetics - formatting --- src/rabbit_memory_manager.erl | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index b7640d7a..91bcf1b8 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -187,15 +187,15 @@ handle_call(info, _From, State) -> { unoppressable_processes, sets:to_list(Unoppressable) }], State1}. 
handle_cast({report_memory, Pid, Memory, Hibernating}, - State = #state { processes = Procs, - available_tokens = Avail, - callbacks = Callbacks, - tokens_per_byte = TPB, - alarmed = Alarmed }) -> + State = #state { processes = Procs, + available_tokens = Avail, + callbacks = Callbacks, + tokens_per_byte = TPB, + alarmed = Alarmed }) -> Req = rabbit_misc:ceil(TPB * Memory), LibreActivity = if Hibernating -> hibernate; - true -> active - end, + true -> active + end, {StateN = #state { hibernate = Sleepy }, ActivityNew} = case find_process(Pid, Procs) of {libre, OAlloc, _OActivity} -> @@ -220,8 +220,7 @@ handle_cast({report_memory, Pid, Memory, Hibernating}, {oppressed, OrigAvail} -> case Alarmed orelse Hibernating orelse (Avail > (OrigAvail / ?THRESHOLD_MULTIPLIER) andalso - Avail < (OrigAvail * ?THRESHOLD_MULTIPLIER)) - of + Avail < (OrigAvail * ?THRESHOLD_MULTIPLIER)) of true -> {State, oppressed}; false -> @@ -270,8 +269,8 @@ handle_cast({conserve_memory, Conserve}, State) -> handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state { available_tokens = Avail, - processes = Procs, - callbacks = Callbacks }) -> + processes = Procs, + callbacks = Callbacks }) -> State1 = State #state { processes = dict:erase(Pid, Procs), callbacks = dict:erase(Pid, Callbacks) }, {noreply, case find_process(Pid, Procs) of @@ -327,7 +326,7 @@ tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, DupCheckSet, end end, tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, - DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) + DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) end. free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req, Avail) -> @@ -384,8 +383,8 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, free_upto_sleepy(Unoppressable1, Callbacks, Sleepy1, Procs, Req, Avail), State #state { available_tokens = Avail + (Req - ReqRem), - processes = Procs1, - hibernate = Sleepy2 } + processes = Procs1, + hibernate = Sleepy2 } end; free_upto(_Pid, _Req, State) -> State. -- cgit v1.2.1 From fe34da3a2b4448785d90d1323a6f2d7d382b811d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 Aug 2009 12:48:06 +0100 Subject: cosmetic: move things around --- src/rabbit_disk_queue.erl | 210 ++++++++++++++++++++++++++-------------------- 1 file changed, 117 insertions(+), 93 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 78505af7..5d7c7a35 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -50,6 +50,8 @@ -export([stop/0, stop_and_obliterate/0, set_mode/1, to_disk_only_mode/0, to_ram_disk_mode/0]). +%%---------------------------------------------------------------------------- + -include("rabbit.hrl"). -define(WRITE_OK_SIZE_BITS, 8). @@ -246,7 +248,7 @@ %% alternating full files and files with only one tiny message in %% them). -%% ---- SPECS ---- +%%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -286,7 +288,9 @@ -endif. -%% ---- PUBLIC API ---- +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, @@ -360,7 +364,9 @@ cache_info() -> set_mode(Mode) -> gen_server2:pcast(?SERVER, 10, {set_mode, Mode}). 
-%% ---- GEN-SERVER INTERNAL API ---- +%%---------------------------------------------------------------------------- +%% gen_server behaviour +%%---------------------------------------------------------------------------- init([FileSizeLimit, ReadFileHandlesLimit]) -> %% If the gen_server is part of a supervision tree and is ordered @@ -584,7 +590,9 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, code_change(_OldVsn, State, _Extra) -> {ok, State}. -%% ---- UTILITY FUNCTIONS ---- +%%---------------------------------------------------------------------------- +%% memory management helper functions +%%---------------------------------------------------------------------------- stop_memory_timer(State = #dqstate { memory_report_timer_ref = undefined }) -> State; @@ -668,37 +676,50 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, mnesia_bytes_per_record = undefined, ets_bytes_per_record = undefined }. -noreply(NewState) -> - noreply1(start_memory_timer(NewState)). +%%---------------------------------------------------------------------------- +%% message cache helper functions +%%---------------------------------------------------------------------------- -noreply1(NewState = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }) -> - {noreply, NewState, hibernate}; -noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> - {noreply, start_commit_timer(NewState), 0}; -noreply1(NewState = #dqstate { on_sync_txns = [] }) -> - {noreply, stop_commit_timer(NewState), hibernate}; -noreply1(NewState) -> - {noreply, NewState, 0}. +remove_cache_entry(MsgId, #dqstate { message_cache = Cache }) -> + true = ets:delete(Cache, MsgId), + ok. -reply(Reply, NewState) -> - reply1(Reply, start_memory_timer(NewState)). +fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> + case ets:lookup(Cache, MsgId) of + [] -> + not_found; + [{MsgId, Message, _RefCount}] -> + NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), + {Message, NewRefCount} + end. -reply1(Reply, NewState = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }) -> - {reply, Reply, NewState, hibernate}; -reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> - {reply, Reply, start_commit_timer(NewState), 0}; -reply1(Reply, NewState = #dqstate { on_sync_txns = [] }) -> - {reply, Reply, stop_commit_timer(NewState), hibernate}; -reply1(Reply, NewState) -> - {reply, Reply, NewState, 0}. +decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> + true = try case ets:update_counter(Cache, MsgId, {3, -1}) of + N when N =< 0 -> true = ets:delete(Cache, MsgId); + _N -> true + end + catch error:badarg -> + %% MsgId is not in there because although it's been + %% delivered, it's never actually been read (think: + %% persistent message in mixed queue) + true + end, + ok. -form_filename(Name) -> - filename:join(base_directory(), Name). +insert_into_cache(Message = #basic_message { guid = MsgId }, + #dqstate { message_cache = Cache }) -> + case cache_is_full(Cache) of + true -> ok; + false -> true = ets:insert_new(Cache, {MsgId, Message, 1}), + ok + end. -base_directory() -> - filename:join(rabbit_mnesia:dir(), "rabbit_disk_queue/"). +cache_is_full(Cache) -> + ets:info(Cache, memory) > ?CACHE_MAX_SIZE. 
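%% [Editor's note] Self-contained sketch of the reference-counting
%% protocol the cache helpers above implement: insert with a count of
%% 1, bump on each cache hit, decrement per delivery, evict at zero.
cache_refcount_demo() ->
    Cache = ets:new(cache_sketch, [set]),
    true = ets:insert_new(Cache, {msg_id, <<"payload">>, 1}),
    2 = ets:update_counter(Cache, msg_id, {3, 1}),   %% cache hit
    1 = ets:update_counter(Cache, msg_id, {3, -1}),  %% a delivery
    0 = ets:update_counter(Cache, msg_id, {3, -1}),  %% last delivery
    true = ets:delete(Cache, msg_id),                %% evict at zero
    ok.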
+ +%%---------------------------------------------------------------------------- +%% dets/ets agnosticism +%%---------------------------------------------------------------------------- dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, operation_mode = disk_only }, Key) -> @@ -737,6 +758,42 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, Obj) -> ets:match_object(MsgLocationEts, Obj). +%%---------------------------------------------------------------------------- +%% general helper functions +%%---------------------------------------------------------------------------- + +noreply(NewState) -> + noreply1(start_memory_timer(NewState)). + +noreply1(NewState = #dqstate { on_sync_txns = [], + commit_timer_ref = undefined }) -> + {noreply, NewState, hibernate}; +noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> + {noreply, start_commit_timer(NewState), 0}; +noreply1(NewState = #dqstate { on_sync_txns = [] }) -> + {noreply, stop_commit_timer(NewState), hibernate}; +noreply1(NewState) -> + {noreply, NewState, 0}. + +reply(Reply, NewState) -> + reply1(Reply, start_memory_timer(NewState)). + +reply1(Reply, NewState = #dqstate { on_sync_txns = [], + commit_timer_ref = undefined }) -> + {reply, Reply, NewState, hibernate}; +reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> + {reply, Reply, start_commit_timer(NewState), 0}; +reply1(Reply, NewState = #dqstate { on_sync_txns = [] }) -> + {reply, Reply, stop_commit_timer(NewState), hibernate}; +reply1(Reply, NewState) -> + {reply, Reply, NewState, 0}. + +form_filename(Name) -> + filename:join(base_directory(), Name). + +base_directory() -> + filename:join(rabbit_mnesia:dir(), "rabbit_disk_queue/"). + with_read_handle_at(File, Offset, Fun, State = #dqstate { read_file_hc_cache = HC, current_file_name = CurName, @@ -752,24 +809,6 @@ with_read_handle_at(File, Offset, Fun, State = rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), {Result, State1 #dqstate { read_file_hc_cache = HC1 }}. -sequence_lookup(Sequences, Q) -> - case ets:lookup(Sequences, Q) of - [] -> - {0, 0}; - [{Q, ReadSeqId, WriteSeqId}] -> - {ReadSeqId, WriteSeqId} - end. - -start_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> - {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), - State #dqstate { commit_timer_ref = TRef }. - -stop_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> - State; -stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> - {ok, cancel} = timer:cancel(TRef), - State #dqstate { commit_timer_ref = undefined }. - sync_current_file_handle(State = #dqstate { current_dirty = false, on_sync_txns = [] }) -> State; @@ -788,6 +827,24 @@ sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl, State1 #dqstate { current_dirty = false, on_sync_txns = [], last_sync_offset = SyncOffset1 }. +sequence_lookup(Sequences, Q) -> + case ets:lookup(Sequences, Q) of + [] -> + {0, 0}; + [{Q, ReadSeqId, WriteSeqId}] -> + {ReadSeqId, WriteSeqId} + end. + +start_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), + State #dqstate { commit_timer_ref = TRef }. + +stop_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> + State; +stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #dqstate { commit_timer_ref = undefined }. 
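%% [Editor's note] The batching idea behind the commit timer above,
%% as a standalone sketch with illustrative names: writes accumulate
%% while the ?SYNC_INTERVAL timer runs, then a single sync covers the
%% whole window instead of one fsync per publish.
batched_sync(Hdl, Writes) ->
    lists:foreach(fun (Bin) -> ok = file:write(Hdl, Bin) end, Writes),
    ok = file:sync(Hdl).  %% one sync amortised over the batch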
+ msg_to_bin(Msg = #basic_message { content = Content }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), term_to_binary(Msg #basic_message { content = ClearedContent }). @@ -795,44 +852,9 @@ msg_to_bin(Msg = #basic_message { content = Content }) -> bin_to_msg(MsgBin) -> binary_to_term(MsgBin). -remove_cache_entry(MsgId, #dqstate { message_cache = Cache }) -> - true = ets:delete(Cache, MsgId), - ok. - -fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> - case ets:lookup(Cache, MsgId) of - [] -> - not_found; - [{MsgId, Message, _RefCount}] -> - NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), - {Message, NewRefCount} - end. - -decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> - true = try case ets:update_counter(Cache, MsgId, {3, -1}) of - N when N =< 0 -> true = ets:delete(Cache, MsgId); - _N -> true - end - catch error:badarg -> - %% MsgId is not in there because although it's been - %% delivered, it's never actually been read (think: - %% persistent message in mixed queue) - true - end, - ok. - -insert_into_cache(Message = #basic_message { guid = MsgId }, - #dqstate { message_cache = Cache }) -> - case cache_is_full(Cache) of - true -> ok; - false -> true = ets:insert_new(Cache, {MsgId, Message, 1}), - ok - end. - -cache_is_full(Cache) -> - ets:info(Cache, memory) > ?CACHE_MAX_SIZE. - -%% ---- INTERNAL RAW FUNCTIONS ---- +%%---------------------------------------------------------------------------- +%% internal functions +%%---------------------------------------------------------------------------- internal_fetch_body(Q, MarkDelivered, Advance, State) -> case queue_head(Q, MarkDelivered, Advance, State) of @@ -1208,7 +1230,9 @@ internal_delete_non_durable_queues( end end, {ok, State}, Sequences). -%% ---- ROLLING OVER THE APPEND FILE ---- +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation +%%---------------------------------------------------------------------------- maybe_roll_to_new_file(Offset, State = #dqstate { file_size_limit = FileSizeLimit, @@ -1236,8 +1260,6 @@ maybe_roll_to_new_file(Offset, maybe_roll_to_new_file(_, State) -> {ok, State}. -%% ---- GARBAGE COLLECTION / COMPACTION / AGGREGATION ---- - compact(FilesSet, State) -> %% smallest number, hence eldest, hence left-most, first Files = lists:sort(sets:to_list(FilesSet)), @@ -1470,7 +1492,9 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> _ -> [File|Acc] end. -%% ---- DISK RECOVERY ---- +%%---------------------------------------------------------------------------- +%% disk recovery +%%---------------------------------------------------------------------------- add_index() -> case mnesia:add_table_index(rabbit_disk_queue, msg_id) of @@ -1674,8 +1698,6 @@ load_messages(Left, [File|Files], {File, ValidTotalSize, ContiguousTop, Left, Right}), load_messages(File, Files, State). -%% ---- DISK RECOVERY OF FAILED COMPACTION ---- - recover_crashed_compactions(Files, TmpFiles) -> lists:foreach(fun (TmpFile) -> ok = recover_crashed_compactions1(Files, TmpFile) end, @@ -1818,7 +1840,9 @@ get_disk_queue_files() -> DQTFilesSorted = lists:sort(fun file_name_sort/2, DQTFiles), {DQFilesSorted, DQTFilesSorted}. 
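%% [Editor's note] Standalone sketch of the startup scan performed by
%% get_disk_queue_files/0 above: queue files are ordered numerically,
%% so that "10.rdq" sorts after "9.rdq" rather than after "1.rdq".
%% The function name is illustrative.
list_queue_files(Dir) ->
    lists:sort(fun (A, B) ->
                       list_to_integer(filename:rootname(A)) <
                           list_to_integer(filename:rootname(B))
               end, filelib:wildcard("*.rdq", Dir)).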
-%% ---- RAW READING AND WRITING OF FILES ---- +%%---------------------------------------------------------------------------- +%% raw reading and writing of files +%%---------------------------------------------------------------------------- append_message(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> BodySize = size(MsgBody), -- cgit v1.2.1 From efca9fa3cfa861bc68ea05a158426c01290a1fa2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 28 Aug 2009 12:48:56 +0100 Subject: cosmetics - documentation --- src/rabbit_memory_manager.erl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index 91bcf1b8..aa3900e9 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -94,7 +94,7 @@ %% those processes and then going "whoops, didn't help after all"), %% then we oppress the reporting process. When a process registers, it %% can declare itself "unoppressable". If a process is unoppressable -%% then it will not be sent to disk as a result of other processes +%% then it will not be oppressed as a result of other processes %% needing more tokens. However, if it itself needs additional tokens %% which aren't available then it is still oppressed as before. This %% feature is only used by the disk_queue, because if the disk queue @@ -102,14 +102,17 @@ %% tight, the disk_queue would typically be one of the first processes %% to be oppressed (sent to disk_only mode), which cripples %% performance. Thus by setting it unoppressable, it is only possible -%% for the disk_queue to be oppressed when it is active and -%% attempting to increase its memory allocation. +%% for the disk_queue to be oppressed when it is active and attempting +%% to increase its memory allocation. %% %% If a process has been oppressed, it continues making memory %% reports, as if it was liberated. As soon as a reported amount of %% memory can be satisfied (and this can include oppressing other -%% processes in the way described above), it will be liberated. We do -%% not keep any information about oppressed processes. +%% processes in the way described above), *and* the number of +%% available tokens has changed by ?THRESHOLD_MULTIPLIER since the +%% processes was oppressed, it will be liberated. This later condition +%% prevents processes from continually oppressing each other if they +%% themselves can be liberated by oppressing other processes. %% %% Note that the hibernate group can get very out of date. 
This is %% fine, and somewhat unavoidable given the absence of useful APIs for -- cgit v1.2.1 From f372213e6f84d36b38e0aea7cd0f62b807d737e0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 28 Aug 2009 13:07:41 +0100 Subject: cosmetic --- src/rabbit_memory_manager.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index aa3900e9..6c6d4076 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -183,7 +183,7 @@ handle_call(info, _From, State) -> processes = Procs, hibernate = Sleepy, unoppressable = Unoppressable } = - free_upto(undef, 1 + ?TOTAL_TOKENS, State), %% this'll just do tidying + free_upto(undefined, 1 + ?TOTAL_TOKENS, State), %% just tidy {reply, [{ available_tokens, Avail }, { processes, dict:to_list(Procs) }, { hibernated_processes, queue:to_list(Sleepy) }, -- cgit v1.2.1 From 3fb98994acf05a627237665f585c63d920dddac1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 Aug 2009 13:53:29 +0100 Subject: minor refactoring make it more obvious that ReqRem will always end up being <= 0 --- src/rabbit_memory_manager.erl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index aa3900e9..68cfd4ce 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -360,10 +360,11 @@ free_from( {value, CataInit1, Pid, Alloc} -> Procs1 = set_process_mode( Procs, Callbacks, Pid, oppressed, {oppressed, Avail}), - case Req > Alloc of + Req1 = Req - Alloc, + case Req1 > 0 of true -> free_from(Callbacks, Hylomorphism, BaseCase, Procs1, - CataInit1, AnaInit, Req - Alloc, Avail); - false -> {BaseCase(CataInit1, AnaInit), Procs1, Req - Alloc} + CataInit1, AnaInit, Req1, Avail); + false -> {BaseCase(CataInit1, AnaInit), Procs1, Req1} end end. @@ -379,8 +380,8 @@ free_upto(Pid, Req, State = #state { available_tokens = Avail, true -> %% not enough in sleepy, just return tidied state State #state { hibernate = Sleepy1 }; false -> - %% ReqRem1 will be <= 0 because it's likely we'll have - %% freed more than we need, thus Req - ReqRem1 is total + %% ReqRem will be <= 0 because it's likely we'll have + %% freed more than we need, thus Req - ReqRem is total %% freed {Sleepy2, Procs1, ReqRem} = free_upto_sleepy(Unoppressable1, Callbacks, -- cgit v1.2.1 From 7d129264620b1e8f54339ab26a4169a3bb2ad116 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 28 Aug 2009 15:00:58 +0100 Subject: superdooper. Recovery from crashes mid compaction tested, corrected, working. --- src/rabbit_amqqueue_process.erl | 8 ++++---- src/rabbit_disk_queue.erl | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7c19ea72..9d97e881 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -120,10 +120,10 @@ init(Q = #amqqueue { name = QName, durable = Durable }) -> terminate(_Reason, State) -> %% FIXME: How do we cancel active subscriptions? - QName = qname(State), - rabbit_mixed_queue:delete_queue(State #q.mixed_state), - stop_memory_timer(State), - ok = rabbit_amqqueue:internal_delete(QName). + State1 = stop_memory_timer(State), + QName = qname(State1), + ok = rabbit_amqqueue:internal_delete(QName), + {ok, _MS} = rabbit_mixed_queue:delete_queue(State1 #q.mixed_state). code_change(_OldVsn, State, _Extra) -> {ok, State}. 
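%% [Editor's note] A detail in the terminate/2 rewrite above: the old
%% code called stop_memory_timer(State) and discarded the result, so
%% the rest of terminate kept using a state holding a stale timer
%% reference; the new code threads the updated state through. Shape
%% of the fix, names as in the diff:
%%
%%   terminate(_Reason, State) ->
%%       State1 = stop_memory_timer(State),  %% keep the new state
%%       ...                                 %% use State1 from here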
diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 5d7c7a35..0ff4c50e 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -380,15 +380,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     process_flag(trap_exit, true),
     ok = rabbit_memory_manager:register
            (self(), true, rabbit_disk_queue, set_mode, []),
-    Node = node(),
-    ok =
-        case mnesia:change_table_copy_type(rabbit_disk_queue, Node,
-                                           disc_copies) of
-            {atomic, ok} -> ok;
-            {aborted, {already_exists, rabbit_disk_queue, Node,
-                       disc_copies}} -> ok;
-            E -> E
-        end,
     ok = filelib:ensure_dir(form_filename("nothing")),
     file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++
                               ?FILE_EXTENSION_DETS)),
@@ -438,6 +429,15 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     {ok, State1 = #dqstate { current_file_name = CurrentName,
                              current_offset = Offset } } =
         load_from_disk(State),
+    Node = node(),
+    ok =
+        case mnesia:change_table_copy_type(rabbit_disk_queue, Node,
+                                           disc_copies) of
+            {atomic, ok} -> ok;
+            {aborted, {already_exists, rabbit_disk_queue, Node,
+                       disc_copies}} -> ok;
+            E -> E
+        end,
     %% read is only needed so that we can seek
     {ok, FileHdl} = file:open(form_filename(CurrentName),
                               [read, write, raw, binary, delayed_write]),
@@ -1757,11 +1757,12 @@ recover_crashed_compactions1(Files, TmpFile) ->
     %% consist only of valid messages. Plan: Truncate the main file
     %% back to before any of the files in the tmp file and copy
     %% them over again
+    TmpPath = form_filename(TmpFile),
     case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of
         true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file
                 %% note this also catches the case when the tmp file
                 %% is empty
-            ok = file:delete(TmpFile);
+            ok = file:delete(TmpPath);
         false ->
             %% we're in case 4 above. Check that everything in the
             %% main file is a valid message in mnesia
@@ -1793,13 +1794,12 @@ recover_crashed_compactions1(Files, TmpFile) ->
             %% single move if we run out of disk space, this truncate
             %% could fail, but we still aren't risking losing data
             ok = file:truncate(MainHdl),
-            {ok, TmpHdl} = file:open(form_filename(TmpFile),
-                                     [read, raw, binary, read_ahead]),
+            {ok, TmpHdl} = file:open(TmpPath, [read, raw, binary, read_ahead]),
             {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize),
             ok = file:sync(MainHdl),
             ok = file:close(MainHdl),
             ok = file:close(TmpHdl),
-            ok = file:delete(TmpFile),
+            ok = file:delete(TmpPath),
             {ok, _MainMessages, MsgIdsMain} =
                 scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile),
@@ -1910,7 +1910,7 @@ read_next_file_entry(FileHdl, Offset) ->
     case file:read(FileHdl, TwoIntegers) of
         {ok, <<TotalSize:?INTEGER_SIZE_BITS,
                MsgIdBinSize:?INTEGER_SIZE_BITS>>} ->
-            case {TotalSize =:= 0, MsgIdBinSize =:= 0} of
+            case {TotalSize =< 0, MsgIdBinSize =< 0} of
                 {true, _} -> eof; %% Nothing we can do other than stop
                 {false, true} ->
                     %% current message corrupted, try skipping past it
-- cgit v1.2.1 


From 221ec0bf10c4f4f6dad07f4cb7796e78564d6928 Mon Sep 17 00:00:00 2001
From: Matthew Sackman 
Date: Fri, 28 Aug 2009 15:13:00 +0100
Subject: dq: revert the table change to earlier on, so that recovery isn't as
 horrendously slow. amqq_proc: correct order of actions in delete_queue and
 document.
---
 src/rabbit_amqqueue_process.erl | 10 ++++++++--
 src/rabbit_disk_queue.erl | 20 +++++++++++---------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 9d97e881..45f311f7 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -122,8 +122,14 @@ terminate(_Reason, State) ->
     %% FIXME: How do we cancel active subscriptions?
     State1 = stop_memory_timer(State),
     QName = qname(State1),
-    ok = rabbit_amqqueue:internal_delete(QName),
-    {ok, _MS} = rabbit_mixed_queue:delete_queue(State1 #q.mixed_state).
+    %% Delete from disk queue first. If we crash at this point and we
+    %% are a durable queue, we will be recreated at startup, possibly
+    %% with partial content. The alternative is much worse however - if
+    %% we called internal_delete first, we would then have a race
+    %% between the disk_queue delete and a new queue with the same name
+    %% being created and published to.
+    {ok, _MS} = rabbit_mixed_queue:delete_queue(State1 #q.mixed_state),
+    ok = rabbit_amqqueue:internal_delete(QName).
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 0ff4c50e..28e74537 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -381,6 +381,17 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     ok = rabbit_memory_manager:register
            (self(), true, rabbit_disk_queue, set_mode, []),
     ok = filelib:ensure_dir(form_filename("nothing")),
+
+    Node = node(),
+    ok =
+        case mnesia:change_table_copy_type(rabbit_disk_queue, Node,
+                                           disc_copies) of
+            {atomic, ok} -> ok;
+            {aborted, {already_exists, rabbit_disk_queue, Node,
+                       disc_copies}} -> ok;
+            E -> E
+        end,
+
     file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++
                               ?FILE_EXTENSION_DETS)),
     {ok, MsgLocationDets} =
@@ -429,15 +440,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     {ok, State1 = #dqstate { current_file_name = CurrentName,
                              current_offset = Offset } } =
         load_from_disk(State),
-    Node = node(),
-    ok =
-        case mnesia:change_table_copy_type(rabbit_disk_queue, Node,
-                                           disc_copies) of
-            {atomic, ok} -> ok;
-            {aborted, {already_exists, rabbit_disk_queue, Node,
-                       disc_copies}} -> ok;
-            E -> E
-        end,
     %% read is only needed so that we can seek
-- cgit v1.2.1 


From 6e3e16d248c0a18d9142916ed69f7cc790aa2aaf Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Fri, 28 Aug 2009 15:15:28 +0100
Subject: cosmetic - visual separation of sections

---
 src/rabbit_memory_manager.erl | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl
index 2fb593e9..eb37a6f3 100644
--- a/src/rabbit_memory_manager.erl
+++ b/src/rabbit_memory_manager.erl
@@ -45,6 +45,8 @@
 
 -define(SERVER, ?MODULE).
 
+%%----------------------------------------------------------------------------
+
 -ifdef(use_specs).
 
 -spec(start_link/0 :: () ->
@@ -56,6 +58,8 @@
 
 -endif.
 
+%%----------------------------------------------------------------------------
+
 -record(state, { available_tokens,
                  processes,
                  callbacks,
@@ -161,6 +165,8 @@ info() ->
 conserve_memory(_Pid, Conserve) ->
     gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}).
+%%----------------------------------------------------------------------------
+
 init([]) ->
     process_flag(trap_exit, true),
     rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}),
@@ -293,6 +299,8 @@ terminate(_Reason, State) ->
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
 
+%%----------------------------------------------------------------------------
+
 find_process(Pid, Procs) ->
     case dict:find(Pid, Procs) of
         {ok, Value} -> Value;
-- cgit v1.2.1 


From 2282ff5d1370603e8db75a55a4cee8eb71d61340 Mon Sep 17 00:00:00 2001
From: Matthew Sackman 
Date: Fri, 28 Aug 2009 15:22:59 +0100
Subject: minor refactorings

---
 src/rabbit_amqqueue_process.erl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 45f311f7..5789b105 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -121,7 +121,6 @@ init(Q = #amqqueue { name = QName, durable = Durable }) ->
 terminate(_Reason, State) ->
     %% FIXME: How do we cancel active subscriptions?
     State1 = stop_memory_timer(State),
-    QName = qname(State1),
     %% Delete from disk queue first. If we crash at this point and we
@@ -129,7 +128,7 @@ terminate(_Reason, State) ->
     %% the disk_queue delete and a new queue with the same name being
     %% created and published to.
     {ok, _MS} = rabbit_mixed_queue:delete_queue(State1 #q.mixed_state),
-    ok = rabbit_amqqueue:internal_delete(QName).
+    ok = rabbit_amqqueue:internal_delete(qname(State1)).
-- cgit v1.2.1 


From 8647395497843f3a7cd47bf096f047bd12024a92 Mon Sep 17 00:00:00 2001
From: Matthew Sackman 
Date: Fri, 28 Aug 2009 17:58:10 +0100
Subject: Use an mnesia transaction to record safely shutting down, and
 associated wiring on startup. Manually verified this all works.

---
 src/rabbit_disk_queue.erl | 74 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 28e74537..7051ea05 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -71,6 +71,11 @@
 -define(BATCH_SIZE, 10000).
 -define(CACHE_MAX_SIZE, 10485760).
 -define(WRITE_HANDLE_OPEN_MODE, [append, raw, binary, delayed_write]).
+-define(SHUTDOWN_MESSAGE_KEY, shutdown_token).
+-define(SHUTDOWN_MESSAGE, #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY,
+                                        msg_id = infinity_and_beyond,
+                                        is_delivered = never
+                                      }).
 
 -define(SERVER, ?MODULE).
 
@@ -392,6 +397,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
             E -> E
     end,
 
+    ok = detect_shutdown_state_and_adjust_delivered_flags(),
+
     file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++
                               ?FILE_EXTENSION_DETS)),
     {ok, MsgLocationDets} =
@@ -582,6 +589,7 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets,
         _ -> sync_current_file_handle(State),
              file:close(FileHdl)
     end,
+    store_safe_shutdown(),
     HC1 = rabbit_file_handle_cache:close_all(HC),
     State1 #dqstate { current_file_handle = undefined,
                       current_dirty = false,
@@ -854,6 +862,72 @@ msg_to_bin(Msg = #basic_message { content = Content }) ->
 bin_to_msg(MsgBin) ->
     binary_to_term(MsgBin).
 
+
+store_safe_shutdown() ->
+    ok = rabbit_misc:execute_mnesia_transaction(
+           fun() ->
+                   mnesia:write(rabbit_disk_queue,
+                                ?SHUTDOWN_MESSAGE, write)
+           end).
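%% (Note the call site in shutdown/1 above: store_safe_shutdown/0 runs
%% only after sync_current_file_handle/1 and file:close/1, so finding
%% ?SHUTDOWN_MESSAGE at startup implies every earlier write reached
%% disk. A hypothetical condensed form of that ordering invariant,
%% assuming a sync_and_close/1 helper:
%%
%%     graceful_shutdown(State) ->
%%         ok = sync_and_close(State),
%%         store_safe_shutdown().
%% )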
+ +detect_shutdown_state_and_adjust_delivered_flags() -> + MarkDelivered = + rabbit_misc:execute_mnesia_transaction( + fun() -> + case mnesia:read(rabbit_disk_queue, + ?SHUTDOWN_MESSAGE_KEY, read) of + [?SHUTDOWN_MESSAGE] -> + mnesia:delete(rabbit_disk_queue, + ?SHUTDOWN_MESSAGE_KEY, write), + false; + [] -> + true + end + end), + %% if we crash here, then on startup we'll not find the + %% SHUTDOWN_MESSAGE so will mark everything delivered, which is + %% the safe thing to do. + case MarkDelivered of + true -> mark_messages_delivered(); + false -> ok + end. + +mark_messages_delivered() -> + mark_message_delivered('$start_of_table'). + +%% A single huge transaction is a bad idea because of memory +%% use. Equally, using dirty operations is a bad idea because you +%% shouldn't do writes when doing mnesia:dirty_next, because the +%% ordering can change. So we use transactions of bounded +%% size. However, even this does necessitate restarting between +%% transactions. +mark_message_delivered('$end_of_table') -> + ok; +mark_message_delivered(_Key) -> + mark_message_delivered( + rabbit_misc:execute_mnesia_transaction( + fun () -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + mark_message_delivered(mnesia:first(rabbit_disk_queue), + ?BATCH_SIZE) + end)). + +mark_message_delivered(Key, 0) -> + Key; +mark_message_delivered(Key = '$end_of_table', _N) -> + Key; +mark_message_delivered(Key, N) -> + [Obj] = mnesia:read(rabbit_disk_queue, Key, write), + M = case Obj #dq_msg_loc.is_delivered of + true -> N; + false -> + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { is_delivered = true }, + write), + N - 1 + end, + mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). + %%---------------------------------------------------------------------------- %% internal functions %%---------------------------------------------------------------------------- -- cgit v1.2.1 From f5d877ca4b2d5e2a1e5a1cdff53dfd014a2409a3 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 Aug 2009 18:32:46 +0100 Subject: cosmetic changes to shutdown marker code move it to the right place reorganise constants section --- src/rabbit_disk_queue.erl | 189 +++++++++++++++++++++++----------------------- 1 file changed, 95 insertions(+), 94 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 7051ea05..42a635a1 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -54,38 +54,40 @@ -include("rabbit.hrl"). --define(WRITE_OK_SIZE_BITS, 8). --define(WRITE_OK_TRANSIENT, 255). --define(WRITE_OK_PERSISTENT, 254). --define(INTEGER_SIZE_BYTES, 8). --define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). --define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). --define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). --define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). --define(CACHE_ETS_NAME, rabbit_disk_queue_cache). --define(FILE_EXTENSION, ".rdq"). --define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_EXTENSION_DETS, ".dets"). --define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). --define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs --define(BATCH_SIZE, 10000). --define(CACHE_MAX_SIZE, 10485760). --define(WRITE_HANDLE_OPEN_MODE, [append, raw, binary, delayed_write]). --define(SHUTDOWN_MESSAGE_KEY, shutdown_token). --define(SHUTDOWN_MESSAGE, #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY, - msg_id = infinity_and_beyond, - is_delivered = never - }). - --define(SERVER, ?MODULE). 
- --define(MAX_READ_FILE_HANDLES, 256). --define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_TRANSIENT, 255). +-define(WRITE_OK_PERSISTENT, 254). +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). +-define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). +-define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). +-define(CACHE_ETS_NAME, rabbit_disk_queue_cache). +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). +-define(FILE_EXTENSION_DETS, ".dets"). +-define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). +-define(BATCH_SIZE, 10000). +-define(CACHE_MAX_SIZE, 10485760). +-define(WRITE_HANDLE_OPEN_MODE, [append, raw, binary, delayed_write]). +-define(MAX_READ_FILE_HANDLES, 256). +-define(FILE_SIZE_LIMIT, (256*1024*1024)). + + +-define(SHUTDOWN_MESSAGE_KEY, shutdown_token). +-define(SHUTDOWN_MESSAGE, + #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY, + msg_id = infinity_and_beyond, + is_delivered = never + }). +-define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs -define(SYNC_INTERVAL, 5). %% milliseconds -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). +-define(SERVER, ?MODULE). + -record(dqstate, {msg_location_dets, %% where are messages? msg_location_ets, %% as above, but for ets version @@ -862,72 +864,6 @@ msg_to_bin(Msg = #basic_message { content = Content }) -> bin_to_msg(MsgBin) -> binary_to_term(MsgBin). - -store_safe_shutdown() -> - ok = rabbit_misc:execute_mnesia_transaction( - fun() -> - mnesia:write(rabbit_disk_queue, - ?SHUTDOWN_MESSAGE, write) - end). - -detect_shutdown_state_and_adjust_delivered_flags() -> - MarkDelivered = - rabbit_misc:execute_mnesia_transaction( - fun() -> - case mnesia:read(rabbit_disk_queue, - ?SHUTDOWN_MESSAGE_KEY, read) of - [?SHUTDOWN_MESSAGE] -> - mnesia:delete(rabbit_disk_queue, - ?SHUTDOWN_MESSAGE_KEY, write), - false; - [] -> - true - end - end), - %% if we crash here, then on startup we'll not find the - %% SHUTDOWN_MESSAGE so will mark everything delivered, which is - %% the safe thing to do. - case MarkDelivered of - true -> mark_messages_delivered(); - false -> ok - end. - -mark_messages_delivered() -> - mark_message_delivered('$start_of_table'). - -%% A single huge transaction is a bad idea because of memory -%% use. Equally, using dirty operations is a bad idea because you -%% shouldn't do writes when doing mnesia:dirty_next, because the -%% ordering can change. So we use transactions of bounded -%% size. However, even this does necessitate restarting between -%% transactions. -mark_message_delivered('$end_of_table') -> - ok; -mark_message_delivered(_Key) -> - mark_message_delivered( - rabbit_misc:execute_mnesia_transaction( - fun () -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - mark_message_delivered(mnesia:first(rabbit_disk_queue), - ?BATCH_SIZE) - end)). - -mark_message_delivered(Key, 0) -> - Key; -mark_message_delivered(Key = '$end_of_table', _N) -> - Key; -mark_message_delivered(Key, N) -> - [Obj] = mnesia:read(rabbit_disk_queue, Key, write), - M = case Obj #dq_msg_loc.is_delivered of - true -> N; - false -> - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { is_delivered = true }, - write), - N - 1 - end, - mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). 
- %%---------------------------------------------------------------------------- %% internal functions %%---------------------------------------------------------------------------- @@ -1569,9 +1505,74 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> end. %%---------------------------------------------------------------------------- -%% disk recovery +%% recovery %%---------------------------------------------------------------------------- +store_safe_shutdown() -> + ok = rabbit_misc:execute_mnesia_transaction( + fun() -> + mnesia:write(rabbit_disk_queue, + ?SHUTDOWN_MESSAGE, write) + end). + +detect_shutdown_state_and_adjust_delivered_flags() -> + MarkDelivered = + rabbit_misc:execute_mnesia_transaction( + fun() -> + case mnesia:read(rabbit_disk_queue, + ?SHUTDOWN_MESSAGE_KEY, read) of + [?SHUTDOWN_MESSAGE] -> + mnesia:delete(rabbit_disk_queue, + ?SHUTDOWN_MESSAGE_KEY, write), + false; + [] -> + true + end + end), + %% if we crash here, then on startup we'll not find the + %% SHUTDOWN_MESSAGE so will mark everything delivered, which is + %% the safe thing to do. + case MarkDelivered of + true -> mark_messages_delivered(); + false -> ok + end. + +mark_messages_delivered() -> + mark_message_delivered('$start_of_table'). + +%% A single huge transaction is a bad idea because of memory +%% use. Equally, using dirty operations is a bad idea because you +%% shouldn't do writes when doing mnesia:dirty_next, because the +%% ordering can change. So we use transactions of bounded +%% size. However, even this does necessitate restarting between +%% transactions. +mark_message_delivered('$end_of_table') -> + ok; +mark_message_delivered(_Key) -> + mark_message_delivered( + rabbit_misc:execute_mnesia_transaction( + fun () -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + mark_message_delivered(mnesia:first(rabbit_disk_queue), + ?BATCH_SIZE) + end)). + +mark_message_delivered(Key, 0) -> + Key; +mark_message_delivered(Key = '$end_of_table', _N) -> + Key; +mark_message_delivered(Key, N) -> + [Obj] = mnesia:read(rabbit_disk_queue, Key, write), + M = case Obj #dq_msg_loc.is_delivered of + true -> N; + false -> + ok = mnesia:write(rabbit_disk_queue, + Obj #dq_msg_loc { is_delivered = true }, + write), + N - 1 + end, + mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). + add_index() -> case mnesia:add_table_index(rabbit_disk_queue, msg_id) of {atomic, ok} -> ok; -- cgit v1.2.1 From bb196602c3356ca14abb937aca96ff3d9eeaf328 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 Aug 2009 20:25:07 +0100 Subject: refactor: pull msg_location_dets filename construction into separate fun --- src/rabbit_disk_queue.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 42a635a1..f78f413f 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -401,12 +401,11 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = detect_shutdown_state_and_adjust_delivered_flags(), - file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ - ?FILE_EXTENSION_DETS)), + file:delete(msg_location_dets_file()), + {ok, MsgLocationDets} = dets:open_file(?MSG_LOC_NAME, - [{file, form_filename(atom_to_list(?MSG_LOC_NAME) ++ - ?FILE_EXTENSION_DETS)}, + [{file, msg_location_dets_file()}, {min_no_slots, 1024*1024}, %% man says this should be <= 32M. But it works... 
{max_no_slots, 30*1024*1024}, @@ -583,8 +582,7 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, %% deliberately ignoring return codes here State1 = stop_commit_timer(stop_memory_timer(State)), dets:close(MsgLocationDets), - file:delete(form_filename(atom_to_list(?MSG_LOC_NAME) ++ - ?FILE_EXTENSION_DETS)), + file:delete(msg_location_dets_file()), true = ets:delete_all_objects(MsgLocationEts), case FileHdl of undefined -> ok; @@ -806,6 +804,9 @@ form_filename(Name) -> base_directory() -> filename:join(rabbit_mnesia:dir(), "rabbit_disk_queue/"). +msg_location_dets_file() -> + form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). + with_read_handle_at(File, Offset, Fun, State = #dqstate { read_file_hc_cache = HC, current_file_name = CurName, -- cgit v1.2.1 From 2c361b44a361f088749694c5cf6719f3aa1ce419 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 07:09:26 +0100 Subject: refactoring: extract file opening, and simplify file:position calls --- src/rabbit_disk_queue.erl | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index f78f413f..62d53eee 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -73,6 +73,8 @@ -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(READ_MODE, [read, read_ahead]). +-define(WRITE_MODE, [write, delayed_write]). -define(SHUTDOWN_MESSAGE_KEY, shutdown_token). -define(SHUTDOWN_MESSAGE, @@ -449,9 +451,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = Offset } } = load_from_disk(State), %% read is only needed so that we can seek - {ok, FileHdl} = file:open(form_filename(CurrentName), - [read, write, raw, binary, delayed_write]), - {ok, Offset} = file:position(FileHdl, {bof, Offset}), + FileHdl = open_file(CurrentName, ?WRITE_MODE ++ [read]), + {ok, Offset} = file:position(FileHdl, Offset), State2 = State1 #dqstate { current_file_handle = FileHdl }, %% by reporting a memory use of 0, we guarantee the manager will %% not oppress us. We have to start in ram_disk mode because we @@ -807,6 +808,10 @@ base_directory() -> msg_location_dets_file() -> form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). +open_file(File, Mode) -> + {ok, Hdl} = file:open(form_filename(File), [raw, binary] ++ Mode), + Hdl. + with_read_handle_at(File, Offset, Fun, State = #dqstate { read_file_hc_cache = HC, current_file_name = CurName, @@ -1259,8 +1264,7 @@ maybe_roll_to_new_file(Offset, ok = file:close(CurHdl), NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, - {ok, NextHdl} = file:open(form_filename(NextName), - [write, raw, binary, delayed_write]), + NextHdl = open_file(NextName, ?WRITE_MODE), true = ets:update_element(FileSummary, CurName, {5, NextName}),%% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), State2 = State1 #dqstate { current_file_name = NextName, @@ -1352,13 +1356,13 @@ sort_msg_locations_by_offset(Dir, List) -> end, List). preallocate(Hdl, FileSizeLimit, FinalPos) -> - {ok, FileSizeLimit} = file:position(Hdl, {bof, FileSizeLimit}), + {ok, FileSizeLimit} = file:position(Hdl, FileSizeLimit), ok = file:truncate(Hdl), - {ok, FinalPos} = file:position(Hdl, {bof, FinalPos}), + {ok, FinalPos} = file:position(Hdl, FinalPos), ok. 
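The open_file/2 helper above composes its final mode from the new ?READ_MODE
and ?WRITE_MODE lists by plain concatenation, always adding [raw, binary]
itself. A usage sketch with hypothetical file names (the compaction code in
this same patch uses exactly these combinations):

    SourceHdl      = open_file("7" ++ ?FILE_EXTENSION, ?READ_MODE),
    DestinationHdl = open_file("6" ++ ?FILE_EXTENSION, ?READ_MODE ++ ?WRITE_MODE),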
truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> - {ok, Lowpoint} = file:position(FileHdl, {bof, Lowpoint}), + {ok, Lowpoint} = file:position(FileHdl, Lowpoint), ok = file:truncate(FileHdl), ok = preallocate(FileHdl, Highpoint, Lowpoint). @@ -1368,12 +1372,8 @@ combine_files({Source, SourceValid, _SourceContiguousTop, _DestinationLeft, _DestinationRight}, State) -> State1 = close_file(Source, close_file(Destination, State)), - {ok, SourceHdl} = - file:open(form_filename(Source), - [read, raw, binary, read_ahead]), - {ok, DestinationHdl} = - file:open(form_filename(Destination), - [read, write, raw, binary, read_ahead, delayed_write]), + SourceHdl = open_file(Source, ?READ_MODE), + DestinationHdl = open_file(Destination, ?READ_MODE ++ ?WRITE_MODE), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't %% need a tmp file @@ -1386,10 +1386,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, DestinationValid, ExpectedSize); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = - file:open(form_filename(Tmp), - [read, write, raw, binary, - read_ahead, delayed_write]), + TmpHdl = open_file(Tmp, ?READ_MODE ++ ?WRITE_MODE), Worklist = lists:dropwhile( fun (#message_store_entry { offset = Offset }) @@ -1415,7 +1412,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, %% Destination, and MsgLocationDets has been updated to %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end - {ok, 0} = file:position(TmpHdl, {bof, 0}), + {ok, 0} = file:position(TmpHdl, 0), ok = truncate_and_extend_file( DestinationHdl, DestinationContiguousTop, ExpectedSize), {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), @@ -1464,7 +1461,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, %% the previous block BSize = BlockEnd - BlockStart, {ok, BlockStart} = - file:position(SourceHdl, {bof, BlockStart}), + file:position(SourceHdl, BlockStart), {ok, BSize} = file:copy(SourceHdl, DestinationHdl, BSize), {NextOffset, Offset, Offset + Size} @@ -1472,7 +1469,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, end, {InitOffset, undefined, undefined}, WorkList), %% do the last remaining block BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = file:position(SourceHdl, {bof, BlockStart1}), + {ok, BlockStart1} = file:position(SourceHdl, BlockStart1), {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), ok. 
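The fold in copy_messages above coalesces messages that sit back to back in
the source file into single file:copy/3 calls, only flushing a block when a
gap appears. A minimal standalone sketch of the same coalescing rule, over
hypothetical {Offset, TotalSize} pairs sorted by offset:

    coalesce(Pairs) -> lists:reverse(coalesce(Pairs, [])).

    coalesce([], Blocks) -> Blocks;
    coalesce([{O, S} | Rest], [{BS, BE} | Blocks]) when O =:= BE ->
        %% contiguous with the current block: extend it
        coalesce(Rest, [{BS, BE + S} | Blocks]);
    coalesce([{O, S} | Rest], Blocks) ->
        %% gap (or first message): start a new block
        coalesce(Rest, [{O, O + S} | Blocks]).

For example, coalesce([{0,100}, {100,100}, {300,50}]) yields [{0,200},
{300,350}] -- the two contiguous byte ranges the fold would copy.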
@@ -1854,9 +1851,7 @@ recover_crashed_compactions1(Files, TmpFile) -> not (lists:member(MsgId, MsgIdsTmp)) end, MsgIds), %% must open with read flag, otherwise will stomp over contents - {ok, MainHdl} = - file:open(form_filename(NonTmpRelatedFile), - [read, write, raw, binary, delayed_write]), + MainHdl = open_file(NonTmpRelatedFile, ?WRITE_MODE ++ [read]), {ok, Top} = file:position(MainHdl, Top), %% wipe out any rubbish at the end of the file ok = file:truncate(MainHdl), @@ -1872,7 +1867,7 @@ recover_crashed_compactions1(Files, TmpFile) -> %% single move if we run out of disk space, this truncate %% could fail, but we still aren't risking losing data ok = file:truncate(MainHdl), - {ok, TmpHdl} = file:open(TmpPath, [read, raw, binary, read_ahead]), + TmpHdl = open_file(TmpFile, ?READ_MODE), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:sync(MainHdl), ok = file:close(MainHdl), -- cgit v1.2.1 From 35e3c4c36162841eed728499ae52e5b6b71305df Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 07:10:38 +0100 Subject: refactoring: eliminate code duplication in read_message_from_disk --- src/rabbit_disk_queue.erl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 62d53eee..0f08b04c 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1942,14 +1942,13 @@ read_message_from_disk(FileHdl, TotalSize) -> MsgIdBinSize:?INTEGER_SIZE_BITS, Rest:TotalSizeWriteOkBytes/binary>>} -> BodySize = TotalSize - MsgIdBinSize, - case Rest of - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, - ?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>> -> - {ok, {MsgBody, false, BodySize}}; - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, - ?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>> -> - {ok, {MsgBody, true, BodySize}} - end; + <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + StopByte:?WRITE_OK_SIZE_BITS>> = Rest, + Persistent = case StopByte of + ?WRITE_OK_TRANSIENT -> false; + ?WRITE_OK_PERSISTENT -> true + end, + {ok, {MsgBody, Persistent, BodySize}}; KO -> KO end. 
-- cgit v1.2.1 


From cd2b9673ee3a6a17ca1899c7f7e983d62bcba9af Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Sat, 29 Aug 2009 07:13:18 +0100
Subject: refactoring: simplify match in read_next_file_entry

this is equivalent since the read entities are unsigned
---
 src/rabbit_disk_queue.erl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 0f08b04c..22c57fe3 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -1982,9 +1982,9 @@ read_next_file_entry(FileHdl, Offset) ->
     case file:read(FileHdl, TwoIntegers) of
         {ok, <<TotalSize:?INTEGER_SIZE_BITS,
                MsgIdBinSize:?INTEGER_SIZE_BITS>>} ->
-            case {TotalSize =< 0, MsgIdBinSize =< 0} of
-                {true, _} -> eof; %% Nothing we can do other than stop
-                {false, true} ->
+            case {TotalSize, MsgIdBinSize} of
+                {0, _} -> eof; %% Nothing we can do other than stop
+                {_, 0} ->
                     %% current message corrupted, try skipping past it
                     ExpectedAbsPos =
                         Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize,
@@ -1995,7 +1995,7 @@ read_next_file_entry(FileHdl, Offset) ->
                             eof; %% seek failed, so give up
                         KO -> KO
                     end;
-                {false, false} -> %% all good, let's continue
+                {_, _} -> %% all good, let's continue
                     case file:read(FileHdl, MsgIdBinSize) of
                         {ok, <<MsgId:MsgIdBinSize/binary>>} ->
                             ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT +
-- cgit v1.2.1 


From b2687dbe7e8f35dab6f0e232384b1a18fa040129 Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Sat, 29 Aug 2009 07:13:51 +0100
Subject: cosmetic

---
 src/rabbit_disk_queue.erl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 22c57fe3..d712142a 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -1989,11 +1989,9 @@ read_next_file_entry(FileHdl, Offset) ->
                     ExpectedAbsPos =
                         Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize,
                     case file:position(FileHdl, {cur, TotalSize + 1}) of
-                        {ok, ExpectedAbsPos} ->
-                            {corrupted, ExpectedAbsPos};
-                        {ok, _SomeOtherPos} ->
-                            eof; %% seek failed, so give up
-                        KO -> KO
+                        {ok, ExpectedAbsPos} -> {corrupted, ExpectedAbsPos};
+                        {ok, _SomeOtherPos} -> eof; %% seek failed, so give up
+                        KO -> KO
                     end;
                 {_, _} -> %% all good, let's continue
-- cgit v1.2.1 


From 26cb7de3eeb1f48c551a4ab0910eeeb4a8249b74 Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Sat, 29 Aug 2009 07:14:39 +0100
Subject: cosmetic

---
 src/rabbit_disk_queue.erl | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index d712142a..d88eea1b 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -1998,9 +1998,8 @@ read_next_file_entry(FileHdl, Offset) ->
                         {ok, <<MsgId:MsgIdBinSize/binary>>} ->
                             ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT +
                                 TotalSize - 1,
-                            case file:position(FileHdl,
-                                               {cur, TotalSize - MsgIdBinSize}
-                                              ) of
+                            case file:position(
+                                   FileHdl, {cur, TotalSize - MsgIdBinSize}) of
                                 {ok, ExpectedAbsPos} ->
                                     NextOffset = Offset + TotalSize +
                                         ?FILE_PACKING_ADJUSTMENT,
-- cgit v1.2.1 


From 413c0f73b0c5d30a45335e849d8465bb630b72a0 Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Sat, 29 Aug 2009 07:15:29 +0100
Subject: refactoring: extract reading of stop byte

---
 src/rabbit_disk_queue.erl | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index d88eea1b..900692a4 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -1995,26 +1995,21 @@ read_next_file_entry(FileHdl, Offset) ->
                     end;
                 {_, _} -> %% all good, let's continue
                     case file:read(FileHdl, MsgIdBinSize) of
-                        {ok, <<MsgId:MsgIdBinSize/binary>>} ->
+                        {ok, <<MsgIdBin:MsgIdBinSize/binary>>} ->
                             ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT +
                                 TotalSize - 1,
                             case file:position(
                                    FileHdl, {cur, TotalSize - MsgIdBinSize}) of
                                 {ok, ExpectedAbsPos} ->
-                                    NextOffset = Offset + TotalSize +
-                                        ?FILE_PACKING_ADJUSTMENT,
-                                    case file:read(FileHdl, 1) of
-                                        {ok,
-                                         <<?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>>} ->
-                                            {ok, {binary_to_term(MsgId),
-                                                  false, TotalSize, NextOffset}};
-                                        {ok,
-                                         <<?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>>} ->
-                                            {ok, {binary_to_term(MsgId),
-                                                  true, TotalSize, NextOffset}};
-                                        {ok, _SomeOtherData} ->
+                                    NextOffset = ExpectedAbsPos + 1,
+                                    case read_stop_byte(FileHdl) of
+                                        {ok, Persistent} ->
+                                            MsgId = binary_to_term(MsgIdBin),
+                                            {ok, {MsgId, Persistent,
+                                                  TotalSize, NextOffset}};
+                                        corrupted ->
                                             {corrupted, NextOffset};
-                                        KO -> KO
+                                        Other -> Other
                                     end;
                                 {ok, _SomeOtherPos} ->
                                     %% seek failed, so give up
@@ -2026,3 +2021,11 @@ read_next_file_entry(FileHdl, Offset) ->
         Other -> Other
     end.
+
+read_stop_byte(FileHdl) ->
+    case file:read(FileHdl, 1) of
+        {ok, <<?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>>}  -> {ok, false};
+        {ok, <<?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>>} -> {ok, true};
+        {ok, _SomeOtherData}                               -> corrupted;
+        KO                                                 -> KO
+    end.
-- cgit v1.2.1 


From f59e92544d3c377f1952603dca114910352c24c2 Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Sat, 29 Aug 2009 07:20:22 +0100
Subject: Other is KO accidentally changed in previous commit

---
 src/rabbit_disk_queue.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 900692a4..1f8e1222 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -2009,7 +2009,7 @@ read_next_file_entry(FileHdl, Offset) ->
                                                   TotalSize, NextOffset}};
                                         corrupted ->
                                             {corrupted, NextOffset};
-                                        Other -> Other
+                                        KO -> KO
                                     end;
-- cgit v1.2.1 


From 81d68f02d6cf9bf5db044b47aa3e3cdc9205a7ed Mon Sep 17 00:00:00 2001
From: Matthias Radestock 
Date: Sat, 29 Aug 2009 07:32:53 +0100
Subject: cosmetic: remove whitespace

---
 src/rabbit_disk_queue.erl | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 1f8e1222..b7ed2156 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -273,7 +273,7 @@
 -spec(phantom_fetch/1 :: (queue_name()) ->
              ('empty' |
               {msg_id(), boolean(), boolean(), ack_tag(), non_neg_integer()})).
--spec(prefetch/1 :: (queue_name()) -> 'ok'). 
+-spec(prefetch/1 :: (queue_name()) -> 'ok').
 -spec(ack/2 :: (queue_name(), [ack_tag()]) -> 'ok').
 -spec(tx_publish/1 :: (message()) -> 'ok').
 -spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean()}], [ack_tag()]) ->
@@ -392,14 +392,13 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
     ok = filelib:ensure_dir(form_filename("nothing")),
 
     Node = node(),
-    ok =
-        case mnesia:change_table_copy_type(rabbit_disk_queue, Node,
-                                           disc_copies) of
-            {atomic, ok} -> ok;
-            {aborted, {already_exists, rabbit_disk_queue, Node,
-                       disc_copies}} -> ok;
-            E -> E
-        end,
+    ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node,
+                                            disc_copies) of
+             {atomic, ok} -> ok;
+             {aborted, {already_exists, rabbit_disk_queue, Node,
+                        disc_copies}} -> ok;
+             E -> E
+         end,
 
     ok = detect_shutdown_state_and_adjust_delivered_flags(),
 
@@ -464,7 +463,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) ->
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
       ?DESIRED_HIBERNATE}}.
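(Circling back to the "simplify match" patch a little earlier: the {0, _} /
{_, 0} clauses are safe replacements for the =< 0 comparisons precisely
because integers matched out of a binary with the default unsigned specifier
can never be negative. A shell demonstration with a hypothetical one-byte
field:

    1> <<N:8>> = <<255>>, N >= 0.
    true

A signed read of the same byte, <<N:8/signed>>, would give -1 instead.)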
handle_call({fetch, Q}, _From, State) -> - {ok, Result, State1} = + {ok, Result, State1} = internal_fetch_body(Q, record_delivery, pop_queue, State), reply(Result, State1); handle_call({phantom_fetch, Q}, _From, State) -> @@ -555,7 +554,7 @@ handle_cast({prefetch, Q, From}, State) -> false -> State1 end, noreply(State3). - + handle_info(report_memory, State) -> %% call noreply1/2, not noreply/1/2, as we don't want to restart the %% memory_report_timer_ref. @@ -709,7 +708,7 @@ decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> N when N =< 0 -> true = ets:delete(Cache, MsgId); _N -> true end - catch error:badarg -> + catch error:badarg -> %% MsgId is not in there because although it's been %% delivered, it's never actually been read (think: %% persistent message in mixed queue) @@ -885,7 +884,7 @@ internal_fetch_body(Q, MarkDelivered, Advance, State) -> internal_fetch_attributes(Q, MarkDelivered, Advance, State) -> case queue_head(Q, MarkDelivered, Advance, State) of E = {ok, empty, _} -> E; - {ok, AckTag, IsDelivered, + {ok, AckTag, IsDelivered, #message_store_entry { msg_id = MsgId, is_persistent = IsPersistent }, Remaining, State1} -> {ok, {MsgId, IsPersistent, IsDelivered, AckTag, Remaining}, State1} @@ -1076,11 +1075,9 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, end, PubMsgIds), TxnDetails = {Q, PubMsgIds, AckSeqIds, From}, case NeedsSync of - true -> - Txns1 = [TxnDetails | Txns], - State #dqstate { on_sync_txns = Txns1 }; - false -> - internal_do_tx_commit(TxnDetails, State) + true -> Txns1 = [TxnDetails | Txns], + State #dqstate { on_sync_txns = Txns1 }; + false -> internal_do_tx_commit(TxnDetails, State) end. internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, @@ -1613,7 +1610,7 @@ prune_mnesia_flush_batch(DeleteAcc) -> lists:foldl(fun (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) end, ok, DeleteAcc). - + prune_mnesia(_State, '$end_of_table', Files, _DeleteAcc, 0) -> {ok, Files}; prune_mnesia(_State, '$end_of_table', Files, DeleteAcc, _Len) -> @@ -1976,7 +1973,7 @@ scan_file_for_valid_messages(FileHdl, Offset, Acc) -> %% bad message, but we may still have recovered some valid messages {ok, Acc} end. - + read_next_file_entry(FileHdl, Offset) -> TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, case file:read(FileHdl, TwoIntegers) of @@ -2013,7 +2010,7 @@ read_next_file_entry(FileHdl, Offset) -> end; {ok, _SomeOtherPos} -> %% seek failed, so give up - eof; + eof; KO -> KO end; Other -> Other -- cgit v1.2.1 From 553fad7d51b4046311ac3bdaed2ca353ae8c5538 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 08:07:41 +0100 Subject: refactoring: (re)introduce {ok, ...} return on open_file This allows us to replace the last remaining file:open with open_file. Also, extract binary mode settings and use them in both open_file and fh cache creation. --- src/rabbit_disk_queue.erl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b7ed2156..cbf8d68f 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -73,6 +73,7 @@ -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read, read_ahead]). -define(WRITE_MODE, [write, delayed_write]). 
@@ -419,6 +420,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, 2}]), InitName = "0" ++ ?FILE_EXTENSION, + HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, + ?BINARY_MODE ++ [read]), State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, @@ -433,9 +436,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = 0, current_dirty = false, file_size_limit = FileSizeLimit, - read_file_hc_cache = rabbit_file_handle_cache:init( - ReadFileHandlesLimit, - [read, raw, binary]), + read_file_hc_cache = HandleCache, on_sync_txns = [], commit_timer_ref = undefined, last_sync_offset = 0, @@ -450,7 +451,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = Offset } } = load_from_disk(State), %% read is only needed so that we can seek - FileHdl = open_file(CurrentName, ?WRITE_MODE ++ [read]), + {ok, FileHdl} = open_file(CurrentName, ?WRITE_MODE ++ [read]), {ok, Offset} = file:position(FileHdl, Offset), State2 = State1 #dqstate { current_file_handle = FileHdl }, %% by reporting a memory use of 0, we guarantee the manager will @@ -807,9 +808,7 @@ base_directory() -> msg_location_dets_file() -> form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). -open_file(File, Mode) -> - {ok, Hdl} = file:open(form_filename(File), [raw, binary] ++ Mode), - Hdl. +open_file(File, Mode) -> file:open(form_filename(File), ?BINARY_MODE ++ Mode). with_read_handle_at(File, Offset, Fun, State = #dqstate { read_file_hc_cache = HC, @@ -1261,7 +1260,7 @@ maybe_roll_to_new_file(Offset, ok = file:close(CurHdl), NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, - NextHdl = open_file(NextName, ?WRITE_MODE), + {ok, NextHdl} = open_file(NextName, ?WRITE_MODE), true = ets:update_element(FileSummary, CurName, {5, NextName}),%% 5 is Right true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), State2 = State1 #dqstate { current_file_name = NextName, @@ -1369,8 +1368,8 @@ combine_files({Source, SourceValid, _SourceContiguousTop, _DestinationLeft, _DestinationRight}, State) -> State1 = close_file(Source, close_file(Destination, State)), - SourceHdl = open_file(Source, ?READ_MODE), - DestinationHdl = open_file(Destination, ?READ_MODE ++ ?WRITE_MODE), + {ok, SourceHdl} = open_file(Source, ?READ_MODE), + {ok, DestinationHdl} = open_file(Destination, ?READ_MODE ++ ?WRITE_MODE), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't %% need a tmp file @@ -1383,7 +1382,7 @@ combine_files({Source, SourceValid, _SourceContiguousTop, DestinationValid, ExpectedSize); true -> Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, - TmpHdl = open_file(Tmp, ?READ_MODE ++ ?WRITE_MODE), + {ok, TmpHdl} = open_file(Tmp, ?READ_MODE ++ ?WRITE_MODE), Worklist = lists:dropwhile( fun (#message_store_entry { offset = Offset }) @@ -1848,7 +1847,7 @@ recover_crashed_compactions1(Files, TmpFile) -> not (lists:member(MsgId, MsgIdsTmp)) end, MsgIds), %% must open with read flag, otherwise will stomp over contents - MainHdl = open_file(NonTmpRelatedFile, ?WRITE_MODE ++ [read]), + {ok, MainHdl} = open_file(NonTmpRelatedFile, ?WRITE_MODE ++ [read]), {ok, Top} = file:position(MainHdl, Top), %% wipe out any rubbish at the end of the file ok = file:truncate(MainHdl), @@ -1864,7 +1863,7 @@ recover_crashed_compactions1(Files, TmpFile) -> %% single move if we run out of disk space, this truncate %% could fail, but 
we still aren't risking losing data ok = file:truncate(MainHdl), - TmpHdl = open_file(TmpFile, ?READ_MODE), + {ok, TmpHdl} = open_file(TmpFile, ?READ_MODE), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:sync(MainHdl), ok = file:close(MainHdl), @@ -1950,10 +1949,11 @@ read_message_from_disk(FileHdl, TotalSize) -> end. scan_file_for_valid_messages(File) -> - case file:open(form_filename(File), [raw, binary, read, read_ahead]) of + case open_file(File, ?READ_MODE) of {ok, Hdl} -> Valid = scan_file_for_valid_messages(Hdl, 0, []), - %% if something really bad's happened, the close could fail, but ignore + %% if something really bad's happened, the close could fail, + %% but ignore file:close(Hdl), Valid; {error, enoent} -> {ok, []}; -- cgit v1.2.1 From 1b2987d56e9f8926b2ed5d30d0bcea7abfdecde2 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 08:13:26 +0100 Subject: refactoring: rename non-sensical read_file_hc_cache to read_file_handle_cache --- src/rabbit_disk_queue.erl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index cbf8d68f..d01a9834 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -104,7 +104,7 @@ current_dirty, %% has the current file been written to %% since the last fsync? file_size_limit, %% how big can our files get? - read_file_hc_cache, %% file handle cache for reading + read_file_handle_cache, %% file handle cache for reading on_sync_txns, %% list of commiters to run on sync (reversed) commit_timer_ref, %% TRef for our interval timer last_sync_offset, %% current_offset at the last time we sync'd @@ -436,8 +436,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> current_offset = 0, current_dirty = false, file_size_limit = FileSizeLimit, - read_file_hc_cache = HandleCache, - on_sync_txns = [], + read_file_handle_cache = HandleCache, + on_sync_txns = [], commit_timer_ref = undefined, last_sync_offset = 0, message_cache = ets:new(?CACHE_ETS_NAME, @@ -578,7 +578,7 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, current_file_handle = FileHdl, - read_file_hc_cache = HC + read_file_handle_cache = HC }) -> %% deliberately ignoring return codes here State1 = stop_commit_timer(stop_memory_timer(State)), @@ -594,7 +594,7 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, HC1 = rabbit_file_handle_cache:close_all(HC), State1 #dqstate { current_file_handle = undefined, current_dirty = false, - read_file_hc_cache = HC1, + read_file_handle_cache = HC1, memory_report_timer_ref = undefined }. @@ -811,7 +811,7 @@ msg_location_dets_file() -> open_file(File, Mode) -> file:open(form_filename(File), ?BINARY_MODE ++ Mode). with_read_handle_at(File, Offset, Fun, State = - #dqstate { read_file_hc_cache = HC, + #dqstate { read_file_handle_cache = HC, current_file_name = CurName, current_dirty = IsDirty, last_sync_offset = SyncOffset @@ -823,7 +823,7 @@ with_read_handle_at(File, Offset, Fun, State = FilePath = form_filename(File), {Result, HC1} = rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), - {Result, State1 #dqstate { read_file_hc_cache = HC1 }}. + {Result, State1 #dqstate { read_file_handle_cache = HC1 }}. 
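For reference, the renamed cache is driven through with_read_handle_at/4,
whose Fun receives a handle positioned at Offset and returns the next offset
together with its result. A sketch modelled on the read_stored_message call
site (File, Offset and TotalSize are hypothetical bindings here):

    {Res, State1} =
        with_read_handle_at(
          File, Offset,
          fun (Hdl) ->
                  {ok, Msg} = read_message_from_disk(Hdl, TotalSize),
                  {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, Msg}
          end, State),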
sync_current_file_handle(State = #dqstate { current_dirty = false, on_sync_txns = [] }) -> @@ -1469,9 +1469,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), ok. -close_file(File, State = #dqstate { read_file_hc_cache = HC }) -> +close_file(File, State = #dqstate { read_file_handle_cache = HC }) -> HC1 = rabbit_file_handle_cache:close_file(form_filename(File), HC), - State #dqstate { read_file_hc_cache = HC1 }. + State #dqstate { read_file_handle_cache = HC1 }. delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> [{File, ValidData, _ContiguousTop, Left, Right}] = -- cgit v1.2.1 From 92a17b599c07768583168683ea7086d8fc06fa1f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 08:28:46 +0100 Subject: cosmetic - mostly whitespace shuffling --- src/rabbit_disk_queue.erl | 62 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d01a9834..3491e67e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -624,68 +624,66 @@ report_memory(Hibernating, State) -> rabbit_memory_manager:report_memory(self(), trunc(2.5 * Bytes), Hibernating). -memory_use(#dqstate { operation_mode = ram_disk, - file_summary = FileSummary, - sequences = Sequences, +memory_use(#dqstate { operation_mode = ram_disk, + file_summary = FileSummary, + sequences = Sequences, msg_location_ets = MsgLocationEts, - message_cache = Cache, - wordsize = WordSize + message_cache = Cache, + wordsize = WordSize }) -> WordSize * (mnesia:table_info(rabbit_disk_queue, memory) + lists:sum([ets:info(Table, memory) || Table <- [MsgLocationEts, FileSummary, Cache, Sequences]])); -memory_use(#dqstate { operation_mode = disk_only, - file_summary = FileSummary, - sequences = Sequences, - msg_location_dets = MsgLocationDets, - message_cache = Cache, - wordsize = WordSize, +memory_use(#dqstate { operation_mode = disk_only, + file_summary = FileSummary, + sequences = Sequences, + msg_location_dets = MsgLocationDets, + message_cache = Cache, + wordsize = WordSize, mnesia_bytes_per_record = MnesiaBytesPerRecord, - ets_bytes_per_record = EtsBytesPerRecord }) -> - MnesiaSizeEstimate = - mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord, - MsgLocationSizeEstimate = - dets:info(MsgLocationDets, size) * EtsBytesPerRecord, + ets_bytes_per_record = EtsBytesPerRecord }) -> (WordSize * (lists:sum([ets:info(Table, memory) || Table <- [FileSummary, Cache, Sequences]]))) + - rabbit_misc:ceil(MnesiaSizeEstimate) + - rabbit_misc:ceil(MsgLocationSizeEstimate). + rabbit_misc:ceil( + mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord) + + rabbit_misc:ceil( + dets:info(MsgLocationDets, size) * EtsBytesPerRecord). 
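A worked example of the disk_only estimate above, with hypothetical numbers
on an 8-byte-word VM: a table of 1000 records reported as 250000 words at
conversion time costs 8 * 250000 / 1000 = 2000 bytes per record, so a later
table size of N records is estimated as ceil(N * 2000) bytes:

    1> WordSize = 8, MemWords = 250000, Size = 1000,
       WordSize * MemWords / lists:max([1, Size]).
    2000.0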
to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> State; -to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, +to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - wordsize = WordSize }) -> + msg_location_ets = MsgLocationEts, + wordsize = WordSize }) -> rabbit_log:info("Converting disk queue to disk only mode~n", []), - MnesiaMemoryBytes = WordSize * mnesia:table_info(rabbit_disk_queue, memory), - MnesiaSize = lists:max([1, mnesia:table_info(rabbit_disk_queue, size)]), - EtsMemoryBytes = WordSize * ets:info(MsgLocationEts, memory), - EtsSize = lists:max([1, ets:info(MsgLocationEts, size)]), + MnesiaMemBytes = WordSize * mnesia:table_info(rabbit_disk_queue, memory), + EtsMemBytes = WordSize * ets:info(MsgLocationEts, memory), + MnesiaSize = lists:max([1, mnesia:table_info(rabbit_disk_queue, size)]), + EtsSize = lists:max([1, ets:info(MsgLocationEts, size)]), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), garbage_collect(), - State #dqstate { operation_mode = disk_only, - mnesia_bytes_per_record = MnesiaMemoryBytes / MnesiaSize, - ets_bytes_per_record = EtsMemoryBytes / EtsSize }. + State #dqstate { operation_mode = disk_only, + mnesia_bytes_per_record = MnesiaMemBytes / MnesiaSize, + ets_bytes_per_record = EtsMemBytes / EtsSize }. to_ram_disk_mode(State = #dqstate { operation_mode = ram_disk }) -> State; -to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, +to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> + msg_location_ets = MsgLocationEts }) -> rabbit_log:info("Converting disk queue to ram disk mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), garbage_collect(), - State #dqstate { operation_mode = ram_disk, + State #dqstate { operation_mode = ram_disk, mnesia_bytes_per_record = undefined, - ets_bytes_per_record = undefined }. + ets_bytes_per_record = undefined }. %%---------------------------------------------------------------------------- %% message cache helper functions -- cgit v1.2.1 From 4cac33c65398501c5339dbe83e77f0a44fd2086d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 08:30:03 +0100 Subject: cosmetic --- src/rabbit_disk_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3491e67e..d3bae0bb 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -74,8 +74,8 @@ -define(FILE_SIZE_LIMIT, (256*1024*1024)). -define(BINARY_MODE, [raw, binary]). --define(READ_MODE, [read, read_ahead]). --define(WRITE_MODE, [write, delayed_write]). +-define(READ_MODE, [read, read_ahead]). +-define(WRITE_MODE, [write, delayed_write]). -define(SHUTDOWN_MESSAGE_KEY, shutdown_token). -define(SHUTDOWN_MESSAGE, -- cgit v1.2.1 From b55c1ee841245037c8364d52191939ba1a442d45 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 10:32:42 +0100 Subject: banish ?FILE_PACKING_ADJUSTMENT from all but three functions The details of the message packing are opaque to high level code, as they should be. 
The TotalSize that code sees now is the total size of the message on disk, including all packing adjustments, which is all that is ever needed to perform all the necessary file positioning etc at that level. --- src/rabbit_disk_queue.erl | 62 ++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index d3bae0bb..4205dca5 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -923,7 +923,7 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, {offset, Offset}, {read, Rest}]}}) end, - {Offset + TotalSize + ?FILE_PACKING_ADJUSTMENT, Res} + {Offset + TotalSize, Res} end, State), Message = #basic_message {} = bin_to_msg(MsgBody), ok = if RefCount > 1 -> @@ -1003,10 +1003,8 @@ remove_message(MsgId, Files, ets:lookup(FileSummary, File), ContiguousTop1 = lists:min([ContiguousTop, Offset]), true = - ets:insert(FileSummary, - {File, - (ValidTotalSize-TotalSize-?FILE_PACKING_ADJUSTMENT), - ContiguousTop1, Left, Right}), + ets:insert(FileSummary, {File, ValidTotalSize - TotalSize, + ContiguousTop1, Left, Right}), if CurName =:= File -> Files; true -> sets:add_element(File, Files) end; @@ -1036,8 +1034,7 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, is_persistent = IsPersistent }), [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = ets:lookup(FileSummary, CurName), - ValidTotalSize1 = ValidTotalSize + TotalSize + - ?FILE_PACKING_ADJUSTMENT, + ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> %% can't be any holes in this file ValidTotalSize1; @@ -1045,7 +1042,7 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, end, true = ets:insert(FileSummary, {CurName, ValidTotalSize1, ContiguousTop1, Left, undefined}), - NextOffset = CurOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT, + NextOffset = CurOffset + TotalSize, maybe_roll_to_new_file( NextOffset, State #dqstate {current_offset = NextOffset, current_dirty = true}); @@ -1437,19 +1434,18 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. 
 %% Offset, BlockStart and BlockEnd are in the SourceFile
-                  Size = TotalSize + ?FILE_PACKING_ADJUSTMENT,
                   %% update MsgLocationDets to reflect change of file and offset
                   ok = dets_ets_insert(State, StoreEntry #message_store_entry {
                                                 file = Destination,
                                                 offset = CurOffset }),
-                  NextOffset = CurOffset + Size,
+                  NextOffset = CurOffset + TotalSize,
                   if BlockStart =:= undefined ->
                          %% base case, called only for the first list elem
-                         {NextOffset, Offset, Offset + Size};
+                         {NextOffset, Offset, Offset + TotalSize};
                      Offset =:= BlockEnd ->
                          %% extend the current block because the next
                          %% msg follows straight on
-                         {NextOffset, BlockStart, BlockEnd + Size};
+                         {NextOffset, BlockStart, BlockEnd + TotalSize};
                      true ->
                          %% found a gap, so actually do the work for
                          %% the previous block
                          BSize = BlockEnd - BlockStart,
                          {ok, BlockStart} =
                              file:position(SourceHdl, BlockStart),
                          {ok, BSize} =
                              file:copy(SourceHdl, DestinationHdl, BSize),
-                         {NextOffset, Offset, Offset + Size}
+                         {NextOffset, Offset, Offset + TotalSize}
                  end
          end, {InitOffset, undefined, undefined}, WorkList),
     %% do the last remaining block
@@ -1729,7 +1725,7 @@ load_messages(Left, [], State) ->
                               offset = MaxOffset,
                               total_size = TotalSize} | _ ] =
                     sort_msg_locations_by_offset(desc, L),
-                MaxOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT
+                MaxOffset + TotalSize
         end,
     State #dqstate { current_file_num = Num, current_file_name = Left,
                      current_offset = Offset };
@@ -1752,7 +1748,7 @@ load_messages(Left, [File|Files],
                              total_size = TotalSize,
                              is_persistent = IsPersistent }),
                     {[Obj | VMAcc],
-                     VTSAcc + TotalSize + ?FILE_PACKING_ADJUSTMENT
+                     VTSAcc + TotalSize
                     }
             end
         end, {[], 0}, Messages),
@@ -1854,7 +1850,7 @@ recover_crashed_compactions1(Files, TmpFile) ->
             %% Remember the head of the list will be the highest entry
             %% in the file
            [{_, _, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp,
-            TmpSize = TmpTopOffset + TmpTopTotalSize + ?FILE_PACKING_ADJUSTMENT,
+            TmpSize = TmpTopOffset + TmpTopTotalSize,
             ExpectedAbsPos = Top + TmpSize,
             {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}),
             %% and now extend the main file as big as necessary in a
@@ -1890,7 +1886,7 @@ find_contiguous_block_prefix([], ExpectedOffset, MsgIds) ->
     {ExpectedOffset, MsgIds};
 find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, ExpectedOffset}
                              | Tail], ExpectedOffset, MsgIds) ->
-    ExpectedOffset1 = ExpectedOffset + TotalSize + ?FILE_PACKING_ADJUSTMENT,
+    ExpectedOffset1 = ExpectedOffset + TotalSize,
     find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]);
 find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) ->
     {ExpectedOffset, MsgIds}.
@@ -1915,27 +1911,28 @@ append_message(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) ->
     BodySize = size(MsgBody),
     MsgIdBin = term_to_binary(MsgId),
     MsgIdBinSize = size(MsgIdBin),
-    TotalSize = BodySize + MsgIdBinSize,
+    Size = BodySize + MsgIdBinSize,
     StopByte = case IsPersistent of
                    true -> ?WRITE_OK_PERSISTENT;
                    false -> ?WRITE_OK_TRANSIENT
               end,
-    case file:write(FileHdl, <<TotalSize:?INTEGER_SIZE_BITS,
+    case file:write(FileHdl, <<Size:?INTEGER_SIZE_BITS,
                                MsgIdBinSize:?INTEGER_SIZE_BITS,
                                MsgIdBin:MsgIdBinSize/binary,
                                MsgBody:BodySize/binary,
                                StopByte:?WRITE_OK_SIZE_BITS>>) of
-        ok -> {ok, TotalSize};
+        ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT};
         KO -> KO
     end.
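With the adjustment now confined to the edges, the on-disk frame is
<<Size:64, MsgIdBinSize:64, MsgIdBin, MsgBody, StopByte:8>>, so
?FILE_PACKING_ADJUSTMENT = 1 + 2 * 8 = 17 bytes and TotalSize = Size + 17.
A quick check for a hypothetical 10-byte message id and 100-byte body:

    1> Size = 10 + 100, Size + 1 + 2 * 8.
    127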
read_message_from_disk(FileHdl, TotalSize) -> - TotalSizeWriteOkBytes = TotalSize + 1, - case file:read(FileHdl, TotalSize + ?FILE_PACKING_ADJUSTMENT) of - {ok, <>} -> - BodySize = TotalSize - MsgIdBinSize, + Rest:SizeWriteOkBytes/binary>>} -> + BodySize = Size - MsgIdBinSize, <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, StopByte:?WRITE_OK_SIZE_BITS>> = Rest, Persistent = case StopByte of @@ -1976,14 +1973,13 @@ read_next_file_entry(FileHdl, Offset) -> TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, case file:read(FileHdl, TwoIntegers) of {ok, - <>} -> - case {TotalSize, MsgIdBinSize} of + <>} -> + case {Size, MsgIdBinSize} of {0, _} -> eof; %% Nothing we can do other than stop {_, 0} -> %% current message corrupted, try skipping past it - ExpectedAbsPos = - Offset + ?FILE_PACKING_ADJUSTMENT + TotalSize, - case file:position(FileHdl, {cur, TotalSize + 1}) of + ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT, + case file:position(FileHdl, {cur, Size + 1}) of {ok, ExpectedAbsPos} -> {corrupted, ExpectedAbsPos}; {ok, _SomeOtherPos} -> eof; %% seek failed, so give up KO -> KO @@ -1991,10 +1987,10 @@ read_next_file_entry(FileHdl, Offset) -> {_, _} -> %% all good, let's continue case file:read(FileHdl, MsgIdBinSize) of {ok, <>} -> - ExpectedAbsPos = Offset + ?FILE_PACKING_ADJUSTMENT + - TotalSize - 1, + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + ExpectedAbsPos = Offset + TotalSize - 1, case file:position( - FileHdl, {cur, TotalSize - MsgIdBinSize}) of + FileHdl, {cur, Size - MsgIdBinSize}) of {ok, ExpectedAbsPos} -> NextOffset = ExpectedAbsPos + 1, case read_stop_byte(FileHdl) of -- cgit v1.2.1 From 547cd84a08bc7c9e72f93d459a77352c50d94b6d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 11:11:41 +0100 Subject: remove unused constant This was added in revision b48919ed4e12 for no apparent reason --- src/rabbit_disk_queue.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 4205dca5..719ff1a0 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -69,7 +69,6 @@ -define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). -define(BATCH_SIZE, 10000). -define(CACHE_MAX_SIZE, 10485760). --define(WRITE_HANDLE_OPEN_MODE, [append, raw, binary, delayed_write]). -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). -- cgit v1.2.1 From dfda3707d08a0b72fcfba594dc113ef91192a05f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 29 Aug 2009 11:15:16 +0100 Subject: refactoring: move all the low level message file i/o into a separate module The details of how messages are encoded in files are opaque to disk_queue. --- src/rabbit_disk_queue.erl | 121 ++----------------------------------- src/rabbit_msg_file.erl | 148 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 116 deletions(-) create mode 100644 src/rabbit_msg_file.erl diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 719ff1a0..84c3b6e3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -54,11 +54,6 @@ -include("rabbit.hrl"). --define(WRITE_OK_SIZE_BITS, 8). --define(WRITE_OK_TRANSIENT, 255). --define(WRITE_OK_PERSISTENT, 254). --define(INTEGER_SIZE_BYTES, 8). --define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). 
@@ -66,7 +61,6 @@ -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(FILE_EXTENSION_DETS, ".dets"). --define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). -define(BATCH_SIZE, 10000). -define(CACHE_MAX_SIZE, 10485760). -define(MAX_READ_FILE_HANDLES, 256). @@ -913,7 +907,7 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, with_read_handle_at( File, Offset, fun(Hdl) -> - Res = case read_message_from_disk(Hdl, TotalSize) of + Res = case rabbit_msg_file:read(Hdl, TotalSize) of {ok, {_, _, _}} = Obj -> Obj; {ok, Rest} -> throw({error, @@ -1024,8 +1018,9 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, case dets_ets_lookup(State, MsgId) of [] -> %% New message, lots to do - {ok, TotalSize} = append_message(CurHdl, MsgId, msg_to_bin(Message), - IsPersistent), + {ok, TotalSize} = rabbit_msg_file:append( + CurHdl, MsgId, msg_to_bin(Message), + IsPersistent), true = dets_ets_insert_new( State, #message_store_entry { msg_id = MsgId, ref_count = 1, file = CurName, @@ -1902,50 +1897,10 @@ get_disk_queue_files() -> DQTFilesSorted = lists:sort(fun file_name_sort/2, DQTFiles), {DQFilesSorted, DQTFilesSorted}. -%%---------------------------------------------------------------------------- -%% raw reading and writing of files -%%---------------------------------------------------------------------------- - -append_message(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> - BodySize = size(MsgBody), - MsgIdBin = term_to_binary(MsgId), - MsgIdBinSize = size(MsgIdBin), - Size = BodySize + MsgIdBinSize, - StopByte = case IsPersistent of - true -> ?WRITE_OK_PERSISTENT; - false -> ?WRITE_OK_TRANSIENT - end, - case file:write(FileHdl, <>) of - ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; - KO -> KO - end. - -read_message_from_disk(FileHdl, TotalSize) -> - Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, - SizeWriteOkBytes = Size + 1, - case file:read(FileHdl, TotalSize) of - {ok, <>} -> - BodySize = Size - MsgIdBinSize, - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, - StopByte:?WRITE_OK_SIZE_BITS>> = Rest, - Persistent = case StopByte of - ?WRITE_OK_TRANSIENT -> false; - ?WRITE_OK_PERSISTENT -> true - end, - {ok, {MsgBody, Persistent, BodySize}}; - KO -> KO - end. - scan_file_for_valid_messages(File) -> case open_file(File, ?READ_MODE) of {ok, Hdl} -> - Valid = scan_file_for_valid_messages(Hdl, 0, []), + Valid = rabbit_msg_file:scan(Hdl), %% if something really bad's happened, the close could fail, %% but ignore file:close(Hdl), @@ -1953,69 +1908,3 @@ scan_file_for_valid_messages(File) -> {error, enoent} -> {ok, []}; {error, Reason} -> throw({error, {unable_to_scan_file, File, Reason}}) end. - -scan_file_for_valid_messages(FileHdl, Offset, Acc) -> - case read_next_file_entry(FileHdl, Offset) of - eof -> {ok, Acc}; - {corrupted, NextOffset} -> - scan_file_for_valid_messages(FileHdl, NextOffset, Acc); - {ok, {MsgId, IsPersistent, TotalSize, NextOffset}} -> - scan_file_for_valid_messages( - FileHdl, NextOffset, - [{MsgId, IsPersistent, TotalSize, Offset} | Acc]); - _KO -> - %% bad message, but we may still have recovered some valid messages - {ok, Acc} - end. 
- -read_next_file_entry(FileHdl, Offset) -> - TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, - case file:read(FileHdl, TwoIntegers) of - {ok, - <>} -> - case {Size, MsgIdBinSize} of - {0, _} -> eof; %% Nothing we can do other than stop - {_, 0} -> - %% current message corrupted, try skipping past it - ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT, - case file:position(FileHdl, {cur, Size + 1}) of - {ok, ExpectedAbsPos} -> {corrupted, ExpectedAbsPos}; - {ok, _SomeOtherPos} -> eof; %% seek failed, so give up - KO -> KO - end; - {_, _} -> %% all good, let's continue - case file:read(FileHdl, MsgIdBinSize) of - {ok, <>} -> - TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, - ExpectedAbsPos = Offset + TotalSize - 1, - case file:position( - FileHdl, {cur, Size - MsgIdBinSize}) of - {ok, ExpectedAbsPos} -> - NextOffset = ExpectedAbsPos + 1, - case read_stop_byte(FileHdl) of - {ok, Persistent} -> - MsgId = binary_to_term(MsgIdBin), - {ok, {MsgId, Persistent, - TotalSize, NextOffset}}; - corrupted -> - {corrupted, NextOffset}; - KO -> KO - end; - {ok, _SomeOtherPos} -> - %% seek failed, so give up - eof; - KO -> KO - end; - Other -> Other - end - end; - Other -> Other - end. - -read_stop_byte(FileHdl) -> - case file:read(FileHdl, 1) of - {ok, <>} -> {ok, false}; - {ok, <>} -> {ok, true}; - {ok, _SomeOtherData} -> corrupted; - KO -> KO - end. diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl new file mode 100644 index 00000000..6cf11ac8 --- /dev/null +++ b/src/rabbit_msg_file.erl @@ -0,0 +1,148 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_file). + +-export([append/4, read/2, scan/1]). + +%%---------------------------------------------------------------------------- + +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_TRANSIENT, 255). +-define(WRITE_OK_PERSISTENT, 254). +-define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). 
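+
+%% A sketch of the on-disk record layout implied by append/4 and read/2
+%% below:
+%%
+%%   <<Size:?INTEGER_SIZE_BITS, MsgIdBinSize:?INTEGER_SIZE_BITS,
+%%     MsgIdBin:MsgIdBinSize/binary, MsgBody/binary,
+%%     StopByte:?WRITE_OK_SIZE_BITS>>
+%%
+%% where Size = size(MsgIdBin) + size(MsgBody), so the framing overhead
+%% is exactly ?FILE_PACKING_ADJUSTMENT bytes per message.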
+
+%%----------------------------------------------------------------------------
+
+append(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) ->
+    BodySize = size(MsgBody),
+    MsgIdBin = term_to_binary(MsgId),
+    MsgIdBinSize = size(MsgIdBin),
+    Size = BodySize + MsgIdBinSize,
+    StopByte = case IsPersistent of
+                   true -> ?WRITE_OK_PERSISTENT;
+                   false -> ?WRITE_OK_TRANSIENT
+               end,
+    case file:write(FileHdl, <<Size:?INTEGER_SIZE_BITS,
+                               MsgIdBinSize:?INTEGER_SIZE_BITS,
+                               MsgIdBin:MsgIdBinSize/binary,
+                               MsgBody:BodySize/binary,
+                               StopByte:?WRITE_OK_SIZE_BITS>>) of
+        ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT};
+        KO -> KO
+    end.
+
+read(FileHdl, TotalSize) ->
+    Size = TotalSize - ?FILE_PACKING_ADJUSTMENT,
+    SizeWriteOkBytes = Size + 1,
+    case file:read(FileHdl, TotalSize) of
+        {ok, <<Size:?INTEGER_SIZE_BITS,
+               MsgIdBinSize:?INTEGER_SIZE_BITS,
+               Rest:SizeWriteOkBytes/binary>>} ->
+            BodySize = Size - MsgIdBinSize,
+            <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary,
+              StopByte:?WRITE_OK_SIZE_BITS>> = Rest,
+            Persistent = case StopByte of
+                             ?WRITE_OK_TRANSIENT -> false;
+                             ?WRITE_OK_PERSISTENT -> true
+                         end,
+            {ok, {MsgBody, Persistent, BodySize}};
+        KO -> KO
+    end.
+
+scan(FileHdl) -> scan(FileHdl, 0, []).
+
+scan(FileHdl, Offset, Acc) ->
+    case read_next(FileHdl, Offset) of
+        eof -> {ok, Acc};
+        {corrupted, NextOffset} ->
+            scan(FileHdl, NextOffset, Acc);
+        {ok, {MsgId, IsPersistent, TotalSize, NextOffset}} ->
+            scan(FileHdl, NextOffset,
+                 [{MsgId, IsPersistent, TotalSize, Offset} | Acc]);
+        _KO ->
+            %% bad message, but we may still have recovered some valid messages
+            {ok, Acc}
+    end.
+
+read_next(FileHdl, Offset) ->
+    TwoIntegers = 2 * ?INTEGER_SIZE_BYTES,
+    case file:read(FileHdl, TwoIntegers) of
+        {ok,
+         <<Size:?INTEGER_SIZE_BITS, MsgIdBinSize:?INTEGER_SIZE_BITS>>} ->
+            case {Size, MsgIdBinSize} of
+                {0, _} -> eof; %% Nothing we can do other than stop
+                {_, 0} ->
+                    %% current message corrupted, try skipping past it
+                    ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT,
+                    case file:position(FileHdl, {cur, Size + 1}) of
+                        {ok, ExpectedAbsPos} -> {corrupted, ExpectedAbsPos};
+                        {ok, _SomeOtherPos} -> eof; %% seek failed, so give up
+                        KO -> KO
+                    end;
+                {_, _} -> %% all good, let's continue
+                    case file:read(FileHdl, MsgIdBinSize) of
+                        {ok, <<MsgIdBin:MsgIdBinSize/binary>>} ->
+                            TotalSize = Size + ?FILE_PACKING_ADJUSTMENT,
+                            ExpectedAbsPos = Offset + TotalSize - 1,
+                            case file:position(
+                                   FileHdl, {cur, Size - MsgIdBinSize}) of
+                                {ok, ExpectedAbsPos} ->
+                                    NextOffset = ExpectedAbsPos + 1,
+                                    case read_stop_byte(FileHdl) of
+                                        {ok, Persistent} ->
+                                            MsgId = binary_to_term(MsgIdBin),
+                                            {ok, {MsgId, Persistent,
+                                                  TotalSize, NextOffset}};
+                                        corrupted ->
+                                            {corrupted, NextOffset};
+                                        KO -> KO
+                                    end;
+                                {ok, _SomeOtherPos} ->
+                                    %% seek failed, so give up
+                                    eof;
+                                KO -> KO
+                            end;
+                        Other -> Other
+                    end
+            end;
+        Other -> Other
+    end.
+
+read_stop_byte(FileHdl) ->
+    case file:read(FileHdl, 1) of
+        {ok, <<?WRITE_OK_TRANSIENT:?WRITE_OK_SIZE_BITS>>} -> {ok, false};
+        {ok, <<?WRITE_OK_PERSISTENT:?WRITE_OK_SIZE_BITS>>} -> {ok, true};
+        {ok, _SomeOtherData} -> corrupted;
+        KO -> KO
+    end.
-- cgit v1.2.1


From f112cd9df2dbf427c576e79100515f8ebdfe6357 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 1 Sep 2009 12:09:33 +0100
Subject: Read through all of Matthias' changes, no problems.

In here, one cosmetic, and one change so that rmf:read returns the MsgId
off disk to be later matched. This is purely defensive.
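Concretely, the defensive match binds MsgId from the message_store_entry
before the result of rabbit_msg_file:read is examined, so bytes read back
from a wrong offset fail fast with a badmatch rather than being delivered
as the wrong message. The shape of the match (a sketch; the real call
site sits inside with_read_handle_at, as the diff below shows):

    {ok, {MsgId, MsgBody, _IsPersistent, _BodySize}} =
        rabbit_msg_file:read(Hdl, TotalSize)
    %% MsgId here is already bound from the store entry, not freshly read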
--- src/rabbit_disk_queue.erl | 10 +++------- src/rabbit_msg_file.erl | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 84c3b6e3..9caed89a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -903,12 +903,12 @@ read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, total_size = TotalSize }, State) -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgBody, _IsPersistent, _BodySize}}, State1} = + {{ok, {MsgId, MsgBody, _IsPersistent, _BodySize}}, State1} = with_read_handle_at( File, Offset, fun(Hdl) -> Res = case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {_, _, _}} = Obj -> Obj; + {ok, {MsgId, _, _, _}} = Obj -> Obj; {ok, Rest} -> throw({error, {misread, [{old_state, State}, @@ -1725,7 +1725,6 @@ load_messages(Left, [], State) -> current_offset = Offset }; load_messages(Left, [File|Files], State = #dqstate { file_summary = FileSummary }) -> - %% [{MsgId, TotalSize, FileOffset}] {ok, Messages} = scan_file_for_valid_messages(File), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> @@ -1741,9 +1740,7 @@ load_messages(Left, [File|Files], file = File, offset = Offset, total_size = TotalSize, is_persistent = IsPersistent }), - {[Obj | VMAcc], - VTSAcc + TotalSize - } + {[Obj | VMAcc], VTSAcc + TotalSize} end end, {[], 0}, Messages), %% foldl reverses lists, find_contiguous_block_prefix needs @@ -1782,7 +1779,6 @@ scan_file_for_valid_messages_msg_ids(File) -> recover_crashed_compactions1(Files, TmpFile) -> NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFile, Files), - %% [{MsgId, TotalSize, FileOffset}] {ok, UncorruptedMessagesTmp, MsgIdsTmp} = scan_file_for_valid_messages_msg_ids(TmpFile), %% all of these messages should appear in the mnesia table, diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 6cf11ac8..70b04c09 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -70,13 +70,13 @@ read(FileHdl, TotalSize) -> MsgIdBinSize:?INTEGER_SIZE_BITS, Rest:SizeWriteOkBytes/binary>>} -> BodySize = Size - MsgIdBinSize, - <<_MsgId:MsgIdBinSize/binary, MsgBody:BodySize/binary, + <> = Rest, Persistent = case StopByte of ?WRITE_OK_TRANSIENT -> false; ?WRITE_OK_PERSISTENT -> true end, - {ok, {MsgBody, Persistent, BodySize}}; + {ok, {binary_to_term(MsgId), MsgBody, Persistent, BodySize}}; KO -> KO end. -- cgit v1.2.1 From 8653730bfe1a6f1a9a4b5b8c162c9d84ec4add7f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 1 Sep 2009 15:47:19 +0100 Subject: Before the magic_marker_msg was introduced, if the queue was entirely on disk already and was then told to go to disk_only mode, it would form a single requeue_next_n call, where the N would be the length of the queue. This would be detected by the disk_queue and become a no-op. Because of the introduction of the magic_marker_msg, that is no longer possible - we want the marker to go from the back of the queue to the front. As such, the N will at most be 1 less than the queue length, causing unnecessary work to be done. This patch removes unnecessary work by observing that we do not need to rotate the entire queue should we find that the queue consists of zero or more disk-stored msgs followed by zero or more ram-only stored messages. 
If this is the case, we only need to publish the latter ram-only messages, and have no need for the magic marker msgs at all. Furthermore, if there are no ram-only messages we have no work to do at all. The only situation in which we must rotate the entire queue is when we have ram-only messages followed by disk messages. In this case, we have to get the ram-only messages onto the disk queue before the disk messages, which requires the full rotation. --- src/rabbit_mixed_queue.erl | 102 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index bbec524b..2bb9c09a 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -362,20 +362,8 @@ set_storage_mode(disk, TxnMessages, State = queue:join(Fetched, MsgBuf2) end end, - %% (Re)enqueue _everything_ here. Note that due to batching going - %% on (see comments above send_messages_to_disk), if we crash - %% during this transition, we could have messages in the wrong - %% order on disk. Thus we publish a magic_marker_message which, - %% when this transition is compelete, will be back at the head of - %% the queue. Should we die, on startup, during the foldl over the - %% queue, we detect the marker message and requeue all the - %% messages in front of it, to the back of the queue, thus - %% correcting the order. The result is that everything ends up - %% back in the same order, but will have new sequence IDs. - ok = publish_magic_marker_message(Q), {ok, MsgBuf3} = - send_messages_to_disk(IsDurable, Q, MsgBuf1, 0, 0, [], [], queue:new()), - {ok, Length} = fetch_ack_magic_marker_message(Q), + send_messages_to_disk(IsDurable, Q, MsgBuf1, Length), %% tx_publish txn messages. Some of these will have been already %% published if they really are durable and persistent which is %% why we can't just use our own tx_publish/2 function (would end @@ -414,6 +402,90 @@ set_storage_mode(mixed, TxnMessages, State = garbage_collect(), {ok, State #mqstate { mode = mixed }}. +send_messages_to_disk(_IsDurable, _Q, MsgBuf, 0) -> + {ok, MsgBuf}; +send_messages_to_disk(IsDurable, Q, MsgBuf, Length) -> + case scan_for_disk_after_ram(IsDurable, MsgBuf) of + disk_only -> + %% Everything on disk already, we don't need to do + %% anything + {ok, inc_queue_length(queue:new(), Length)}; + {not_found, PrefixLen, MsgBufRAMSuffix} -> + %% No disk msgs follow RAM msgs and the queue has a RAM + %% suffix, so we can just publish those. If we crash at + %% this point, we may lose some messages, but everything + %% will remain in the right order, so no need for the + %% marker messages. + MsgBuf1 = inc_queue_length(queue:new(), PrefixLen), + send_messages_to_disk(IsDurable, Q, MsgBufRAMSuffix, 0, 0, [], [], + MsgBuf1); + found -> + %% There are disk msgs *after* ram msgs in the queue. We + %% need to reenqueue everything. Note that due to batching + %% going on (see comments above send_messages_to_disk/8), + %% if we crash during this transition, we could have + %% messages in the wrong order on disk. Thus we publish a + %% magic_marker_message which, when this transition is + %% complete, will be back at the head of the queue. Should + %% we die, on startup, during the foldl over the queue, we + %% detect the marker message and requeue all the messages + %% in front of it, to the back of the queue, thus + %% correcting the order. The result is that everything + %% ends up back in the same order, but will have new + %% sequence IDs. 
+ ok = publish_magic_marker_message(Q), + {ok, MsgBuf1} = + send_messages_to_disk(IsDurable, Q, MsgBuf, 0, 0, [], [], + queue:new()), + {ok, Length} = fetch_ack_magic_marker_message(Q), + {ok, MsgBuf1} + end. + +scan_for_disk_after_ram(IsDurable, MsgBuf) -> + scan_for_disk_after_ram(IsDurable, MsgBuf, {disk, 0}). + +%% We return 'disk_only' if everything is alread on disk; 'found' if +%% we find a disk message after finding RAM messages; and +%% {'not_found', Count, MsgBuf} otherwise, where Count is the length +%% of the disk prefix, and MsgBuf is the RAM suffix of the MsgBuf +%% argument. Note msgs via the prefetcher are counted as RAM msgs on +%% the grounds that they have to be republished. +scan_for_disk_after_ram(IsDurable, MsgBuf, Mode) -> + case queue:out(MsgBuf) of + {empty, _MsgBuf} -> + case Mode of + {ram, N, MsgBuf1} -> {not_found, N, MsgBuf1}; + {disk, _N} -> disk_only + end; + {{value, {on_disk, Count}}, MsgBuf1} -> + case Mode of + {ram, _, _} -> found; %% found disk after RAM, bad + {disk, N} -> scan_for_disk_after_ram(IsDurable, MsgBuf1, + {disk, N + Count}) + end; + {{value, {_Msg, _IsDelivered, _AckTag}}, MsgBuf1} -> + %% found a msg from the prefetcher. Ensure RAM mode + scan_for_disk_after_ram(IsDurable, MsgBuf1, + ensure_ram(Mode, MsgBuf)); + {{value, + {#basic_message { is_persistent = IsPersistent }, _IsDelivered}}, + MsgBuf1} -> + %% normal message + case IsDurable andalso IsPersistent of + true -> + case Mode of + {ram, _, _} -> found; %% found disk after RAM, bad + {disk, N} -> scan_for_disk_after_ram(IsDurable, MsgBuf1, + {disk, N + 1}) + end; + false -> scan_for_disk_after_ram(IsDurable, MsgBuf1, + ensure_ram(Mode, MsgBuf)) + end + end. + +ensure_ram(Obj = {ram, _N, _MsgBuf}, _MsgBuf1) -> Obj; +ensure_ram({disk, N}, MsgBuf) -> {ram, N, MsgBuf}. + %% (Re)enqueue _everything_ here. Messages which are not on disk will %% be tx_published, messages that are on disk will be requeued to the %% end of the queue. This is done in batches, where a batch consists @@ -486,6 +558,10 @@ flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), {[], []}. +%% Scaling this by 4 is a magic number. Found by trial and error to +%% work ok. We are deliberately over reporting so that we run out of +%% memory sooner rather than later, because the transition to disk +%% only modes transiently can take quite a lot of memory. estimate_queue_memory(State = #mqstate { memory_size = Size }) -> {State, 4 * Size}. -- cgit v1.2.1 From 32c7ae8a5ae472cb78ef7a5d936a7d4c23a8ed38 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 1 Sep 2009 15:48:31 +0100 Subject: comments on magic 2.5 scaling of memory report, and removal of unnecessary initial calls to report_memory --- src/rabbit_disk_queue.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 9caed89a..aa45e748 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -447,12 +447,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {ok, FileHdl} = open_file(CurrentName, ?WRITE_MODE ++ [read]), {ok, Offset} = file:position(FileHdl, Offset), State2 = State1 #dqstate { current_file_handle = FileHdl }, - %% by reporting a memory use of 0, we guarantee the manager will - %% not oppress us. We have to start in ram_disk mode because we - %% can't find values for mnesia_bytes_per_record or - %% ets_bytes_per_record otherwise. 
- ok = rabbit_memory_manager:report_memory(self(), 0, false), - ok = report_memory(false, State2), {ok, start_memory_timer(State2), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -612,6 +606,10 @@ start_memory_timer(State = #dqstate { memory_report_timer_ref = undefined }) -> start_memory_timer(State) -> State. +%% Scaling this by 2.5 is a magic number. Found by trial and error to +%% work ok. We are deliberately over reporting so that we run out of +%% memory sooner rather than later, because the transition to disk +%% only modes transiently can take quite a lot of memory. report_memory(Hibernating, State) -> Bytes = memory_use(State), rabbit_memory_manager:report_memory(self(), trunc(2.5 * Bytes), -- cgit v1.2.1 From 4a3a904a2178f1ef99ddb3126ae7f511394fb444 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 1 Sep 2009 16:13:05 +0100 Subject: The multiplier for the memory thresholds should not be on the number of available tokes. In extremis, when available tokens is 0, this makes queues very likely to switch mode (bad, memory is tight), and when there are lots of tokens available, this makes queues less likely to switch mode (bad, memory is plentiful). Instead, we should be using constant offsets, based on the total number of tokens in the system. --- src/rabbit_memory_manager.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index eb37a6f3..bf694c8f 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -41,7 +41,8 @@ -export([register/5, report_memory/3, info/0, conserve_memory/2]). -define(TOTAL_TOKENS, 10000000). --define(THRESHOLD_MULTIPLIER, 1.05). +-define(THRESHOLD_MULTIPLIER, 0.05). +-define(THRESHOLD_OFFSET, ?TOTAL_TOKENS * ?THRESHOLD_MULTIPLIER). -define(SERVER, ?MODULE). @@ -228,8 +229,8 @@ handle_cast({report_memory, Pid, Memory, Hibernating}, end; {oppressed, OrigAvail} -> case Alarmed orelse Hibernating orelse - (Avail > (OrigAvail / ?THRESHOLD_MULTIPLIER) andalso - Avail < (OrigAvail * ?THRESHOLD_MULTIPLIER)) of + (Avail > (OrigAvail - ?THRESHOLD_OFFSET) andalso + Avail < (OrigAvail + ?THRESHOLD_OFFSET)) of true -> {State, oppressed}; false -> -- cgit v1.2.1 From f3ed38b3fe9b1a39617229c68b7e5bf21a1f484e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 1 Sep 2009 23:22:21 +0100 Subject: specs for rabbit_file_handle_cache --- src/rabbit_file_handle_cache.erl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rabbit_file_handle_cache.erl b/src/rabbit_file_handle_cache.erl index 83acffd0..7cc2ed90 100644 --- a/src/rabbit_file_handle_cache.erl +++ b/src/rabbit_file_handle_cache.erl @@ -40,6 +40,27 @@ mode %% the mode to open the files as }). +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(hcstate() :: #hcstate { limit :: non_neg_integer(), + handles :: dict(), + ages :: gb_tree(), + mode :: [any()] + }). + +-spec(init/2 :: (non_neg_integer(), [any()]) -> hcstate()). +-spec(close_all/1 :: (hcstate()) -> hcstate()). +-spec(close_file/2 :: (any(), hcstate()) -> hcstate()). +-spec(with_file_handle_at/4 :: (any(), non_neg_integer(), + fun ((any()) -> {non_neg_integer(), A}), + hcstate()) -> + {A, hcstate()}). +-endif. 
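+
+%% Usage sketch (illustrative only: the open mode and sizes are made up
+%% here; per the spec above, the fun passed to with_file_handle_at/4
+%% must return the new file offset paired with its result):
+%%
+%%   HC0 = rabbit_file_handle_cache:init(256, [read, raw, binary]),
+%%   {Data, HC1} = rabbit_file_handle_cache:with_file_handle_at(
+%%                   Path, 0,
+%%                   fun (Hdl) ->
+%%                           {ok, Bin} = file:read(Hdl, 1024),
+%%                           {1024, Bin}
+%%                   end, HC0),
+%%   HC2 = rabbit_file_handle_cache:close_all(HC1).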
+ +%%---------------------------------------------------------------------------- + init(Limit, OpenMode) -> #hcstate { limit = Limit, handles = dict:new(), -- cgit v1.2.1 From 9e1a4fd8d3c34983879345241708d773bf1d06e9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 1 Sep 2009 23:30:44 +0100 Subject: specs for msg_file --- src/rabbit_msg_file.erl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 70b04c09..6124b989 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -35,6 +35,8 @@ %%---------------------------------------------------------------------------- +-include("rabbit.hrl"). + -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(WRITE_OK_SIZE_BITS, 8). @@ -44,6 +46,20 @@ %%---------------------------------------------------------------------------- +-ifdef(use_specs). + +-spec(append/4 :: (any(), msg_id(), binary(), boolean()) -> + ({'ok', non_neg_integer()} | any())). +-spec(read/2 :: (any(), non_neg_integer()) -> + ({'ok', {msg_id(), binary(), boolean(), non_neg_integer()}} | + any())). +-spec(scan/1 :: (any()) -> {'ok', [{msg_id(), boolean(), non_neg_integer(), + non_neg_integer()}]}). + +-endif. + +%%---------------------------------------------------------------------------- + append(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> BodySize = size(MsgBody), MsgIdBin = term_to_binary(MsgId), -- cgit v1.2.1 From e4b19c1a219b4064bc20b4afdc7cf61f8915fca9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 1 Sep 2009 23:40:44 +0100 Subject: refactoring of (no)reply1 to eliminate duplication --- src/rabbit_disk_queue.erl | 48 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index aa45e748..0afba2d6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -762,31 +762,29 @@ dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, %% general helper functions %%---------------------------------------------------------------------------- -noreply(NewState) -> - noreply1(start_memory_timer(NewState)). - -noreply1(NewState = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }) -> - {noreply, NewState, hibernate}; -noreply1(NewState = #dqstate { commit_timer_ref = undefined }) -> - {noreply, start_commit_timer(NewState), 0}; -noreply1(NewState = #dqstate { on_sync_txns = [] }) -> - {noreply, stop_commit_timer(NewState), hibernate}; -noreply1(NewState) -> - {noreply, NewState, 0}. - -reply(Reply, NewState) -> - reply1(Reply, start_memory_timer(NewState)). - -reply1(Reply, NewState = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }) -> - {reply, Reply, NewState, hibernate}; -reply1(Reply, NewState = #dqstate { commit_timer_ref = undefined }) -> - {reply, Reply, start_commit_timer(NewState), 0}; -reply1(Reply, NewState = #dqstate { on_sync_txns = [] }) -> - {reply, Reply, stop_commit_timer(NewState), hibernate}; -reply1(Reply, NewState) -> - {reply, Reply, NewState, 0}. +noreply(State) -> + noreply1(start_memory_timer(State)). + +noreply1(State) -> + {State1, Timeout} = next_state(State), + {noreply, State1, Timeout}. + +reply(Reply, State) -> + reply1(Reply, start_memory_timer(State)). + +reply1(Reply, State) -> + {State1, Timeout} = next_state(State), + {reply, Reply, State1, Timeout}. 
+ +next_state(State = #dqstate { on_sync_txns = [], + commit_timer_ref = undefined }) -> + {State, hibernate}; +next_state(State = #dqstate { commit_timer_ref = undefined }) -> + {start_commit_timer(State), 0}; +next_state(State = #dqstate { on_sync_txns = [] }) -> + {stop_commit_timer(State), hibernate}; +next_state(State) -> + {State, 0}. form_filename(Name) -> filename:join(base_directory(), Name). -- cgit v1.2.1 From f6f592dbf28422767ed2d3f5a0c308121353f848 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 11:09:29 +0100 Subject: made type signatures almost as useful as comments --- include/rabbit.hrl | 3 +++ src/rabbit_file_handle_cache.erl | 14 +++++++++----- src/rabbit_msg_file.erl | 13 +++++++------ 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index a2eae8f8..25a36732 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -84,6 +84,9 @@ -type(info_key() :: atom()). -type(info() :: {info_key(), any()}). -type(regexp() :: binary()). +-type(file_path() :: any()). +-type(io_device() :: any()). +-type(file_open_mode() :: any()). %% this is really an abstract type, but dialyzer does not support them -type(guid() :: any()). diff --git a/src/rabbit_file_handle_cache.erl b/src/rabbit_file_handle_cache.erl index 7cc2ed90..85a5d6e9 100644 --- a/src/rabbit_file_handle_cache.erl +++ b/src/rabbit_file_handle_cache.erl @@ -33,6 +33,10 @@ -export([init/2, close_all/1, close_file/2, with_file_handle_at/4]). +%%---------------------------------------------------------------------------- + +-include("rabbit.hrl"). + -record(hcstate, { limit, %% how many file handles can we open? handles, %% dict of the files to their handles, age and offset @@ -47,14 +51,14 @@ -type(hcstate() :: #hcstate { limit :: non_neg_integer(), handles :: dict(), ages :: gb_tree(), - mode :: [any()] + mode :: [file_open_mode()] }). --spec(init/2 :: (non_neg_integer(), [any()]) -> hcstate()). +-spec(init/2 :: (non_neg_integer(), [file_open_mode()]) -> hcstate()). -spec(close_all/1 :: (hcstate()) -> hcstate()). --spec(close_file/2 :: (any(), hcstate()) -> hcstate()). --spec(with_file_handle_at/4 :: (any(), non_neg_integer(), - fun ((any()) -> {non_neg_integer(), A}), +-spec(close_file/2 :: (file_path(), hcstate()) -> hcstate()). +-spec(with_file_handle_at/4 :: (file_path(), non_neg_integer(), + fun ((io_device()) -> {non_neg_integer(), A}), hcstate()) -> {A, hcstate()}). -endif. diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 6124b989..0b3b5af8 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -48,13 +48,14 @@ -ifdef(use_specs). --spec(append/4 :: (any(), msg_id(), binary(), boolean()) -> - ({'ok', non_neg_integer()} | any())). --spec(read/2 :: (any(), non_neg_integer()) -> +-spec(append/4 :: (io_device(), msg_id(), binary(), boolean()) -> + ({'ok', non_neg_integer()} | {'error', any()})). +-spec(read/2 :: (io_device(), non_neg_integer()) -> ({'ok', {msg_id(), binary(), boolean(), non_neg_integer()}} | - any())). --spec(scan/1 :: (any()) -> {'ok', [{msg_id(), boolean(), non_neg_integer(), - non_neg_integer()}]}). + {'error', any()})). +-spec(scan/1 :: (io_device()) -> + {'ok', [{msg_id(), boolean(), non_neg_integer(), + non_neg_integer()}]}). -endif. -- cgit v1.2.1 From b0c18e25af532970969c3b6ff852ea0ae03ca0c2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 13:38:46 +0100 Subject: Fixed the mistake Matthias spotted in recovery from crash (comment #211). 
Tested by deliberately breaking the compaction in a variety of ways and then coverage and startup and ensuring the correct code paths are taken. --- src/rabbit_disk_queue.erl | 56 ++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 0afba2d6..6701bc6b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1815,34 +1815,46 @@ recover_crashed_compactions1(Files, TmpFile) -> %% is empty ok = file:delete(TmpPath); false -> - %% we're in case 4 above. Check that everything in the - %% main file is a valid message in mnesia - verify_messages_in_mnesia(MsgIds), - %% The main file should be contiguous - {Top, MsgIds} = find_contiguous_block_prefix( - lists:reverse(UncorruptedMessages)), + %% We're in case 4 above. We only care about the inital + %% msgs in main file that are not in the tmp file. If + %% there are no msgs in the tmp file then we would be in + %% the 'true' branch of this case, so we know the + %% lists:last call is safe. + EldestTmpMsgId = lists:last(MsgIdsTmp), + {MsgIds1, UncorruptedMessages1} + = case lists:splitwith( + fun (MsgId) -> MsgId /= EldestTmpMsgId end, MsgIds) of + {MsgIds, []} -> %% no msgs from tmp in main + {MsgIds, UncorruptedMessages}; + {Dropped, [EldestTmpMsgId | Rest]} -> + %% Msgs in Dropped are in tmp, so forget them. + %% *cry*. Lists indexed from 1. + {Rest, lists:sublist(UncorruptedMessages, + 2 + length(Dropped), + length(Rest))} + end, + %% Check that everything in the main file prefix is a + %% valid message in mnesia + verify_messages_in_mnesia(MsgIds1), + %% The main file prefix should be contiguous + {Top, MsgIds1} = find_contiguous_block_prefix( + lists:reverse(UncorruptedMessages1)), %% we should have that none of the messages in the prefix %% are in the tmp file true = lists:all(fun (MsgId) -> not (lists:member(MsgId, MsgIdsTmp)) - end, MsgIds), + end, MsgIds1), %% must open with read flag, otherwise will stomp over contents {ok, MainHdl} = open_file(NonTmpRelatedFile, ?WRITE_MODE ++ [read]), - {ok, Top} = file:position(MainHdl, Top), - %% wipe out any rubbish at the end of the file - ok = file:truncate(MainHdl), - %% there really could be rubbish at the end of the file - - %% we could have failed after the extending truncate. - %% Remember the head of the list will be the highest entry - %% in the file + %% Wipe out any rubbish at the end of the file. Remember + %% the head of the list will be the highest entry in the + %% file. [{_, _, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, TmpSize = TmpTopOffset + TmpTopTotalSize, - ExpectedAbsPos = Top + TmpSize, - {ok, ExpectedAbsPos} = file:position(MainHdl, {cur, TmpSize}), - %% and now extend the main file as big as necessary in a - %% single move if we run out of disk space, this truncate - %% could fail, but we still aren't risking losing data - ok = file:truncate(MainHdl), + %% Extend the main file as big as necessary in a single + %% move. 
If we run out of disk space, this truncate could
+            %% fail, but we still aren't risking losing data
+            ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize),
             {ok, TmpHdl} = open_file(TmpFile, ?READ_MODE),
             {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize),
             ok = file:sync(MainHdl),
@@ -1852,9 +1864,9 @@
             {ok, _MainMessages, MsgIdsMain} =
                 scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile),
-            %% check that everything in MsgIds is in MsgIdsMain
+            %% check that everything in MsgIds1 is in MsgIdsMain
             true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end,
-                             MsgIds),
+                             MsgIds1),
             %% check that everything in MsgIdsTmp is in MsgIdsMain
             true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end,
                              MsgIdsTmp)
-- cgit v1.2.1


From 663ce9b785c33e8ed5a071ac3bc661cb959b7618 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 2 Sep 2009 14:10:41 +0100
Subject: made dq:delete_queue a call, not a cast.

This eliminates a race condition between queues being deleted and new
queues with the same name being created and published to.
---
 src/rabbit_disk_queue.erl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 6701bc6b..9282eeb1 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -334,7 +334,7 @@ purge(Q) ->
     gen_server2:call(?SERVER, {purge, Q}, infinity).
 
 delete_queue(Q) ->
-    gen_server2:cast(?SERVER, {delete_queue, Q}).
+    gen_server2:call(?SERVER, {delete_queue, Q}, infinity).
 
 delete_non_durable_queues(DurableQueues) ->
     gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues},
@@ -467,6 +467,10 @@ handle_call({purge, Q}, _From, State) ->
     reply(Count, State1);
 handle_call(filesync, _From, State) ->
     reply(ok, sync_current_file_handle(State));
+handle_call({delete_queue, Q}, From, State) ->
+    gen_server2:reply(From, ok),
+    {ok, State1} = internal_delete_queue(Q, State),
+    noreply(State1);
 handle_call({len, Q}, _From, State = #dqstate { sequences = Sequences }) ->
     {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q),
     reply(WriteSeqId - ReadSeqId, State);
@@ -514,9 +518,6 @@ handle_cast({requeue, Q, MsgSeqIds}, State) ->
 handle_cast({requeue_next_n, Q, N}, State) ->
     {ok, State1} = internal_requeue_next_n(Q, N, State),
     noreply(State1);
-handle_cast({delete_queue, Q}, State) ->
-    {ok, State1} = internal_delete_queue(Q, State),
-    noreply(State1);
 handle_cast({set_mode, Mode}, State) ->
     noreply((case Mode of
                  oppressed -> fun to_disk_only_mode/1;
-- cgit v1.2.1


From 41477c58a3b57e2a38b6e367ad75a653f5596b37 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 2 Sep 2009 15:00:45 +0100
Subject: A couple of cosmetics - grab_msg_id went away, but tidied up
 terminate and vaporise - hopefully sufficiently that both Matthias and I
 are happy with the result
---
 src/rabbit_disk_queue.erl | 70 +++++++++++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 9282eeb1..ed075552 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -480,16 +480,8 @@ handle_call({foldl, Fun, Init, Q}, _From, State) ->
 handle_call(stop, _From, State) ->
     {stop, normal, ok, State}; %% gen_server now calls terminate
 handle_call(stop_vaporise, _From, State) ->
-    State1 = #dqstate { file_summary = FileSummary,
-                        sequences = Sequences } =
-        shutdown(State), %% tidy up file handles early
-    {atomic, ok} =
mnesia:clear_table(rabbit_disk_queue), - true = ets:delete(FileSummary), - true = ets:delete(Sequences), - lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), - {stop, normal, ok, - State1 #dqstate { current_file_handle = undefined }}; - %% gen_server now calls terminate, which then calls shutdown + {ok, State1} = vaporise(State), + {stop, normal, ok, State1}; %% gen_server now calls terminate handle_call(to_disk_only_mode, _From, State) -> reply(ok, to_disk_only_mode(State)); handle_call(to_ram_disk_mode, _From, State) -> @@ -560,31 +552,47 @@ handle_pre_hibernate(State) -> ok = report_memory(true, State), {hibernate, stop_memory_timer(State)}. -terminate(_Reason, State) -> - shutdown(State). - -shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - current_file_handle = FileHdl, - read_file_handle_cache = HC - }) -> - %% deliberately ignoring return codes here +terminate(_Reason, State = #dqstate { sequences = undefined }) -> + State; +terminate(_Reason, State = #dqstate { msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, + file_summary = FileSummary, + sequences = Sequences, + current_file_handle = FileHdl, + read_file_handle_cache = HC + }) -> State1 = stop_commit_timer(stop_memory_timer(State)), - dets:close(MsgLocationDets), - file:delete(msg_location_dets_file()), - true = ets:delete_all_objects(MsgLocationEts), case FileHdl of undefined -> ok; - _ -> sync_current_file_handle(State), + _ -> sync_current_file_handle(State1), file:close(FileHdl) end, store_safe_shutdown(), HC1 = rabbit_file_handle_cache:close_all(HC), - State1 #dqstate { current_file_handle = undefined, + dets:close(MsgLocationDets), + file:delete(msg_location_dets_file()), + ets:delete(MsgLocationEts), + ets:delete(FileSummary), + ets:delete(Sequences), + State1 #dqstate { msg_location_dets = undefined, + msg_location_ets = undefined, + file_summary = undefined, + sequences = undefined, + current_file_handle = undefined, current_dirty = false, - read_file_handle_cache = HC1, - memory_report_timer_ref = undefined - }. + read_file_handle_cache = HC1 + }. + +vaporise(State = #dqstate { current_file_handle = FileHdl }) -> + case FileHdl of + undefined -> ok; + _ -> sync_current_file_handle(State), + file:close(FileHdl) + end, + {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), + lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), + {ok, terminate(normal, State #dqstate { current_file_handle = undefined, + current_dirty = false })}. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -1766,12 +1774,10 @@ verify_messages_in_mnesia(MsgIds) -> msg_id)) end, MsgIds). -grab_msg_id({MsgId, _IsPersistent, _TotalSize, _FileOffset}) -> - MsgId. - scan_file_for_valid_messages_msg_ids(File) -> {ok, Messages} = scan_file_for_valid_messages(File), - {ok, Messages, lists:map(fun grab_msg_id/1, Messages)}. + {ok, Messages, + [MsgId || {MsgId, _IsPersistent, _TotalSize, _FileOffset} <- Messages]}. recover_crashed_compactions1(Files, TmpFile) -> NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, @@ -1825,7 +1831,7 @@ recover_crashed_compactions1(Files, TmpFile) -> {MsgIds1, UncorruptedMessages1} = case lists:splitwith( fun (MsgId) -> MsgId /= EldestTmpMsgId end, MsgIds) of - {MsgIds, []} -> %% no msgs from tmp in main + {_MsgIds, []} -> %% no msgs from tmp in main {MsgIds, UncorruptedMessages}; {Dropped, [EldestTmpMsgId | Rest]} -> %% Msgs in Dropped are in tmp, so forget them. 
-- cgit v1.2.1 From 58e5bf41dd1ab55a8ca0dab18ac5c4262ef62644 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 15:12:58 +0100 Subject: renaming of variables. --- src/rabbit_memory_manager.erl | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index bf694c8f..b9d7bf7b 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -318,27 +318,27 @@ tidy_and_sum_sleepy(IgnorePids, Sleepy, Procs) -> fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, IgnorePids, Sleepy, queue:new(), 0). -tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, DupCheckSet, - CataInit, AnaInit, AllocAcc) -> - case Catamorphism(CataInit) of - {empty, _CataInit} -> {AnaInit, AllocAcc}; - {{value, Pid}, CataInit1} -> - {DupCheckSet1, AnaInit1, AllocAcc1} = +tidy_and_sum(AtomExpected, Procs, Generator, Consumer, DupCheckSet, + GenInit, ConInit, AllocAcc) -> + case Generator(GenInit) of + {empty, _GetInit} -> {ConInit, AllocAcc}; + {{value, Pid}, GenInit1} -> + {DupCheckSet1, ConInit1, AllocAcc1} = case sets:is_element(Pid, DupCheckSet) of true -> - {DupCheckSet, AnaInit, AllocAcc}; + {DupCheckSet, ConInit, AllocAcc}; false -> case find_process(Pid, Procs) of {libre, Alloc, AtomExpected} -> {sets:add_element(Pid, DupCheckSet), - Anamorphism(Pid, Alloc, AnaInit), + Consumer(Pid, Alloc, ConInit), Alloc + AllocAcc}; _ -> - {DupCheckSet, AnaInit, AllocAcc} + {DupCheckSet, ConInit, AllocAcc} end end, - tidy_and_sum(AtomExpected, Procs, Catamorphism, Anamorphism, - DupCheckSet1, CataInit1, AnaInit1, AllocAcc1) + tidy_and_sum(AtomExpected, Procs, Generator, Consumer, + DupCheckSet1, GenInit1, ConInit1, AllocAcc1) end. free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req, Avail) -> @@ -359,21 +359,21 @@ free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req, Avail) -> end, fun queue:join/2, Procs, Sleepy, queue:new(), Req, Avail). free_from( - Callbacks, Hylomorphism, BaseCase, Procs, CataInit, AnaInit, Req, Avail) -> - case Hylomorphism(Procs, CataInit, AnaInit) of + Callbacks, Transformer, BaseCase, Procs, DestroyMe, CreateMe, Req, Avail) -> + case Transformer(Procs, DestroyMe, CreateMe) of empty -> - {AnaInit, Procs, Req}; - {skip, CataInit1, AnaInit1} -> - free_from(Callbacks, Hylomorphism, BaseCase, Procs, CataInit1, - AnaInit1, Req, Avail); - {value, CataInit1, Pid, Alloc} -> + {CreateMe, Procs, Req}; + {skip, DestroyMe1, CreateMe1} -> + free_from(Callbacks, Transformer, BaseCase, Procs, DestroyMe1, + CreateMe1, Req, Avail); + {value, DestroyMe1, Pid, Alloc} -> Procs1 = set_process_mode( Procs, Callbacks, Pid, oppressed, {oppressed, Avail}), Req1 = Req - Alloc, case Req1 > 0 of - true -> free_from(Callbacks, Hylomorphism, BaseCase, Procs1, - CataInit1, AnaInit, Req1, Avail); - false -> {BaseCase(CataInit1, AnaInit), Procs1, Req1} + true -> free_from(Callbacks, Transformer, BaseCase, Procs1, + DestroyMe1, CreateMe, Req1, Avail); + false -> {BaseCase(DestroyMe1, CreateMe), Procs1, Req1} end end. 
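A worked example of the renamed free_from (numbers illustrative): with
Req = 700000 and a sleepy queue whose first two processes hold 500000 and
300000 tokens, the first iteration oppresses the first process and leaves
Req1 = 200000, which is still positive, so it recurses; the second
iteration leaves Req1 = -100000 and stops. The negative remainder simply
records that slightly more than the requested amount was freed.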
-- cgit v1.2.1 From 90be527e9ed8f14c8a859a074bc819b256450983 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 15:20:35 +0100 Subject: another attempt at stop/terminate/vaporisation/shutdown/suicide/deathsquad --- src/rabbit_disk_queue.erl | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ed075552..f0402be6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -552,22 +552,26 @@ handle_pre_hibernate(State) -> ok = report_memory(true, State), {hibernate, stop_memory_timer(State)}. -terminate(_Reason, State = #dqstate { sequences = undefined }) -> +terminate(_Reason, State) -> + State1 = shutdown(State), + store_safe_shutdown(), + State1. + +shutdown(State = #dqstate { sequences = undefined }) -> State; -terminate(_Reason, State = #dqstate { msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - file_summary = FileSummary, - sequences = Sequences, - current_file_handle = FileHdl, - read_file_handle_cache = HC - }) -> +shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, + file_summary = FileSummary, + sequences = Sequences, + current_file_handle = FileHdl, + read_file_handle_cache = HC + }) -> State1 = stop_commit_timer(stop_memory_timer(State)), case FileHdl of undefined -> ok; _ -> sync_current_file_handle(State1), file:close(FileHdl) end, - store_safe_shutdown(), HC1 = rabbit_file_handle_cache:close_all(HC), dets:close(MsgLocationDets), file:delete(msg_location_dets_file()), @@ -583,16 +587,11 @@ terminate(_Reason, State = #dqstate { msg_location_dets = MsgLocationDets, read_file_handle_cache = HC1 }. -vaporise(State = #dqstate { current_file_handle = FileHdl }) -> - case FileHdl of - undefined -> ok; - _ -> sync_current_file_handle(State), - file:close(FileHdl) - end, +vaporise(State) -> + State1 = shutdown(State), {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), - {ok, terminate(normal, State #dqstate { current_file_handle = undefined, - current_dirty = false })}. + {ok, State1}. code_change(_OldVsn, State, _Extra) -> {ok, State}. -- cgit v1.2.1 From 8f3f0c0a1d7e9f8ef4fb90f1d2d2fb5e7ed49661 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 16:00:18 +0100 Subject: Shutdown III: Inline Vaporisation --- src/rabbit_disk_queue.erl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index f0402be6..a5c60250 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -480,7 +480,9 @@ handle_call({foldl, Fun, Init, Q}, _From, State) -> handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate handle_call(stop_vaporise, _From, State) -> - {ok, State1} = vaporise(State), + State1 = shutdown(State), + {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), + lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1}; %% gen_server now calls terminate handle_call(to_disk_only_mode, _From, State) -> reply(ok, to_disk_only_mode(State)); @@ -587,12 +589,6 @@ shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, read_file_handle_cache = HC1 }. 
-vaporise(State) -> - State1 = shutdown(State), - {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), - lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), - {ok, State1}. - code_change(_OldVsn, State, _Extra) -> {ok, State}. -- cgit v1.2.1 From 12071e742768c4c6ecd767c98b45c0130d3360c8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 16:34:24 +0100 Subject: factor out sublist and disjoint tests --- src/rabbit_disk_queue.erl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index a5c60250..ad7c8df1 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1811,7 +1811,7 @@ recover_crashed_compactions1(Files, TmpFile) -> %% back to before any of the files in the tmp file and copy %% them over again TmpPath = form_filename(TmpFile), - case lists:all(fun (MsgId) -> lists:member(MsgId, MsgIds) end, MsgIdsTmp) of + case is_sublist(MsgIdsTmp, MsgIds) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file %% note this also catches the case when the tmp file %% is empty @@ -1843,9 +1843,7 @@ recover_crashed_compactions1(Files, TmpFile) -> lists:reverse(UncorruptedMessages1)), %% we should have that none of the messages in the prefix %% are in the tmp file - true = lists:all(fun (MsgId) -> - not (lists:member(MsgId, MsgIdsTmp)) - end, MsgIds1), + true = is_disjoint(MsgIds1, MsgIdsTmp), %% must open with read flag, otherwise will stomp over contents {ok, MainHdl} = open_file(NonTmpRelatedFile, ?WRITE_MODE ++ [read]), %% Wipe out any rubbish at the end of the file. Remember @@ -1867,14 +1865,18 @@ recover_crashed_compactions1(Files, TmpFile) -> {ok, _MainMessages, MsgIdsMain} = scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile), %% check that everything in MsgIds1 is in MsgIdsMain - true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, - MsgIds1), + true = is_sublist(MsgIds1, MsgIdsMain), %% check that everything in MsgIdsTmp is in MsgIdsMain - true = lists:all(fun (MsgId) -> lists:member(MsgId, MsgIdsMain) end, - MsgIdsTmp) + true = is_sublist(MsgIdsTmp, MsgIdsMain) end, ok. +is_sublist(SmallerList, BiggerList) -> + lists:all(fun (Item) -> lists:member(Item, BiggerList) end, SmallerList). + +is_disjoint(SmallerList, BiggerList) -> + lists:all(fun (Item) -> not lists:member(Item, BiggerList) end, SmallerList). + %% Takes the list in *ascending* order (i.e. eldest message %% first). This is the opposite of what scan_file_for_valid_messages %% produces. The list of msgs that is produced is youngest first. -- cgit v1.2.1 From 57cb986fc7383215ab1525542ecbf86c504745b0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 17:34:26 +0100 Subject: Made the disk queue start up in the same mode it was last running in. This is slightly grim because I have to store some values in the mnesia table which then have to survive all the start up logic, so there are a couple of annoying 1-line changes elsewhere. However, it does indeed work. There was also one bool() -> boolean() fix in the memory_manager. --- src/rabbit_disk_queue.erl | 63 ++++++++++++++++++++++++++++++------------- src/rabbit_memory_manager.erl | 2 +- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ad7c8df1..62380884 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -70,12 +70,14 @@ -define(READ_MODE, [read, read_ahead]). 
-define(WRITE_MODE, [write, delayed_write]). --define(SHUTDOWN_MESSAGE_KEY, shutdown_token). +-define(SHUTDOWN_MESSAGE_KEY, {internal_token, shutdown}). -define(SHUTDOWN_MESSAGE, #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY, msg_id = infinity_and_beyond, is_delivered = never - }). + }). + +-define(BPR_KEY, {internal_token, bytes_per_record}). -define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs -define(SYNC_INTERVAL, 5). %% milliseconds @@ -386,13 +388,22 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = filelib:ensure_dir(form_filename("nothing")), Node = node(), - ok = case mnesia:change_table_copy_type(rabbit_disk_queue, Node, - disc_copies) of - {atomic, ok} -> ok; - {aborted, {already_exists, rabbit_disk_queue, Node, - disc_copies}} -> ok; - E -> E - end, + {Mode, MnesiaBPR, EtsBPR} = + case lists:member(Node, mnesia:table_info(rabbit_disk_queue, + disc_copies)) of + true -> + %% memory manager assumes we start oppressed. As we're + %% not, make sure it knows about it, by reporting zero + %% memory usage, which ensures it'll tell us to become + %% liberated + rabbit_memory_manager:report_memory( + self(), 0, false), + {ram_disk, undefined, undefined}; + false -> + [#dq_msg_loc { msg_id = {MnesiaBPR1, EtsBPR1}}] = + mnesia:dirty_read(rabbit_disk_queue, ?BPR_KEY), + {disk_only, MnesiaBPR1, EtsBPR1} + end, ok = detect_shutdown_state_and_adjust_delivered_flags(), @@ -418,7 +429,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> State = #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, - operation_mode = ram_disk, + operation_mode = Mode, file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private]), sequences = ets:new(?SEQUENCE_ETS_NAME, @@ -437,8 +448,8 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> [set, private]), memory_report_timer_ref = undefined, wordsize = erlang:system_info(wordsize), - mnesia_bytes_per_record = undefined, - ets_bytes_per_record = undefined + mnesia_bytes_per_record = MnesiaBPR, + ets_bytes_per_record = EtsBPR }, {ok, State1 = #dqstate { current_file_name = CurrentName, current_offset = Offset } } = @@ -617,7 +628,7 @@ start_memory_timer(State) -> report_memory(Hibernating, State) -> Bytes = memory_use(State), rabbit_memory_manager:report_memory(self(), trunc(2.5 * Bytes), - Hibernating). + Hibernating). memory_use(#dqstate { operation_mode = ram_disk, file_summary = FileSummary, @@ -658,12 +669,21 @@ to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, EtsSize = lists:max([1, ets:info(MsgLocationEts, size)]), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), + MnesiaBPR = MnesiaMemBytes / MnesiaSize, + EtsBPR = EtsMemBytes / EtsSize, + ok = rabbit_misc:execute_mnesia_transaction( + fun() -> + mnesia:write(rabbit_disk_queue, + #dq_msg_loc { queue_and_seq_id = ?BPR_KEY, + msg_id = {MnesiaBPR, EtsBPR}, + is_delivered = never }, write) + end), ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), garbage_collect(), State #dqstate { operation_mode = disk_only, - mnesia_bytes_per_record = MnesiaMemBytes / MnesiaSize, - ets_bytes_per_record = EtsMemBytes / EtsSize }. + mnesia_bytes_per_record = MnesiaBPR, + ets_bytes_per_record = EtsBPR }. 
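+%% In sketch form, the round trip this patch introduces: on the way down
+%% to disk_only mode the bytes-per-record figures are stored in the
+%% rabbit_disk_queue table itself under the reserved ?BPR_KEY,
+%%
+%%   #dq_msg_loc { queue_and_seq_id = ?BPR_KEY,
+%%                 msg_id = {MnesiaBPR, EtsBPR},
+%%                 is_delivered = never }
+%%
+%% and init/1 on a node holding only a disc copy reads them straight back
+%% with mnesia:dirty_read(rabbit_disk_queue, ?BPR_KEY), so the memory
+%% estimates survive a restart into disk_only mode.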
to_ram_disk_mode(State = #dqstate { operation_mode = ram_disk }) -> State; @@ -673,6 +693,7 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, rabbit_log:info("Converting disk queue to ram disk mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), + ok = mnesia:dirty_delete(rabbit_disk_queue, ?BPR_KEY), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), garbage_collect(), @@ -1546,12 +1567,12 @@ mark_message_delivered(Key = '$end_of_table', _N) -> mark_message_delivered(Key, N) -> [Obj] = mnesia:read(rabbit_disk_queue, Key, write), M = case Obj #dq_msg_loc.is_delivered of - true -> N; false -> ok = mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc { is_delivered = true }, write), - N - 1 + N - 1; + _ -> N %% needs to match 'never' as well as 'true' end, mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). @@ -1594,7 +1615,9 @@ load_from_disk(State) -> {ok, State2}. prune_mnesia_flush_batch(DeleteAcc) -> - lists:foldl(fun (Key, ok) -> + lists:foldl(fun ({internal_token, _}, ok) -> + ok; + (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) end, ok, DeleteAcc). @@ -1648,7 +1671,9 @@ extract_sequence_numbers(Sequences) -> fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), mnesia:foldl( - fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> + fun (#dq_msg_loc { queue_and_seq_id = {internal_token, _} }, + true) -> true; + (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> NextWrite = SeqId + 1, case ets:lookup(Sequences, Q) of [] -> ets:insert_new(Sequences, diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index b9d7bf7b..3b637b3a 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -53,7 +53,7 @@ -spec(start_link/0 :: () -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). --spec(report_memory/3 :: (pid(), non_neg_integer(), bool()) -> 'ok'). +-spec(report_memory/3 :: (pid(), non_neg_integer(), boolean()) -> 'ok'). -spec(info/0 :: () -> [{atom(), any()}]). -spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). -- cgit v1.2.1 From bbd851c7f1625adca09099b1ecf2c208df98b55a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Sep 2009 17:51:21 +0100 Subject: Guarantee that any oppressed process reporting 0 memory use will get liberated. 
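This pairs with the previous commit: a disk queue that finds its mnesia
table already in disc_copies at start-up reports zero memory use and
relies on that report to get itself liberated into ram_disk mode.
Without the Req > 0 guard added below (Req being the memory figure the
process reported), an oppressed process whose report stayed inside the
threshold window around its original allowance would never be let out.
The report in question, as in the previous patch, is simply:

    rabbit_memory_manager:report_memory(self(), 0, false)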
--- src/rabbit_memory_manager.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index 3b637b3a..aaacaef7 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -228,9 +228,10 @@ handle_cast({report_memory, Pid, Memory, Hibernating}, LibreActivity} end; {oppressed, OrigAvail} -> - case Alarmed orelse Hibernating orelse - (Avail > (OrigAvail - ?THRESHOLD_OFFSET) andalso - Avail < (OrigAvail + ?THRESHOLD_OFFSET)) of + case Req > 0 andalso + ( Alarmed orelse Hibernating orelse + (Avail > (OrigAvail - ?THRESHOLD_OFFSET) andalso + Avail < (OrigAvail + ?THRESHOLD_OFFSET)) ) of true -> {State, oppressed}; false -> -- cgit v1.2.1 From 8e23734c4bcee46e7d8770d99ff2634c63d06230 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Sep 2009 11:39:10 +0100 Subject: sort files for compaction numerically --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 62380884..63df5cc5 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1285,7 +1285,7 @@ maybe_roll_to_new_file(_, State) -> compact(FilesSet, State) -> %% smallest number, hence eldest, hence left-most, first - Files = lists:sort(sets:to_list(FilesSet)), + Files = lists:sort(fun file_name_sort/2, sets:to_list(FilesSet)), %% foldl reverses, so now youngest/right-most first RemainingFiles = lists:foldl(fun (File, Acc) -> delete_empty_files(File, Acc, State) -- cgit v1.2.1 From 2aa8b3cb17bff0efef36e8a7c236593d94e4fef4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Sep 2009 11:45:49 +0100 Subject: commented on purpose of cache --- src/rabbit_disk_queue.erl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 63df5cc5..3914e933 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -705,6 +705,17 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, %% message cache helper functions %%---------------------------------------------------------------------------- +%% The purpose of the cache is not especially performance, though it +%% can help there too. The main purpose is to ensure that individual +%% messages that are sent to multiple queues, and then to disk, are +%% read back as the same binary object rather than multiples of +%% identical binary objects. This prevents memory explosion. +%% +%% We limit the cache in size. If we didn't, then we could have two +%% queues coming off the same exchange, receiving the same millions of +%% messages, then one queue gets drained, which would pull the entire +%% queue into the cache, which would potentially explode memory. + remove_cache_entry(MsgId, #dqstate { message_cache = Cache }) -> true = ets:delete(Cache, MsgId), ok. 
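%% Aside (a sketch of the pattern, not patch content): the size bound
%% described in the comment above is enforced at insertion time -- when
%% the cache is over budget the message is simply not cached and readers
%% fall back to disk (ets:info(Tab, memory) reports words):
%%
%%   maybe_cache(Cache, MsgId, Message, MaxWords) ->
%%       case ets:info(Cache, memory) > MaxWords of
%%           true  -> ok;
%%           false -> true = ets:insert_new(Cache, {MsgId, Message, 1}),
%%                    ok
%%       end.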
-- cgit v1.2.1 From 538b8e5dd6c87583e8e983fb50f47b0508a15b2d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Sep 2009 11:55:53 +0100 Subject: ensure that we fsync after copying messages across during compaction --- src/rabbit_disk_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 3914e933..134762d7 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -1445,7 +1445,6 @@ combine_files({Source, SourceValid, _SourceContiguousTop, ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up - ok = file:sync(DestinationHdl), ok = file:close(SourceHdl), ok = file:close(DestinationHdl), ok = file:delete(form_filename(Source)), @@ -1487,6 +1486,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, BSize1 = BlockEnd1 - BlockStart1, {ok, BlockStart1} = file:position(SourceHdl, BlockStart1), {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), + ok = file:sync(DestinationHdl), ok. close_file(File, State = #dqstate { read_file_handle_cache = HC }) -> -- cgit v1.2.1 From cb642dde8835dd0baf2b6aa33071a4642f5eb392 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Sep 2009 14:02:42 +0100 Subject: use a record in file summary --- src/rabbit_disk_queue.erl | 112 +++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 134762d7..961d1fe6 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -113,6 +113,9 @@ -record(message_store_entry, {msg_id, ref_count, file, offset, total_size, is_persistent}). +-record(file_summary_entry, + {file, valid_total_size, contiguous_top, left, right}). 
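%% Aside (illustrative): keying an ets table on a record field other
%% than the first requires {keypos, N}. The expression
%% #file_summary_entry.file evaluates to that field's tuple position --
%% 2, since element 1 is the record tag -- which is what the
%% {keypos, 2} passed to ets:new/2 below corresponds to:
%%
%%   Tab = ets:new(example, [set, {keypos, #file_summary_entry.file}]),
%%   true = ets:insert(Tab, #file_summary_entry { file = "0.rdq",
%%                                                valid_total_size = 0,
%%                                                contiguous_top = 0,
%%                                                left = undefined,
%%                                                right = undefined }),
%%   [#file_summary_entry {}] = ets:lookup(Tab, "0.rdq").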
+ %% The components: %% %% MsgLocation: this is a (d)ets table which contains: @@ -430,8 +433,9 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> #dqstate { msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, operation_mode = Mode, - file_summary = ets:new(?FILE_SUMMARY_ETS_NAME, - [set, private]), + file_summary = ets:new( + ?FILE_SUMMARY_ETS_NAME, + [set, private, {keypos, 2}]), sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), current_file_num = 0, @@ -693,7 +697,6 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, rabbit_log:info("Converting disk queue to ram disk mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), - ok = mnesia:dirty_delete(rabbit_disk_queue, ?BPR_KEY), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), garbage_collect(), @@ -1024,12 +1027,14 @@ remove_message(MsgId, Files, 1 -> ok = dets_ets_delete(State, MsgId), ok = remove_cache_entry(MsgId, State), - [{File, ValidTotalSize, ContiguousTop, Left, Right}] = + [FSEntry = #file_summary_entry { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop }] = ets:lookup(FileSummary, File), ContiguousTop1 = lists:min([ContiguousTop, Offset]), - true = - ets:insert(FileSummary, {File, ValidTotalSize - TotalSize, - ContiguousTop1, Left, Right}), + ValidTotalSize1 = ValidTotalSize - TotalSize, + true = ets:insert(FileSummary, FSEntry #file_summary_entry { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), if CurName =:= File -> Files; true -> sets:add_element(File, Files) end; @@ -1058,7 +1063,9 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, { msg_id = MsgId, ref_count = 1, file = CurName, offset = CurOffset, total_size = TotalSize, is_persistent = IsPersistent }), - [{CurName, ValidTotalSize, ContiguousTop, Left, undefined}] = + [FSEntry = #file_summary_entry { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined }] = ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> @@ -1066,8 +1073,9 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, ValidTotalSize1; true -> ContiguousTop end, - true = ets:insert(FileSummary, {CurName, ValidTotalSize1, - ContiguousTop1, Left, undefined}), + true = ets:insert(FileSummary, FSEntry #file_summary_entry { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), NextOffset = CurOffset + TotalSize, maybe_roll_to_new_file( NextOffset, State #dqstate {current_offset = NextOffset, @@ -1282,8 +1290,12 @@ maybe_roll_to_new_file(Offset, NextNum = CurNum + 1, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = open_file(NextName, ?WRITE_MODE), - true = ets:update_element(FileSummary, CurName, {5, NextName}),%% 5 is Right - true = ets:insert_new(FileSummary, {NextName, 0, 0, CurName, undefined}), + true = ets:update_element(FileSummary, CurName, + {#file_summary_entry.right, NextName}), + true = ets:insert_new( + FileSummary, #file_summary_entry { + file = NextName, valid_total_size = 0, contiguous_top = 0, + left = CurName, right = undefined }), State2 = State1 #dqstate { current_file_name = NextName, current_file_handle = NextHdl, current_file_num = NextNum, @@ -1316,16 +1328,15 @@ combine_file(File, State = #dqstate { file_summary = FileSummary, %% been deleted within the current GC run case ets:lookup(FileSummary, 
File) of [] -> State; - [FileObj = {File, _ValidData, _ContiguousTop, Left, Right}] -> + [FSEntry = #file_summary_entry { left = Left, right = Right }] -> GoRight = fun() -> case Right of undefined -> State; _ when not (CurName == Right) -> - [RightObj] = ets:lookup(FileSummary, Right), - {_, State1} = - adjust_meta_and_combine(FileObj, RightObj, - State), + [FSRight] = ets:lookup(FileSummary, Right), + {_, State1} = adjust_meta_and_combine( + FSEntry, FSRight, State), State1; _ -> State end @@ -1333,8 +1344,8 @@ combine_file(File, State = #dqstate { file_summary = FileSummary, case Left of undefined -> GoRight(); - _ -> [LeftObj] = ets:lookup(FileSummary, Left), - case adjust_meta_and_combine(LeftObj, FileObj, State) of + _ -> [FSLeft] = ets:lookup(FileSummary, Left), + case adjust_meta_and_combine(FSLeft, FSEntry, State) of {true, State1} -> State1; {false, State} -> GoRight() end @@ -1342,21 +1353,23 @@ combine_file(File, State = #dqstate { file_summary = FileSummary, end. adjust_meta_and_combine( - LeftObj = {LeftFile, LeftValidData, _LeftContigTop, LeftLeft, RightFile}, - RightObj = {RightFile, RightValidData, _RightContigTop, LeftFile, RightRight}, + LeftObj = #file_summary_entry { + file = LeftFile, valid_total_size = LeftValidData, right = RightFile }, + RightObj = #file_summary_entry { + file = RightFile, valid_total_size = RightValidData, left = LeftFile, + right = RightRight }, State = #dqstate { file_size_limit = FileSizeLimit, - file_summary = FileSummary - }) -> + file_summary = FileSummary }) -> TotalValidData = LeftValidData + RightValidData, if FileSizeLimit >= TotalValidData -> State1 = combine_files(RightObj, LeftObj, State), %% this could fail if RightRight is undefined - %% left is the 4th field - ets:update_element(FileSummary, RightRight, {4, LeftFile}), - true = ets:insert(FileSummary, {LeftFile, - TotalValidData, TotalValidData, - LeftLeft, - RightRight}), + ets:update_element(FileSummary, RightRight, + {#file_summary_entry.left, LeftFile}), + true = ets:insert(FileSummary, LeftObj #file_summary_entry { + valid_total_size = TotalValidData, + contiguous_top = TotalValidData, + right = RightRight }), true = ets:delete(FileSummary, RightFile), {true, State1}; true -> {false, State} @@ -1383,11 +1396,14 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> ok = file:truncate(FileHdl), ok = preallocate(FileHdl, Highpoint, Lowpoint). -combine_files({Source, SourceValid, _SourceContiguousTop, - _SourceLeft, _SourceRight}, - {Destination, DestinationValid, DestinationContiguousTop, - _DestinationLeft, _DestinationRight}, - State) -> +combine_files(#file_summary_entry { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary_entry { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, + State) -> State1 = close_file(Source, close_file(Destination, State)), {ok, SourceHdl} = open_file(Source, ?READ_MODE), {ok, DestinationHdl} = open_file(Destination, ?READ_MODE ++ ?WRITE_MODE), @@ -1494,23 +1510,25 @@ close_file(File, State = #dqstate { read_file_handle_cache = HC }) -> State #dqstate { read_file_handle_cache = HC1 }. 
delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> - [{File, ValidData, _ContiguousTop, Left, Right}] = + [#file_summary_entry { valid_total_size = ValidData, + left = Left, right = Right }] = ets:lookup(FileSummary, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined 0 -> case {Left, Right} of - {undefined, _} when not (is_atom(Right)) -> - %% the eldest file is empty. YAY! - %% left is the 4th field - true = - ets:update_element(FileSummary, Right, {4, undefined}); + {undefined, _} when not is_atom(Right) -> + %% the eldest file is empty. + true = ets:update_element( + FileSummary, Right, + {#file_summary_entry.left, undefined}); {_, _} when not (is_atom(Right)) -> - %% left is the 4th field - true = ets:update_element(FileSummary, Right, {4, Left}), - %% right is the 5th field - true = ets:update_element(FileSummary, Left, {5, Right}) + true = ets:update_element(FileSummary, Right, + {#file_summary_entry.left, Left}), + true = + ets:update_element(FileSummary, Left, + {#file_summary_entry.right, Right}) end, true = ets:delete(FileSummary, File), ok = file:delete(form_filename(File)), @@ -1786,8 +1804,10 @@ load_messages(Left, [File|Files], [] -> undefined; [F|_] -> F end, - true = ets:insert_new(FileSummary, - {File, ValidTotalSize, ContiguousTop, Left, Right}), + true = ets:insert_new(FileSummary, #file_summary_entry { + file = File, valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, left = Left, + right = Right }), load_messages(File, Files, State). recover_crashed_compactions(Files, TmpFiles) -> -- cgit v1.2.1 From 6a6cb0de547946b75e28e03245d2ac2fcb1c4db2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Sep 2009 14:03:31 +0100 Subject: the disk queue sometimes needs much longer to shut down safely if it's heavily loaded - up the limit to 5 seconds. However, I suspect something like 60 seconds is more likely to be realistic value --- src/rabbit.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 773b4d04..f3008a93 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -252,7 +252,7 @@ print_banner() -> start_child(Mod) -> {ok,_} = supervisor:start_child(rabbit_sup, {Mod, {Mod, start_link, []}, - transient, 1000, worker, [Mod]}), + transient, 5000, worker, [Mod]}), ok. ensure_working_log_handlers() -> -- cgit v1.2.1 From be99ccc2fa6602c17f50bdebf7087bfda3383da4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Sep 2009 14:33:37 +0100 Subject: Switched to using a file to hold the disk_only data. Also found a bug where vaporise was wiping out the disk_only data (both as a file, and when it was in mnesia). The result was that if the dq was in disk_only mode before being vaporised, it would refuse to start up again. Thus vaporise now pushes the queue back to ram_disk mode if necessary, after wiping out the contents of the mnesia table. Finally, all tests pass again. --- src/rabbit_disk_queue.erl | 49 ++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 961d1fe6..2d13a337 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -65,6 +65,7 @@ -define(CACHE_MAX_SIZE, 10485760). -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(DISK_ONLY_MODE_FILE, "disk_only_stats.dat"). -define(BINARY_MODE, [raw, binary]). 
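%% Aside (for reference): in the child spec changed above, the fourth
%% element is the shutdown timeout in milliseconds -- how long the
%% supervisor waits, after asking the child to stop, before killing it
%% outright:
%%
%%   {Mod, {Mod, start_link, []}, transient, 5000, worker, [Mod]}
%%
%% hence the suspicion in the commit message that a heavily loaded disk
%% queue may really want something closer to 60000.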
-define(READ_MODE, [read, read_ahead]). @@ -77,8 +78,6 @@ is_delivered = never }). --define(BPR_KEY, {internal_token, bytes_per_record}). - -define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs -define(SYNC_INTERVAL, 5). %% milliseconds -define(HIBERNATE_AFTER_MIN, 1000). @@ -403,9 +402,14 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> self(), 0, false), {ram_disk, undefined, undefined}; false -> - [#dq_msg_loc { msg_id = {MnesiaBPR1, EtsBPR1}}] = - mnesia:dirty_read(rabbit_disk_queue, ?BPR_KEY), - {disk_only, MnesiaBPR1, EtsBPR1} + Path = form_filename(?DISK_ONLY_MODE_FILE), + case rabbit_misc:read_term_file(Path) of + {ok, [{MnesiaBPR1, EtsBPR1}]} -> + {disk_only, MnesiaBPR1, EtsBPR1}; + {error, Reason} -> + throw({error, {cannot_read_disk_only_mode_file, Path, + Reason}}) + end end, ok = detect_shutdown_state_and_adjust_delivered_flags(), @@ -494,9 +498,14 @@ handle_call({foldl, Fun, Init, Q}, _From, State) -> reply(Result, State1); handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate -handle_call(stop_vaporise, _From, State) -> +handle_call(stop_vaporise, _From, State = #dqstate { operation_mode = Mode }) -> State1 = shutdown(State), {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), + {atomic, ok} = case Mode of + ram_disk -> {atomic, ok}; + disk_only -> mnesia:change_table_copy_type( + rabbit_disk_queue, node(), disc_copies) + end, lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1}; %% gen_server now calls terminate handle_call(to_disk_only_mode, _From, State) -> @@ -675,13 +684,12 @@ to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, disc_only_copies), MnesiaBPR = MnesiaMemBytes / MnesiaSize, EtsBPR = EtsMemBytes / EtsSize, - ok = rabbit_misc:execute_mnesia_transaction( - fun() -> - mnesia:write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = ?BPR_KEY, - msg_id = {MnesiaBPR, EtsBPR}, - is_delivered = never }, write) - end), + Path = form_filename(?DISK_ONLY_MODE_FILE), + case rabbit_misc:write_term_file(Path, [{MnesiaBPR, EtsBPR}]) of + ok -> ok; + {error, Reason} -> + throw({error, {cannot_create_disk_only_mode_file, Path, Reason}}) + end, ok = dets:from_ets(MsgLocationDets, MsgLocationEts), true = ets:delete_all_objects(MsgLocationEts), garbage_collect(), @@ -697,6 +705,7 @@ to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, rabbit_log:info("Converting disk queue to ram disk mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), + ok = file:delete(form_filename(?DISK_ONLY_MODE_FILE)), true = ets:from_dets(MsgLocationEts, MsgLocationDets), ok = dets:delete_all_objects(MsgLocationDets), garbage_collect(), @@ -1596,12 +1605,12 @@ mark_message_delivered(Key = '$end_of_table', _N) -> mark_message_delivered(Key, N) -> [Obj] = mnesia:read(rabbit_disk_queue, Key, write), M = case Obj #dq_msg_loc.is_delivered of - false -> + true -> N; + false -> ok = mnesia:write(rabbit_disk_queue, Obj #dq_msg_loc { is_delivered = true }, write), - N - 1; - _ -> N %% needs to match 'never' as well as 'true' + N - 1 end, mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). @@ -1644,9 +1653,7 @@ load_from_disk(State) -> {ok, State2}. prune_mnesia_flush_batch(DeleteAcc) -> - lists:foldl(fun ({internal_token, _}, ok) -> - ok; - (Key, ok) -> + lists:foldl(fun (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) end, ok, DeleteAcc). 
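%% Aside (a sketch, not code from the patch): prune_mnesia_flush_batch/1
%% above is the flush half of a batched delete. The traversal side
%% accumulates keys and flushes whenever it has gathered ?BATCH_SIZE of
%% them, so the accumulator stays bounded; roughly:
%%
%%   prune_loop('$end_of_table', DeleteAcc, _Len) ->
%%       prune_mnesia_flush_batch(DeleteAcc);
%%   prune_loop(Key, DeleteAcc, Len) when Len >= ?BATCH_SIZE ->
%%       ok = prune_mnesia_flush_batch(DeleteAcc),
%%       prune_loop(Key, [], 0);
%%   prune_loop(Key, DeleteAcc, Len) ->
%%       Next = mnesia:dirty_next(rabbit_disk_queue, Key),
%%       prune_loop(Next, [Key | DeleteAcc], Len + 1).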
@@ -1700,9 +1707,7 @@ extract_sequence_numbers(Sequences) -> fun() -> ok = mnesia:read_lock_table(rabbit_disk_queue), mnesia:foldl( - fun (#dq_msg_loc { queue_and_seq_id = {internal_token, _} }, - true) -> true; - (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> + fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> NextWrite = SeqId + 1, case ets:lookup(Sequences, Q) of [] -> ets:insert_new(Sequences, -- cgit v1.2.1 From 42a50439996cd340ea9cadc5037f045d4ac90385 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Sep 2009 15:03:31 +0100 Subject: rename #message_store_entry to #msg_location to match what we call the containing table --- src/rabbit_disk_queue.erl | 76 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 2d13a337..c5e79df9 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -109,7 +109,7 @@ ets_bytes_per_record %% bytes per record in msg_location_ets }). --record(message_store_entry, +-record(msg_location, {msg_id, ref_count, file, offset, total_size, is_persistent}). -record(file_summary_entry, @@ -919,7 +919,7 @@ internal_fetch_attributes(Q, MarkDelivered, Advance, State) -> case queue_head(Q, MarkDelivered, Advance, State) of E = {ok, empty, _} -> E; {ok, AckTag, IsDelivered, - #message_store_entry { msg_id = MsgId, is_persistent = IsPersistent }, + #msg_location { msg_id = MsgId, is_persistent = IsPersistent }, Remaining, State1} -> {ok, {MsgId, IsPersistent, IsDelivered, AckTag, Remaining}, State1} end. @@ -942,9 +942,9 @@ maybe_advance(pop_queue, Sequences, Q, ReadSeqId, WriteSeqId) -> true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), ok. -read_stored_message(#message_store_entry { msg_id = MsgId, ref_count = RefCount, - file = File, offset = Offset, - total_size = TotalSize }, State) -> +read_stored_message(#msg_location { msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize }, State) -> case fetch_and_increment_cache(MsgId, State) of not_found -> {{ok, {MsgId, MsgBody, _IsPersistent, _BodySize}}, State1} = @@ -979,7 +979,7 @@ update_message_attributes(Q, SeqId, MarkDelivered, State) -> [Obj = #dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), - [StoreEntry = #message_store_entry { msg_id = MsgId }] = + [StoreEntry = #msg_location { msg_id = MsgId }] = dets_ets_lookup(State, MsgId), ok = case {IsDelivered, MarkDelivered} of {true, _} -> ok; @@ -1029,8 +1029,8 @@ remove_message(MsgId, Files, current_file_name = CurName }) -> [StoreEntry = - #message_store_entry { msg_id = MsgId, ref_count = RefCount, file = File, - offset = Offset, total_size = TotalSize }] = + #msg_location { msg_id = MsgId, ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize }] = dets_ets_lookup(State, MsgId), case RefCount of 1 -> @@ -1049,8 +1049,8 @@ remove_message(MsgId, Files, end; _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), - ok = dets_ets_insert(State, StoreEntry #message_store_entry - { ref_count = RefCount - 1 }), + ok = dets_ets_insert(State, StoreEntry #msg_location { + ref_count = RefCount - 1 }), Files end. 
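%% Aside (illustrative): the #msg_location { file = ..., _ = '_' }
%% patterns handed to dets/ets match_object rely on the record wildcard
%% shorthand: _ = '_' sets every field not named explicitly to the '_'
%% wildcard, so the pattern expands to a full tuple such as
%% {msg_location, '_', '_', "3.rdq", '_', '_', '_'}. For example:
%%
%%   InFile = ets:match_object(
%%              Tab, #msg_location { file = "3.rdq", _ = '_' }).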
@@ -1068,8 +1068,8 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, CurHdl, MsgId, msg_to_bin(Message), IsPersistent), true = dets_ets_insert_new( - State, #message_store_entry - { msg_id = MsgId, ref_count = 1, file = CurName, + State, #msg_location { + msg_id = MsgId, ref_count = 1, file = CurName, offset = CurOffset, total_size = TotalSize, is_persistent = IsPersistent }), [FSEntry = #file_summary_entry { valid_total_size = ValidTotalSize, @@ -1090,10 +1090,10 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, NextOffset, State #dqstate {current_offset = NextOffset, current_dirty = true}); [StoreEntry = - #message_store_entry { msg_id = MsgId, ref_count = RefCount }] -> + #msg_location { msg_id = MsgId, ref_count = RefCount }] -> %% We already know about it, just update counter - ok = dets_ets_insert(State, StoreEntry #message_store_entry - { ref_count = RefCount + 1 }), + ok = dets_ets_insert(State, StoreEntry #msg_location { + ref_count = RefCount + 1 }), {ok, State} end. @@ -1105,8 +1105,8 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, }) -> NeedsSync = IsDirty andalso lists:any(fun ({MsgId, _IsDelivered}) -> - [#message_store_entry { msg_id = MsgId, file = File, - offset = Offset }] = + [#msg_location { msg_id = MsgId, file = File, + offset = Offset }] = dets_ets_lookup(State, MsgId), File =:= CurFile andalso Offset >= SyncOffset end, PubMsgIds), @@ -1389,8 +1389,8 @@ sort_msg_locations_by_offset(Dir, List) -> asc -> fun erlang:'<'/2; desc -> fun erlang:'>'/2 end, - lists:sort(fun (#message_store_entry { offset = OffA }, - #message_store_entry { offset = OffB }) -> + lists:sort(fun (#msg_location { offset = OffA }, + #msg_location { offset = OffB }) -> Comp(OffA, OffB) end, List). @@ -1431,7 +1431,7 @@ combine_files(#file_summary_entry { file = Source, {ok, TmpHdl} = open_file(Tmp, ?READ_MODE ++ ?WRITE_MODE), Worklist = lists:dropwhile( - fun (#message_store_entry { offset = Offset }) + fun (#msg_location { offset = Offset }) when Offset /= DestinationContiguousTop -> %% it cannot be that Offset == %% DestinationContiguousTop because if it @@ -1444,8 +1444,8 @@ combine_files(#file_summary_entry { file = Source, %% enforce it anyway end, sort_msg_locations_by_offset( asc, dets_ets_match_object( - State1, #message_store_entry - { file = Destination, _ = '_' }))), + State1, #msg_location { + file = Destination, _ = '_' }))), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State1), @@ -1465,8 +1465,8 @@ combine_files(#file_summary_entry { file = Source, end, SourceWorkList = sort_msg_locations_by_offset( - asc, dets_ets_match_object(State1, #message_store_entry - { file = Source, _ = '_' })), + asc, dets_ets_match_object(State1, #msg_location { + file = Source, _ = '_' })), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up @@ -1479,15 +1479,15 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, State) -> {FinalOffset, BlockStart1, BlockEnd1} = lists:foldl( - fun (StoreEntry = #message_store_entry { offset = Offset, - total_size = TotalSize }, + fun (StoreEntry = #msg_location { offset = Offset, + total_size = TotalSize }, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. 
%% Offset, BlockStart and BlockEnd are in the SourceFile %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert(State, StoreEntry #message_store_entry - { file = Destination, - offset = CurOffset }), + ok = dets_ets_insert(State, StoreEntry #msg_location { + file = Destination, + offset = CurOffset }), NextOffset = CurOffset + TotalSize, if BlockStart =:= undefined -> %% base case, called only for the first list elem @@ -1670,10 +1670,10 @@ prune_mnesia(State, Key, Files, DeleteAcc, Len) -> [] -> %% msg hasn't been found on disk, delete it {[{Q, SeqId} | DeleteAcc], Files, Len + 1}; - [#message_store_entry { msg_id = MsgId, is_persistent = true }] -> + [#msg_location { msg_id = MsgId, is_persistent = true }] -> %% msg is persistent, keep it {DeleteAcc, Files, Len}; - [#message_store_entry { msg_id = MsgId, is_persistent = false}] -> + [#msg_location { msg_id = MsgId, is_persistent = false}] -> %% msg is not persistent, delete it Files2 = remove_message(MsgId, Files, State), {[{Q, SeqId} | DeleteAcc], Files2, Len + 1} @@ -1770,13 +1770,13 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> load_messages(Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), Offset = - case dets_ets_match_object(State, #message_store_entry - { file = Left, _ = '_' }) of + case dets_ets_match_object(State, #msg_location { + file = Left, _ = '_' }) of [] -> 0; L -> - [ #message_store_entry {file = Left, - offset = MaxOffset, - total_size = TotalSize} | _ ] = + [ #msg_location { file = Left, + offset = MaxOffset, + total_size = TotalSize} | _ ] = sort_msg_locations_by_offset(desc, L), MaxOffset + TotalSize end, @@ -1794,8 +1794,8 @@ load_messages(Left, [File|Files], 0 -> {VMAcc, VTSAcc}; RefCount -> true = dets_ets_insert_new( - State, #message_store_entry - { msg_id = MsgId, ref_count = RefCount, + State, #msg_location { + msg_id = MsgId, ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize, is_persistent = IsPersistent }), -- cgit v1.2.1 From 326341bc00df7e74268eb827d42b3fa2424fbaae Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Sep 2009 15:10:44 +0100 Subject: rename #file_summary_entry to #file_summary to match what we call the containing table --- src/rabbit_disk_queue.erl | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c5e79df9..538b08d8 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -112,7 +112,7 @@ -record(msg_location, {msg_id, ref_count, file, offset, total_size, is_persistent}). --record(file_summary_entry, +-record(file_summary, {file, valid_total_size, contiguous_top, left, right}). 
%% The components: @@ -1036,12 +1036,12 @@ remove_message(MsgId, Files, 1 -> ok = dets_ets_delete(State, MsgId), ok = remove_cache_entry(MsgId, State), - [FSEntry = #file_summary_entry { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop }] = + [FSEntry = #file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop }] = ets:lookup(FileSummary, File), ContiguousTop1 = lists:min([ContiguousTop, Offset]), ValidTotalSize1 = ValidTotalSize - TotalSize, - true = ets:insert(FileSummary, FSEntry #file_summary_entry { + true = ets:insert(FileSummary, FSEntry #file_summary { valid_total_size = ValidTotalSize1, contiguous_top = ContiguousTop1 }), if CurName =:= File -> Files; @@ -1072,9 +1072,9 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, msg_id = MsgId, ref_count = 1, file = CurName, offset = CurOffset, total_size = TotalSize, is_persistent = IsPersistent }), - [FSEntry = #file_summary_entry { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - right = undefined }] = + [FSEntry = #file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined }] = ets:lookup(FileSummary, CurName), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> @@ -1082,7 +1082,7 @@ internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, ValidTotalSize1; true -> ContiguousTop end, - true = ets:insert(FileSummary, FSEntry #file_summary_entry { + true = ets:insert(FileSummary, FSEntry #file_summary { valid_total_size = ValidTotalSize1, contiguous_top = ContiguousTop1 }), NextOffset = CurOffset + TotalSize, @@ -1300,9 +1300,9 @@ maybe_roll_to_new_file(Offset, NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, {ok, NextHdl} = open_file(NextName, ?WRITE_MODE), true = ets:update_element(FileSummary, CurName, - {#file_summary_entry.right, NextName}), + {#file_summary.right, NextName}), true = ets:insert_new( - FileSummary, #file_summary_entry { + FileSummary, #file_summary { file = NextName, valid_total_size = 0, contiguous_top = 0, left = CurName, right = undefined }), State2 = State1 #dqstate { current_file_name = NextName, @@ -1337,7 +1337,7 @@ combine_file(File, State = #dqstate { file_summary = FileSummary, %% been deleted within the current GC run case ets:lookup(FileSummary, File) of [] -> State; - [FSEntry = #file_summary_entry { left = Left, right = Right }] -> + [FSEntry = #file_summary { left = Left, right = Right }] -> GoRight = fun() -> case Right of @@ -1362,9 +1362,9 @@ combine_file(File, State = #dqstate { file_summary = FileSummary, end. 
adjust_meta_and_combine( - LeftObj = #file_summary_entry { + LeftObj = #file_summary { file = LeftFile, valid_total_size = LeftValidData, right = RightFile }, - RightObj = #file_summary_entry { + RightObj = #file_summary { file = RightFile, valid_total_size = RightValidData, left = LeftFile, right = RightRight }, State = #dqstate { file_size_limit = FileSizeLimit, @@ -1374,8 +1374,8 @@ adjust_meta_and_combine( State1 = combine_files(RightObj, LeftObj, State), %% this could fail if RightRight is undefined ets:update_element(FileSummary, RightRight, - {#file_summary_entry.left, LeftFile}), - true = ets:insert(FileSummary, LeftObj #file_summary_entry { + {#file_summary.left, LeftFile}), + true = ets:insert(FileSummary, LeftObj #file_summary { valid_total_size = TotalValidData, contiguous_top = TotalValidData, right = RightRight }), @@ -1405,13 +1405,13 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> ok = file:truncate(FileHdl), ok = preallocate(FileHdl, Highpoint, Lowpoint). -combine_files(#file_summary_entry { file = Source, - valid_total_size = SourceValid, - left = Destination }, - #file_summary_entry { file = Destination, - valid_total_size = DestinationValid, - contiguous_top = DestinationContiguousTop, - right = Source }, +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, State) -> State1 = close_file(Source, close_file(Destination, State)), {ok, SourceHdl} = open_file(Source, ?READ_MODE), @@ -1519,8 +1519,8 @@ close_file(File, State = #dqstate { read_file_handle_cache = HC }) -> State #dqstate { read_file_handle_cache = HC1 }. delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> - [#file_summary_entry { valid_total_size = ValidData, - left = Left, right = Right }] = + [#file_summary { valid_total_size = ValidData, + left = Left, right = Right }] = ets:lookup(FileSummary, File), case ValidData of %% we should NEVER find the current file in here hence right @@ -1531,13 +1531,13 @@ delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> %% the eldest file is empty. true = ets:update_element( FileSummary, Right, - {#file_summary_entry.left, undefined}); + {#file_summary.left, undefined}); {_, _} when not (is_atom(Right)) -> true = ets:update_element(FileSummary, Right, - {#file_summary_entry.left, Left}), + {#file_summary.left, Left}), true = ets:update_element(FileSummary, Left, - {#file_summary_entry.right, Right}) + {#file_summary.right, Right}) end, true = ets:delete(FileSummary, File), ok = file:delete(form_filename(File)), @@ -1809,10 +1809,10 @@ load_messages(Left, [File|Files], [] -> undefined; [F|_] -> F end, - true = ets:insert_new(FileSummary, #file_summary_entry { + true = ets:insert_new(FileSummary, #file_summary { file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, left = Left, - right = Right }), + contiguous_top = ContiguousTop, + left = Left, right = Right }), load_messages(File, Files, State). recover_crashed_compactions(Files, TmpFiles) -> -- cgit v1.2.1 From 55e453260ca60fa2d439e9f9af3de9f0cd4b861a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 6 Sep 2009 23:12:45 +0100 Subject: big refactoring: extract msg_store from disk_queue The msg_store knows nothing about queues, or message structure. 
--- src/rabbit_disk_queue.erl | 1344 +++++++-------------------------------------- src/rabbit_msg_store.erl | 1128 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1315 insertions(+), 1157 deletions(-) create mode 100644 src/rabbit_msg_store.erl diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 538b08d8..ad5d8fb1 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -54,23 +54,13 @@ -include("rabbit.hrl"). --define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). --define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). +-define(MAX_READ_FILE_HANDLES, 256). +-define(FILE_SIZE_LIMIT, (256*1024*1024)). + -define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). --define(CACHE_ETS_NAME, rabbit_disk_queue_cache). --define(FILE_EXTENSION, ".rdq"). --define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_EXTENSION_DETS, ".dets"). -define(BATCH_SIZE, 10000). --define(CACHE_MAX_SIZE, 10485760). --define(MAX_READ_FILE_HANDLES, 256). --define(FILE_SIZE_LIMIT, (256*1024*1024)). -define(DISK_ONLY_MODE_FILE, "disk_only_stats.dat"). --define(BINARY_MODE, [raw, binary]). --define(READ_MODE, [read, read_ahead]). --define(WRITE_MODE, [write, delayed_write]). - -define(SHUTDOWN_MESSAGE_KEY, {internal_token, shutdown}). -define(SHUTDOWN_MESSAGE, #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY, @@ -86,175 +76,15 @@ -define(SERVER, ?MODULE). -record(dqstate, - {msg_location_dets, %% where are messages? - msg_location_ets, %% as above, but for ets version - operation_mode, %% ram_disk | disk_only - file_summary, %% what's in the files? + {operation_mode, %% ram_disk | disk_only + store, %% message store sequences, %% next read and write for each q - current_file_num, %% current file name as number - current_file_name, %% current file name - current_file_handle, %% current file handle - current_offset, %% current offset within current file - current_dirty, %% has the current file been written to - %% since the last fsync? - file_size_limit, %% how big can our files get? - read_file_handle_cache, %% file handle cache for reading on_sync_txns, %% list of commiters to run on sync (reversed) commit_timer_ref, %% TRef for our interval timer - last_sync_offset, %% current_offset at the last time we sync'd - message_cache, %% ets message cache memory_report_timer_ref, %% TRef for the memory report timer - wordsize, %% bytes in a word on this platform - mnesia_bytes_per_record, %% bytes per record in mnesia in ram_disk mode - ets_bytes_per_record %% bytes per record in msg_location_ets + mnesia_bytes_per_record %% bytes per record in mnesia in ram_disk mode }). --record(msg_location, - {msg_id, ref_count, file, offset, total_size, is_persistent}). - --record(file_summary, - {file, valid_total_size, contiguous_top, left, right}). - -%% The components: -%% -%% MsgLocation: this is a (d)ets table which contains: -%% {MsgId, RefCount, File, Offset, TotalSize, IsPersistent} -%% FileSummary: this is an ets table which contains: -%% {File, ValidTotalSize, ContiguousTop, Left, Right} -%% Sequences: this is an ets table which contains: -%% {Q, ReadSeqId, WriteSeqId} -%% rabbit_disk_queue: this is an mnesia table which contains: -%% #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, -%% is_delivered = IsDelivered, -%% msg_id = MsgId -%% } -%% - -%% The basic idea is that messages are appended to the current file up -%% until that file becomes too big (> file_size_limit). 
At that point, -%% the file is closed and a new file is created on the _right_ of the -%% old file which is used for new messages. Files are named -%% numerically ascending, thus the file with the lowest name is the -%% eldest file. -%% -%% We need to keep track of which messages are in which files (this is -%% the MsgLocation table); how much useful data is in each file and -%% which files are on the left and right of each other. This is the -%% purpose of the FileSummary table. -%% -%% As messages are removed from files, holes appear in these -%% files. The field ValidTotalSize contains the total amount of useful -%% data left in the file, whilst ContiguousTop contains the amount of -%% valid data right at the start of each file. These are needed for -%% garbage collection. -%% -%% On publish, we write the message to disk, record the changes to -%% FileSummary and MsgLocation, and, should this be either a plain -%% publish, or followed by a tx_commit, we record the message in the -%% mnesia table. Sequences exists to enforce ordering of messages as -%% they are published within a queue. -%% -%% On delivery, we read the next message to be read from disk -%% (according to the ReadSeqId for the given queue) and record in the -%% mnesia table that the message has been delivered. -%% -%% On ack we remove the relevant entry from MsgLocation, update -%% FileSummary and delete from the mnesia table. -%% -%% In order to avoid extra mnesia searching, we return the SeqId -%% during delivery which must be returned in ack - it is not possible -%% to ack from MsgId alone. - -%% As messages are ack'd, holes develop in the files. When we discover -%% that either a file is now empty or that it can be combined with the -%% useful data in either its left or right file, we compact the two -%% files together. This keeps disk utilisation high and aids -%% performance. -%% -%% Given the compaction between two files, the left file is considered -%% the ultimate destination for the good data in the right file. If -%% necessary, the good data in the left file which is fragmented -%% throughout the file is written out to a temporary file, then read -%% back in to form a contiguous chunk of good data at the start of the -%% left file. Thus the left file is garbage collected and -%% compacted. Then the good data from the right file is copied onto -%% the end of the left file. MsgLocation and FileSummary tables are -%% updated. -%% -%% On startup, we scan the files we discover, dealing with the -%% possibilites of a crash have occured during a compaction (this -%% consists of tidyup - the compaction is deliberately designed such -%% that data is duplicated on disk rather than risking it being lost), -%% and rebuild the dets and ets tables (MsgLocation, FileSummary, -%% Sequences) from what we find. We ensure that the messages we have -%% discovered on disk match exactly with the messages recorded in the -%% mnesia table. - -%% MsgLocation is deliberately a dets table, and the mnesia table is -%% set to be a disk_only_table in order to ensure that we are not RAM -%% constrained. However, for performance reasons, it is possible to -%% call to_ram_disk_mode/0 which will alter the mnesia table to -%% disc_copies and convert MsgLocation to an ets table. This results -%% in a massive performance improvement, at the expense of greater RAM -%% usage. The idea is that when memory gets tight, we switch to -%% disk_only mode but otherwise try to run in ram_disk mode. - -%% So, with this design, messages move to the left. 
Eventually, they -%% should end up in a contiguous block on the left and are then never -%% rewritten. But this isn't quite the case. If in a file there is one -%% message that is being ignored, for some reason, and messages in the -%% file to the right and in the current block are being read all the -%% time then it will repeatedly be the case that the good data from -%% both files can be combined and will be written out to a new -%% file. Whenever this happens, our shunned message will be rewritten. -%% -%% So, provided that we combine messages in the right order, -%% (i.e. left file, bottom to top, right file, bottom to top), -%% eventually our shunned message will end up at the bottom of the -%% left file. The compaction/combining algorithm is smart enough to -%% read in good data from the left file that is scattered throughout -%% (i.e. C and D in the below diagram), then truncate the file to just -%% above B (i.e. truncate to the limit of the good contiguous region -%% at the start of the file), then write C and D on top and then write -%% E, F and G from the right file on top. Thus contiguous blocks of -%% good data at the bottom of files are not rewritten (yes, this is -%% the data the size of which is tracked by the ContiguousTop -%% variable. Judicious use of a mirror is required). -%% -%% +-------+ +-------+ +-------+ -%% | X | | G | | G | -%% +-------+ +-------+ +-------+ -%% | D | | X | | F | -%% +-------+ +-------+ +-------+ -%% | X | | X | | E | -%% +-------+ +-------+ +-------+ -%% | C | | F | ===> | D | -%% +-------+ +-------+ +-------+ -%% | X | | X | | C | -%% +-------+ +-------+ +-------+ -%% | B | | X | | B | -%% +-------+ +-------+ +-------+ -%% | A | | E | | A | -%% +-------+ +-------+ +-------+ -%% left right left -%% -%% From this reasoning, we do have a bound on the number of times the -%% message is rewritten. From when it is inserted, there can be no -%% files inserted between it and the head of the queue, and the worst -%% case is that everytime it is rewritten, it moves one position lower -%% in the file (for it to stay at the same position requires that -%% there are no holes beneath it, which means truncate would be used -%% and so it would not be rewritten at all). Thus this seems to -%% suggest the limit is the number of messages ahead of it in the -%% queue, though it's likely that that's pessimistic, given the -%% requirements for compaction/combination of files. -%% -%% The other property is that we have is the bound on the lowest -%% utilisation, which should be 50% - worst case is that all files are -%% fractionally over half full and can't be combined (equivalent is -%% alternating full files and files with only one tiny message in -%% them). - %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -414,69 +244,35 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = detect_shutdown_state_and_adjust_delivered_flags(), - file:delete(msg_location_dets_file()), - - {ok, MsgLocationDets} = - dets:open_file(?MSG_LOC_NAME, - [{file, msg_location_dets_file()}, - {min_no_slots, 1024*1024}, - %% man says this should be <= 32M. But it works... 
- {max_no_slots, 30*1024*1024}, - {type, set}, - {keypos, 2} - ]), - - %% it would be better to have this as private, but dets:from_ets/2 - %% seems to blow up if it is set private - see bug21489 - MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, 2}]), - - InitName = "0" ++ ?FILE_EXTENSION, - HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, - ?BINARY_MODE ++ [read]), + ok = add_index(), + Store = rabbit_msg_store:init(Mode, base_directory(), + FileSizeLimit, ReadFileHandlesLimit, + fun ref_count/1, EtsBPR), + Store1 = prune_mnesia(Store), + ok = del_index(), + + Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), + ok = extract_sequence_numbers(Sequences), + State = - #dqstate { msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - operation_mode = Mode, - file_summary = ets:new( - ?FILE_SUMMARY_ETS_NAME, - [set, private, {keypos, 2}]), - sequences = ets:new(?SEQUENCE_ETS_NAME, - [set, private]), - current_file_num = 0, - current_file_name = InitName, - current_file_handle = undefined, - current_offset = 0, - current_dirty = false, - file_size_limit = FileSizeLimit, - read_file_handle_cache = HandleCache, + #dqstate { operation_mode = Mode, + store = Store1, + sequences = Sequences, on_sync_txns = [], commit_timer_ref = undefined, - last_sync_offset = 0, - message_cache = ets:new(?CACHE_ETS_NAME, - [set, private]), memory_report_timer_ref = undefined, - wordsize = erlang:system_info(wordsize), - mnesia_bytes_per_record = MnesiaBPR, - ets_bytes_per_record = EtsBPR + mnesia_bytes_per_record = MnesiaBPR }, - {ok, State1 = #dqstate { current_file_name = CurrentName, - current_offset = Offset } } = - load_from_disk(State), - %% read is only needed so that we can seek - {ok, FileHdl} = open_file(CurrentName, ?WRITE_MODE ++ [read]), - {ok, Offset} = file:position(FileHdl, Offset), - State2 = State1 #dqstate { current_file_handle = FileHdl }, - {ok, start_memory_timer(State2), hibernate, + {ok, start_memory_timer(State), hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({fetch, Q}, _From, State) -> - {ok, Result, State1} = + {Result, State1} = internal_fetch_body(Q, record_delivery, pop_queue, State), reply(Result, State1); handle_call({phantom_fetch, Q}, _From, State) -> - {ok, Result, State1} = - internal_fetch_attributes(Q, record_delivery, pop_queue, State), - reply(Result, State1); + Result = internal_fetch_attributes(Q, record_delivery, pop_queue, State), + reply(Result, State); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> State1 = internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State), @@ -485,7 +281,7 @@ handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), reply(Count, State1); handle_call(filesync, _From, State) -> - reply(ok, sync_current_file_handle(State)); + reply(ok, sync(State)); handle_call({delete_queue, Q}, From, State) -> gen_server2:reply(From, ok), {ok, State1} = internal_delete_queue(Q, State), @@ -515,8 +311,8 @@ handle_call(to_ram_disk_mode, _From, State) -> handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), reply(ok, State1); -handle_call(cache_info, _From, State = #dqstate { message_cache = Cache }) -> - reply(ets:info(Cache), State). +handle_call(cache_info, _From, State = #dqstate { store = Store }) -> + reply(rabbit_msg_store:cache_info(Store), State). 
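%% Aside: the recovery sequence in the new init/1, as read off the diff
%% above. add_index/0, del_index/0 and prune_mnesia/1 are defined in
%% parts of the file this excerpt does not show; from their use here,
%% add_index/del_index bracket the pruning with a temporary mnesia
%% index, and prune_mnesia drops queue entries whose messages no longer
%% exist on disk:
%%
%%   ok = add_index(),
%%   Store = rabbit_msg_store:init(Mode, base_directory(),
%%                                 FileSizeLimit, ReadFileHandlesLimit,
%%                                 fun ref_count/1, EtsBPR),
%%   Store1 = prune_mnesia(Store),
%%   ok = del_index(),
%%   Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]),
%%   ok = extract_sequence_numbers(Sequences)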
handle_cast({publish, Q, Message, IsDelivered}, State) -> {ok, _MsgSeqId, State1} = internal_publish(Q, Message, IsDelivered, State), @@ -542,25 +338,19 @@ handle_cast({set_mode, Mode}, State) -> liberated -> fun to_ram_disk_mode/1 end)(State)); handle_cast({prefetch, Q, From}, State) -> - {ok, Result, State1} = + {Result, State1} = internal_fetch_body(Q, record_delivery, peek_queue, State), - Cont = rabbit_misc:with_exit_handler( - fun () -> false end, - fun () -> - ok = rabbit_queue_prefetcher:publish(From, Result), - true - end), - State3 = - case Cont of - true -> - case internal_fetch_attributes( - Q, ignore_delivery, pop_queue, State1) of - {ok, empty, State2} -> State2; - {ok, _, State2} -> State2 - end; - false -> State1 - end, - noreply(State3). + case rabbit_misc:with_exit_handler( + fun () -> false end, + fun () -> + ok = rabbit_queue_prefetcher:publish(From, Result), + true + end) of + true -> + internal_fetch_attributes(Q, ignore_delivery, pop_queue, State1); + false -> ok + end, + noreply(State1). handle_info(report_memory, State) -> %% call noreply1/2, not noreply/1/2, as we don't want to restart the @@ -571,7 +361,7 @@ handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; handle_info(timeout, State) -> %% must have commit_timer set, so timeout was 0, and we're not hibernating - noreply(sync_current_file_handle(State)). + noreply(sync(State)). handle_pre_hibernate(State) -> %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer @@ -585,33 +375,11 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { sequences = undefined }) -> State; -shutdown(State = #dqstate { msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - file_summary = FileSummary, - sequences = Sequences, - current_file_handle = FileHdl, - read_file_handle_cache = HC - }) -> +shutdown(State = #dqstate { sequences = Sequences, store = Store }) -> State1 = stop_commit_timer(stop_memory_timer(State)), - case FileHdl of - undefined -> ok; - _ -> sync_current_file_handle(State1), - file:close(FileHdl) - end, - HC1 = rabbit_file_handle_cache:close_all(HC), - dets:close(MsgLocationDets), - file:delete(msg_location_dets_file()), - ets:delete(MsgLocationEts), - ets:delete(FileSummary), + Store1 = rabbit_msg_store:cleanup(Store), ets:delete(Sequences), - State1 #dqstate { msg_location_dets = undefined, - msg_location_ets = undefined, - file_summary = undefined, - sequences = undefined, - current_file_handle = undefined, - current_dirty = false, - read_file_handle_cache = HC1 - }. + State1 #dqstate { sequences = undefined, store = Store1 }. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -643,168 +411,59 @@ report_memory(Hibernating, State) -> rabbit_memory_manager:report_memory(self(), trunc(2.5 * Bytes), Hibernating). 
-memory_use(#dqstate { operation_mode = ram_disk, - file_summary = FileSummary, - sequences = Sequences, - msg_location_ets = MsgLocationEts, - message_cache = Cache, - wordsize = WordSize - }) -> - WordSize * (mnesia:table_info(rabbit_disk_queue, memory) + - lists:sum([ets:info(Table, memory) - || Table <- [MsgLocationEts, FileSummary, Cache, - Sequences]])); +memory_use(#dqstate { operation_mode = ram_disk, + store = Store, + sequences = Sequences }) -> + WordSize = erlang:system_info(wordsize), + rabbit_msg_store:memory(Store) + + WordSize * ets:info(Sequences, memory) + + WordSize * mnesia:table_info(rabbit_disk_queue, memory); memory_use(#dqstate { operation_mode = disk_only, - file_summary = FileSummary, + store = Store, sequences = Sequences, - msg_location_dets = MsgLocationDets, - message_cache = Cache, - wordsize = WordSize, - mnesia_bytes_per_record = MnesiaBytesPerRecord, - ets_bytes_per_record = EtsBytesPerRecord }) -> - (WordSize * (lists:sum([ets:info(Table, memory) - || Table <- [FileSummary, Cache, Sequences]]))) + - rabbit_misc:ceil( - mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord) + + mnesia_bytes_per_record = MnesiaBytesPerRecord }) -> + WordSize = erlang:system_info(wordsize), + rabbit_msg_store:memory(Store) + + WordSize * ets:info(Sequences, memory) + rabbit_misc:ceil( - dets:info(MsgLocationDets, size) * EtsBytesPerRecord). + mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord). to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> State; -to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, - wordsize = WordSize }) -> +to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, + store = Store }) -> rabbit_log:info("Converting disk queue to disk only mode~n", []), - MnesiaMemBytes = WordSize * mnesia:table_info(rabbit_disk_queue, memory), - EtsMemBytes = WordSize * ets:info(MsgLocationEts, memory), - MnesiaSize = lists:max([1, mnesia:table_info(rabbit_disk_queue, size)]), - EtsSize = lists:max([1, ets:info(MsgLocationEts, size)]), + MnesiaBPR = erlang:system_info(wordsize) * + mnesia:table_info(rabbit_disk_queue, memory) / + lists:max([1, mnesia:table_info(rabbit_disk_queue, size)]), + EtsBPR = rabbit_msg_store:ets_bpr(Store), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_only_copies), - MnesiaBPR = MnesiaMemBytes / MnesiaSize, - EtsBPR = EtsMemBytes / EtsSize, + Store1 = rabbit_msg_store:to_disk_only_mode(Store), Path = form_filename(?DISK_ONLY_MODE_FILE), case rabbit_misc:write_term_file(Path, [{MnesiaBPR, EtsBPR}]) of ok -> ok; {error, Reason} -> throw({error, {cannot_create_disk_only_mode_file, Path, Reason}}) end, - ok = dets:from_ets(MsgLocationDets, MsgLocationEts), - true = ets:delete_all_objects(MsgLocationEts), garbage_collect(), State #dqstate { operation_mode = disk_only, - mnesia_bytes_per_record = MnesiaBPR, - ets_bytes_per_record = EtsBPR }. + store = Store1, + mnesia_bytes_per_record = MnesiaBPR }. 
to_ram_disk_mode(State = #dqstate { operation_mode = ram_disk }) -> State; -to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> +to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, + store = Store }) -> rabbit_log:info("Converting disk queue to ram disk mode~n", []), {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), disc_copies), + Store1 = rabbit_msg_store:to_ram_disk_mode(Store), ok = file:delete(form_filename(?DISK_ONLY_MODE_FILE)), - true = ets:from_dets(MsgLocationEts, MsgLocationDets), - ok = dets:delete_all_objects(MsgLocationDets), garbage_collect(), State #dqstate { operation_mode = ram_disk, - mnesia_bytes_per_record = undefined, - ets_bytes_per_record = undefined }. - -%%---------------------------------------------------------------------------- -%% message cache helper functions -%%---------------------------------------------------------------------------- - -%% The purpose of the cache is not especially performance, though it -%% can help there too. The main purpose is to ensure that individual -%% messages that are sent to multiple queues, and then to disk, are -%% read back as the same binary object rather than multiples of -%% identical binary objects. This prevents memory explosion. -%% -%% We limit the cache in size. If we didn't, then we could have two -%% queues coming off the same exchange, receiving the same millions of -%% messages, then one queue gets drained, which would pull the entire -%% queue into the cache, which would potentially explode memory. - -remove_cache_entry(MsgId, #dqstate { message_cache = Cache }) -> - true = ets:delete(Cache, MsgId), - ok. - -fetch_and_increment_cache(MsgId, #dqstate { message_cache = Cache }) -> - case ets:lookup(Cache, MsgId) of - [] -> - not_found; - [{MsgId, Message, _RefCount}] -> - NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), - {Message, NewRefCount} - end. - -decrement_cache(MsgId, #dqstate { message_cache = Cache }) -> - true = try case ets:update_counter(Cache, MsgId, {3, -1}) of - N when N =< 0 -> true = ets:delete(Cache, MsgId); - _N -> true - end - catch error:badarg -> - %% MsgId is not in there because although it's been - %% delivered, it's never actually been read (think: - %% persistent message in mixed queue) - true - end, - ok. - -insert_into_cache(Message = #basic_message { guid = MsgId }, - #dqstate { message_cache = Cache }) -> - case cache_is_full(Cache) of - true -> ok; - false -> true = ets:insert_new(Cache, {MsgId, Message, 1}), - ok - end. - -cache_is_full(Cache) -> - ets:info(Cache, memory) > ?CACHE_MAX_SIZE. - -%%---------------------------------------------------------------------------- -%% dets/ets agnosticism -%%---------------------------------------------------------------------------- - -dets_ets_lookup(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Key) -> - dets:lookup(MsgLocationDets, Key); -dets_ets_lookup(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Key) -> - ets:lookup(MsgLocationEts, Key). - -dets_ets_delete(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Key) -> - ok = dets:delete(MsgLocationDets, Key); -dets_ets_delete(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Key) -> - true = ets:delete(MsgLocationEts, Key), - ok. 
- -dets_ets_insert(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Obj) -> - ok = dets:insert(MsgLocationDets, Obj); -dets_ets_insert(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Obj) -> - true = ets:insert(MsgLocationEts, Obj), - ok. - -dets_ets_insert_new(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Obj) -> - true = dets:insert_new(MsgLocationDets, Obj); -dets_ets_insert_new(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Obj) -> - true = ets:insert_new(MsgLocationEts, Obj). - -dets_ets_match_object(#dqstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Obj) -> - dets:match_object(MsgLocationDets, Obj); -dets_ets_match_object(#dqstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Obj) -> - ets:match_object(MsgLocationEts, Obj). + store = Store1, + mnesia_bytes_per_record = undefined }. %%---------------------------------------------------------------------------- %% general helper functions @@ -840,50 +499,10 @@ form_filename(Name) -> base_directory() -> filename:join(rabbit_mnesia:dir(), "rabbit_disk_queue/"). -msg_location_dets_file() -> - form_filename(atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). - -open_file(File, Mode) -> file:open(form_filename(File), ?BINARY_MODE ++ Mode). - -with_read_handle_at(File, Offset, Fun, State = - #dqstate { read_file_handle_cache = HC, - current_file_name = CurName, - current_dirty = IsDirty, - last_sync_offset = SyncOffset - }) -> - State1 = if CurName =:= File andalso IsDirty andalso Offset >= SyncOffset -> - sync_current_file_handle(State); - true -> State - end, - FilePath = form_filename(File), - {Result, HC1} = - rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), - {Result, State1 #dqstate { read_file_handle_cache = HC1 }}. - -sync_current_file_handle(State = #dqstate { current_dirty = false, - on_sync_txns = [] }) -> - State; -sync_current_file_handle(State = #dqstate { current_file_handle = CurHdl, - current_dirty = IsDirty, - current_offset = CurOffset, - on_sync_txns = Txns, - last_sync_offset = SyncOffset - }) -> - SyncOffset1 = case IsDirty of - true -> ok = file:sync(CurHdl), - CurOffset; - false -> SyncOffset - end, - State1 = lists:foldl(fun internal_do_tx_commit/2, State, lists:reverse(Txns)), - State1 #dqstate { current_dirty = false, on_sync_txns = [], - last_sync_offset = SyncOffset1 }. - sequence_lookup(Sequences, Q) -> case ets:lookup(Sequences, Q) of - [] -> - {0, 0}; - [{Q, ReadSeqId, WriteSeqId}] -> - {ReadSeqId, WriteSeqId} + [] -> {0, 0}; + [{_, ReadSeqId, WriteSeqId}] -> {ReadSeqId, WriteSeqId} end. start_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> @@ -896,91 +515,54 @@ stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #dqstate { commit_timer_ref = undefined }. -msg_to_bin(Msg = #basic_message { content = Content }) -> - ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), - term_to_binary(Msg #basic_message { content = ClearedContent }). - -bin_to_msg(MsgBin) -> - binary_to_term(MsgBin). +sync(State = #dqstate { store = Store, on_sync_txns = Txns }) -> + State1 = State #dqstate { store = rabbit_msg_store:sync(Store) }, + case Txns of + [] -> State1; + _ -> lists:foldl(fun internal_do_tx_commit/2, + State1 #dqstate { on_sync_txns = [] }, + lists:reverse(Txns)) + end. 
%%---------------------------------------------------------------------------- %% internal functions %%---------------------------------------------------------------------------- -internal_fetch_body(Q, MarkDelivered, Advance, State) -> - case queue_head(Q, MarkDelivered, Advance, State) of - E = {ok, empty, _} -> E; - {ok, AckTag, IsDelivered, StoreEntry, Remaining, State1} -> - {Message, State2} = read_stored_message(StoreEntry, State1), - {ok, {Message, IsDelivered, AckTag, Remaining}, State2} +internal_fetch_body(Q, MarkDelivered, Advance, + State = #dqstate { store = Store }) -> + case next(Q, MarkDelivered, Advance, State) of + empty -> {empty, State}; + {MsgId, IsDelivered, AckTag, Remaining} -> + {Message, Store1} = rabbit_msg_store:read(MsgId, Store), + State1 = State #dqstate { store = Store1 }, + {{Message, IsDelivered, AckTag, Remaining}, State1} end. -internal_fetch_attributes(Q, MarkDelivered, Advance, State) -> - case queue_head(Q, MarkDelivered, Advance, State) of - E = {ok, empty, _} -> E; - {ok, AckTag, IsDelivered, - #msg_location { msg_id = MsgId, is_persistent = IsPersistent }, - Remaining, State1} -> - {ok, {MsgId, IsPersistent, IsDelivered, AckTag, Remaining}, State1} +internal_fetch_attributes(Q, MarkDelivered, Advance, + State = #dqstate { store = Store }) -> + case next(Q, MarkDelivered, Advance, State) of + empty -> empty; + {MsgId, IsDelivered, AckTag, Remaining} -> + IsPersistent = rabbit_msg_store:is_persistent(MsgId, Store), + {MsgId, IsPersistent, IsDelivered, AckTag, Remaining} end. -queue_head(Q, MarkDelivered, Advance, - State = #dqstate { sequences = Sequences }) -> +next(Q, MarkDelivered, Advance, #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of - {SeqId, SeqId} -> {ok, empty, State}; + {SeqId, SeqId} -> empty; {ReadSeqId, WriteSeqId} when WriteSeqId > ReadSeqId -> Remaining = WriteSeqId - ReadSeqId - 1, - {AckTag, IsDelivered, StoreEntry} = - update_message_attributes(Q, ReadSeqId, MarkDelivered, State), + {MsgId, IsDelivered} = + update_message_attributes(Q, ReadSeqId, MarkDelivered), ok = maybe_advance(Advance, Sequences, Q, ReadSeqId, WriteSeqId), - {ok, AckTag, IsDelivered, StoreEntry, Remaining, State} + AckTag = {MsgId, ReadSeqId}, + {MsgId, IsDelivered, AckTag, Remaining} end. -maybe_advance(peek_queue, _, _, _, _) -> - ok; -maybe_advance(pop_queue, Sequences, Q, ReadSeqId, WriteSeqId) -> - true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), - ok. - -read_stored_message(#msg_location { msg_id = MsgId, ref_count = RefCount, - file = File, offset = Offset, - total_size = TotalSize }, State) -> - case fetch_and_increment_cache(MsgId, State) of - not_found -> - {{ok, {MsgId, MsgBody, _IsPersistent, _BodySize}}, State1} = - with_read_handle_at( - File, Offset, - fun(Hdl) -> - Res = case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {MsgId, _, _, _}} = Obj -> Obj; - {ok, Rest} -> - throw({error, - {misread, [{old_state, State}, - {file, File}, - {offset, Offset}, - {read, Rest}]}}) - end, - {Offset + TotalSize, Res} - end, State), - Message = #basic_message {} = bin_to_msg(MsgBody), - ok = if RefCount > 1 -> - insert_into_cache(Message, State1); - true -> ok - %% it's not in the cache and we only have - %% 1 queue with the message. So don't - %% bother putting it in the cache. - end, - {Message, State1}; - {Message, _RefCount} -> - {Message, State} - end. 
- -update_message_attributes(Q, SeqId, MarkDelivered, State) -> +update_message_attributes(Q, SeqId, MarkDelivered) -> [Obj = #dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), - [StoreEntry = #msg_location { msg_id = MsgId }] = - dets_ets_lookup(State, MsgId), ok = case {IsDelivered, MarkDelivered} of {true, _} -> ok; {false, ignore_delivery} -> ok; @@ -988,130 +570,62 @@ update_message_attributes(Q, SeqId, MarkDelivered, State) -> mnesia:dirty_write(rabbit_disk_queue, Obj #dq_msg_loc {is_delivered = true}) end, - {{MsgId, SeqId}, IsDelivered, StoreEntry}. + {MsgId, IsDelivered}. + +maybe_advance(peek_queue, _, _, _, _) -> + ok; +maybe_advance(pop_queue, Sequences, Q, ReadSeqId, WriteSeqId) -> + true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), + ok. internal_foldl(Q, Fun, Init, State) -> - State1 = #dqstate { sequences = Sequences } = - sync_current_file_handle(State), + State1 = #dqstate { sequences = Sequences } = sync(State), {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), internal_foldl(Q, WriteSeqId, Fun, State1, Init, ReadSeqId). internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> {ok, Acc, State}; -internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> - {AckTag, IsDelivered, StoreEntry} = - update_message_attributes(Q, ReadSeqId, ignore_delivery, State), - {Message, State1} = read_stored_message(StoreEntry, State), - Acc1 = Fun(Message, AckTag, IsDelivered, Acc), - internal_foldl(Q, WriteSeqId, Fun, State1, Acc1, ReadSeqId + 1). +internal_foldl(Q, WriteSeqId, Fun, State = #dqstate { store = Store }, + Acc, ReadSeqId) -> + [#dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = + mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), + {Message, Store1} = rabbit_msg_store:read(MsgId, Store), + Acc1 = Fun(Message, {MsgId, ReadSeqId}, IsDelivered, Acc), + internal_foldl(Q, WriteSeqId, Fun, State #dqstate { store = Store1 }, + Acc1, ReadSeqId + 1). internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, true, State). %% Q is only needed if MnesiaDelete /= false -remove_messages(Q, MsgSeqIds, MnesiaDelete, State) -> - Files = - lists:foldl( - fun ({MsgId, SeqId}, Files1) -> - Files2 = remove_message(MsgId, Files1, State), - ok = case MnesiaDelete of - true -> mnesia:dirty_delete(rabbit_disk_queue, - {Q, SeqId}); - _ -> ok - end, - Files2 - end, sets:new(), MsgSeqIds), - State1 = compact(Files, State), - {ok, State1}. - -remove_message(MsgId, Files, - State = #dqstate { file_summary = FileSummary, - current_file_name = CurName - }) -> - [StoreEntry = - #msg_location { msg_id = MsgId, ref_count = RefCount, file = File, - offset = Offset, total_size = TotalSize }] = - dets_ets_lookup(State, MsgId), - case RefCount of - 1 -> - ok = dets_ets_delete(State, MsgId), - ok = remove_cache_entry(MsgId, State), - [FSEntry = #file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop }] = - ets:lookup(FileSummary, File), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - ValidTotalSize1 = ValidTotalSize - TotalSize, - true = ets:insert(FileSummary, FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), - if CurName =:= File -> Files; - true -> sets:add_element(File, Files) - end; - _ when 1 < RefCount -> - ok = decrement_cache(MsgId, State), - ok = dets_ets_insert(State, StoreEntry #msg_location { - ref_count = RefCount - 1 }), - Files - end. 
+remove_messages(Q, MsgSeqIds, MnesiaDelete, + State = #dqstate { store = Store } ) -> + MsgIds = lists:foldl( + fun ({MsgId, SeqId}, MsgIdAcc) -> + ok = case MnesiaDelete of + true -> mnesia:dirty_delete(rabbit_disk_queue, + {Q, SeqId}); + _ -> ok + end, + [MsgId | MsgIdAcc] + end, [], MsgSeqIds), + Store1 = rabbit_msg_store:remove(MsgIds, Store), + {ok, State #dqstate { store = Store1}}. internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, - guid = MsgId }, - State = #dqstate { current_file_handle = CurHdl, - current_file_name = CurName, - current_offset = CurOffset, - file_summary = FileSummary - }) -> - case dets_ets_lookup(State, MsgId) of - [] -> - %% New message, lots to do - {ok, TotalSize} = rabbit_msg_file:append( - CurHdl, MsgId, msg_to_bin(Message), - IsPersistent), - true = dets_ets_insert_new( - State, #msg_location { - msg_id = MsgId, ref_count = 1, file = CurName, - offset = CurOffset, total_size = TotalSize, - is_persistent = IsPersistent }), - [FSEntry = #file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - right = undefined }] = - ets:lookup(FileSummary, CurName), - ValidTotalSize1 = ValidTotalSize + TotalSize, - ContiguousTop1 = if CurOffset =:= ContiguousTop -> - %% can't be any holes in this file - ValidTotalSize1; - true -> ContiguousTop - end, - true = ets:insert(FileSummary, FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), - NextOffset = CurOffset + TotalSize, - maybe_roll_to_new_file( - NextOffset, State #dqstate {current_offset = NextOffset, - current_dirty = true}); - [StoreEntry = - #msg_location { msg_id = MsgId, ref_count = RefCount }] -> - %% We already know about it, just update counter - ok = dets_ets_insert(State, StoreEntry #msg_location { - ref_count = RefCount + 1 }), - {ok, State} - end. + guid = MsgId, + content = Content }, + State = #dqstate { store = Store }) -> + ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), + Message1 = Message #basic_message { content = ClearedContent }, + Store1 = rabbit_msg_store:write(MsgId, Message1, IsPersistent, Store), + {ok, State #dqstate { store = Store1 }}. internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, - State = #dqstate { current_file_name = CurFile, - current_dirty = IsDirty, - on_sync_txns = Txns, - last_sync_offset = SyncOffset - }) -> - NeedsSync = IsDirty andalso - lists:any(fun ({MsgId, _IsDelivered}) -> - [#msg_location { msg_id = MsgId, file = File, - offset = Offset }] = - dets_ets_lookup(State, MsgId), - File =:= CurFile andalso Offset >= SyncOffset - end, PubMsgIds), + State = #dqstate { store = Store, on_sync_txns = Txns }) -> TxnDetails = {Q, PubMsgIds, AckSeqIds, From}, - case NeedsSync of + case rabbit_msg_store:needs_sync( + [MsgId || {MsgId, _IsDelivered} <- PubMsgIds], Store) of true -> Txns1 = [TxnDetails | Txns], State #dqstate { on_sync_txns = Txns1 }; false -> internal_do_tx_commit(TxnDetails, State) @@ -1165,13 +679,14 @@ internal_tx_rollback(MsgIds, State) -> internal_requeue(_Q, [], State) -> {ok, State}; -internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> +internal_requeue(Q, MsgSeqIds, State = #dqstate { store = Store, + sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've %% already been delivered). We also know that the rows for these %% messages are still in rabbit_disk_queue (i.e. they've not been %% ack'd). 
- + %% %% Now, it would be nice if we could adjust the sequence ids in %% rabbit_disk_queue (mnesia) to create a contiguous block and %% then drop the ReadSeqId for the queue by the corresponding @@ -1180,13 +695,14 @@ internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> %% which are not being requeued. As such, moving things about in %% rabbit_disk_queue _under_ the current ReadSeqId would result in %% such sequence ids referring to the wrong messages. - + %% %% Therefore, the only solution is to take these messages, and to %% reenqueue them at the top of the queue. Usefully, this only %% affects the Sequences and rabbit_disk_queue structures - there %% is no need to physically move the messages about on disk, so - %% MsgLocation and FileSummary stay put (which makes further sense - %% as they have no concept of sequence id anyway). + %% the message store remains unaffected, except we need to tell it + %% about the ids of the requeued messages so it can remove them + %% from its message cache if necessary. {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), {WriteSeqId1, Q, MsgIds} = @@ -1197,8 +713,8 @@ internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> MsgSeqIds) end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId1}), - lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), - {ok, State}. + Store1 = rabbit_msg_store:release(MsgIds, Store), + {ok, State #dqstate { store = Store1 }}. requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, Acc}) -> [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = @@ -1212,7 +728,8 @@ requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, Acc}) -> {WriteSeqId + 1, Q, [MsgId | Acc]}. %% move the next N messages from the front of the queue to the back. -internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> +internal_requeue_next_n(Q, N, State = #dqstate { store = Store, + sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), if N >= (WriteSeqId - ReadSeqId) -> {ok, State}; true -> @@ -1224,8 +741,8 @@ internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> end ), true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN}), - lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), - {ok, State} + Store1 = rabbit_msg_store:release(MsgIds, Store), + {ok, State #dqstate { store = Store1 }} end. requeue_next_messages(_Q, 0, ReadSeq, WriteSeq, Acc) -> @@ -1257,7 +774,7 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> end. internal_delete_queue(Q, State) -> - State1 = sync_current_file_handle(State), + State1 = sync(State), {ok, _Count, State2 = #dqstate { sequences = Sequences }} = internal_purge(Q, State1), %% remove everything undelivered true = ets:delete(Sequences, Q), @@ -1282,269 +799,6 @@ internal_delete_non_durable_queues( end end, {ok, State}, Sequences). 
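To make the requeue comment above concrete, here is a hedged example with invented message ids (MsgA and MsgB are hypothetical):

    %% Suppose sequence_lookup(Sequences, Q) yields {5, 8}: seq ids 5..7
    %% are still queued, while {MsgA, 2} and {MsgB, 3} were delivered
    %% but never acked.
    {ok, State1} =
        internal_requeue(Q, [{{MsgA, 2}, true}, {{MsgB, 3}, true}], State),
    %% The mnesia rows move from {Q, 2} and {Q, 3} to {Q, 8} and {Q, 9},
    %% Sequences now holds {Q, 5, 10}, and rabbit_msg_store:release/2
    %% has dropped any message-cache references; nothing moves on disk.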
-%%---------------------------------------------------------------------------- -%% garbage collection / compaction / aggregation -%%---------------------------------------------------------------------------- - -maybe_roll_to_new_file(Offset, - State = #dqstate { file_size_limit = FileSizeLimit, - current_file_name = CurName, - current_file_handle = CurHdl, - current_file_num = CurNum, - file_summary = FileSummary - } - ) when Offset >= FileSizeLimit -> - State1 = sync_current_file_handle(State), - ok = file:close(CurHdl), - NextNum = CurNum + 1, - NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, - {ok, NextHdl} = open_file(NextName, ?WRITE_MODE), - true = ets:update_element(FileSummary, CurName, - {#file_summary.right, NextName}), - true = ets:insert_new( - FileSummary, #file_summary { - file = NextName, valid_total_size = 0, contiguous_top = 0, - left = CurName, right = undefined }), - State2 = State1 #dqstate { current_file_name = NextName, - current_file_handle = NextHdl, - current_file_num = NextNum, - current_offset = 0, - last_sync_offset = 0 - }, - {ok, compact(sets:from_list([CurName]), State2)}; -maybe_roll_to_new_file(_, State) -> - {ok, State}. - -compact(FilesSet, State) -> - %% smallest number, hence eldest, hence left-most, first - Files = lists:sort(fun file_name_sort/2, sets:to_list(FilesSet)), - %% foldl reverses, so now youngest/right-most first - RemainingFiles = lists:foldl(fun (File, Acc) -> - delete_empty_files(File, Acc, State) - end, [], Files), - lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). - -%% At this stage, we simply know that the file has had msgs removed -%% from it. However, we don't know if we need to merge it left (which -%% is what we would prefer), or merge it right. If we merge left, then -%% this file is the source, and the left file is the destination. If -%% we merge right then this file is the destination and the right file -%% is the source. -combine_file(File, State = #dqstate { file_summary = FileSummary, - current_file_name = CurName - }) -> - %% the file we're looking at may no longer exist as it may have - %% been deleted within the current GC run - case ets:lookup(FileSummary, File) of - [] -> State; - [FSEntry = #file_summary { left = Left, right = Right }] -> - GoRight = - fun() -> - case Right of - undefined -> State; - _ when not (CurName == Right) -> - [FSRight] = ets:lookup(FileSummary, Right), - {_, State1} = adjust_meta_and_combine( - FSEntry, FSRight, State), - State1; - _ -> State - end - end, - case Left of - undefined -> - GoRight(); - _ -> [FSLeft] = ets:lookup(FileSummary, Left), - case adjust_meta_and_combine(FSLeft, FSEntry, State) of - {true, State1} -> State1; - {false, State} -> GoRight() - end - end - end. 
- -adjust_meta_and_combine( - LeftObj = #file_summary { - file = LeftFile, valid_total_size = LeftValidData, right = RightFile }, - RightObj = #file_summary { - file = RightFile, valid_total_size = RightValidData, left = LeftFile, - right = RightRight }, - State = #dqstate { file_size_limit = FileSizeLimit, - file_summary = FileSummary }) -> - TotalValidData = LeftValidData + RightValidData, - if FileSizeLimit >= TotalValidData -> - State1 = combine_files(RightObj, LeftObj, State), - %% this could fail if RightRight is undefined - ets:update_element(FileSummary, RightRight, - {#file_summary.left, LeftFile}), - true = ets:insert(FileSummary, LeftObj #file_summary { - valid_total_size = TotalValidData, - contiguous_top = TotalValidData, - right = RightRight }), - true = ets:delete(FileSummary, RightFile), - {true, State1}; - true -> {false, State} - end. - -sort_msg_locations_by_offset(Dir, List) -> - Comp = case Dir of - asc -> fun erlang:'<'/2; - desc -> fun erlang:'>'/2 - end, - lists:sort(fun (#msg_location { offset = OffA }, - #msg_location { offset = OffB }) -> - Comp(OffA, OffB) - end, List). - -preallocate(Hdl, FileSizeLimit, FinalPos) -> - {ok, FileSizeLimit} = file:position(Hdl, FileSizeLimit), - ok = file:truncate(Hdl), - {ok, FinalPos} = file:position(Hdl, FinalPos), - ok. - -truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> - {ok, Lowpoint} = file:position(FileHdl, Lowpoint), - ok = file:truncate(FileHdl), - ok = preallocate(FileHdl, Highpoint, Lowpoint). - -combine_files(#file_summary { file = Source, - valid_total_size = SourceValid, - left = Destination }, - #file_summary { file = Destination, - valid_total_size = DestinationValid, - contiguous_top = DestinationContiguousTop, - right = Source }, - State) -> - State1 = close_file(Source, close_file(Destination, State)), - {ok, SourceHdl} = open_file(Source, ?READ_MODE), - {ok, DestinationHdl} = open_file(Destination, ?READ_MODE ++ ?WRITE_MODE), - ExpectedSize = SourceValid + DestinationValid, - %% if DestinationValid =:= DestinationContiguousTop then we don't - %% need a tmp file - %% if they're not equal, then we need to write out everything past - %% the DestinationContiguousTop to a tmp file then truncate, - %% copy back in, and then copy over from Source - %% otherwise we just truncate straight away and copy over from Source - if DestinationContiguousTop =:= DestinationValid -> - ok = truncate_and_extend_file(DestinationHdl, - DestinationValid, ExpectedSize); - true -> - Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = open_file(Tmp, ?READ_MODE ++ ?WRITE_MODE), - Worklist = - lists:dropwhile( - fun (#msg_location { offset = Offset }) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == - %% DestinationContiguousTop because if it - %% was then DestinationContiguousTop would - %% have been extended by TotalSize - Offset < DestinationContiguousTop - %% Given expected access patterns, I suspect - %% that the list should be naturally sorted - %% as we require, however, we need to - %% enforce it anyway - end, sort_msg_locations_by_offset( - asc, dets_ets_match_object( - State1, #msg_location { - file = Destination, _ = '_' }))), - ok = copy_messages( - Worklist, DestinationContiguousTop, DestinationValid, - DestinationHdl, TmpHdl, Destination, State1), - TmpSize = DestinationValid - DestinationContiguousTop, - %% so now Tmp contains everything we need to salvage from - %% Destination, and MsgLocationDets has been updated to - %% reflect compaction of Destination so 
truncate - %% Destination and copy from Tmp back to the end - {ok, 0} = file:position(TmpHdl, 0), - ok = truncate_and_extend_file( - DestinationHdl, DestinationContiguousTop, ExpectedSize), - {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be DestinationValid - ok = file:sync(DestinationHdl), - ok = file:close(TmpHdl), - ok = file:delete(form_filename(Tmp)) - end, - SourceWorkList = - sort_msg_locations_by_offset( - asc, dets_ets_match_object(State1, #msg_location { - file = Source, _ = '_' })), - ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, - SourceHdl, DestinationHdl, Destination, State1), - %% tidy up - ok = file:close(SourceHdl), - ok = file:close(DestinationHdl), - ok = file:delete(form_filename(Source)), - State1. - -copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, - Destination, State) -> - {FinalOffset, BlockStart1, BlockEnd1} = - lists:foldl( - fun (StoreEntry = #msg_location { offset = Offset, - total_size = TotalSize }, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the DestinationFile. - %% Offset, BlockStart and BlockEnd are in the SourceFile - %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert(State, StoreEntry #msg_location { - file = Destination, - offset = CurOffset }), - NextOffset = CurOffset + TotalSize, - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {NextOffset, Offset, Offset + TotalSize}; - Offset =:= BlockEnd -> - %% extend the current block because the next - %% msg follows straight on - {NextOffset, BlockStart, BlockEnd + TotalSize}; - true -> - %% found a gap, so actually do the work for - %% the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file:position(SourceHdl, BlockStart), - {ok, BSize} = - file:copy(SourceHdl, DestinationHdl, BSize), - {NextOffset, Offset, Offset + TotalSize} - end - end, {InitOffset, undefined, undefined}, WorkList), - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = file:position(SourceHdl, BlockStart1), - {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), - ok = file:sync(DestinationHdl), - ok. - -close_file(File, State = #dqstate { read_file_handle_cache = HC }) -> - HC1 = rabbit_file_handle_cache:close_file(form_filename(File), HC), - State #dqstate { read_file_handle_cache = HC1 }. - -delete_empty_files(File, Acc, #dqstate { file_summary = FileSummary }) -> - [#file_summary { valid_total_size = ValidData, - left = Left, right = Right }] = - ets:lookup(FileSummary, File), - case ValidData of - %% we should NEVER find the current file in here hence right - %% should always be a file, not undefined - 0 -> - case {Left, Right} of - {undefined, _} when not is_atom(Right) -> - %% the eldest file is empty. - true = ets:update_element( - FileSummary, Right, - {#file_summary.left, undefined}); - {_, _} when not (is_atom(Right)) -> - true = ets:update_element(FileSummary, Right, - {#file_summary.left, Left}), - true = - ets:update_element(FileSummary, Left, - {#file_summary.right, Right}) - end, - true = ets:delete(FileSummary, File), - ok = file:delete(form_filename(File)), - Acc; - _ -> [File|Acc] - end. - %%---------------------------------------------------------------------------- %% recovery %%---------------------------------------------------------------------------- @@ -1630,55 +884,35 @@ del_index() -> E1 -> E1 end. 
-load_from_disk(State) -> - %% sorted so that smallest number is first. which also means - %% eldest file (left-most) first - ok = add_index(), - {Files, TmpFiles} = get_disk_queue_files(), - ok = recover_crashed_compactions(Files, TmpFiles), - %% There should be no more tmp files now, so go ahead and load the - %% whole lot - Files1 = case Files of - [] -> [State #dqstate.current_file_name]; - _ -> Files - end, - State1 = load_messages(undefined, Files1, State), - %% Finally, check there is nothing in mnesia which we haven't - %% loaded - Key = mnesia:dirty_first(rabbit_disk_queue), - {ok, AlteredFiles} = prune_mnesia(State1, Key, sets:new(), [], 0), - State2 = compact(AlteredFiles, State1), - ok = extract_sequence_numbers(State2 #dqstate.sequences), - ok = del_index(), - {ok, State2}. - -prune_mnesia_flush_batch(DeleteAcc) -> +prune_mnesia_flush_batch(DeleteAcc, RemoveAcc, Store) -> lists:foldl(fun (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) - end, ok, DeleteAcc). - -prune_mnesia(_State, '$end_of_table', Files, _DeleteAcc, 0) -> - {ok, Files}; -prune_mnesia(_State, '$end_of_table', Files, DeleteAcc, _Len) -> - ok = prune_mnesia_flush_batch(DeleteAcc), - {ok, Files}; -prune_mnesia(State, Key, Files, DeleteAcc, Len) -> + end, ok, DeleteAcc), + rabbit_msg_store:remove(RemoveAcc, Store). + +prune_mnesia(Store) -> + prune_mnesia(Store, mnesia:dirty_first(rabbit_disk_queue), [], [], 0). + +prune_mnesia(Store, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> + Store; +prune_mnesia(Store, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> + prune_mnesia_flush_batch(DeleteAcc, RemoveAcc, Store); +prune_mnesia(Store, Key, DeleteAcc, RemoveAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), - {DeleteAcc1, Files1, Len1} = - case dets_ets_lookup(State, MsgId) of - [] -> + {DeleteAcc1, RemoveAcc1, Len1} = + case rabbit_msg_store:is_persistent(MsgId, Store) of + not_found -> %% msg hasn't been found on disk, delete it - {[{Q, SeqId} | DeleteAcc], Files, Len + 1}; - [#msg_location { msg_id = MsgId, is_persistent = true }] -> + {[{Q, SeqId} | DeleteAcc], RemoveAcc, Len + 1}; + true -> %% msg is persistent, keep it - {DeleteAcc, Files, Len}; - [#msg_location { msg_id = MsgId, is_persistent = false}] -> + {DeleteAcc, RemoveAcc, Len}; + false -> %% msg is not persistent, delete it - Files2 = remove_message(MsgId, Files, State), - {[{Q, SeqId} | DeleteAcc], Files2, Len + 1} + {[{Q, SeqId} | DeleteAcc], [MsgId | RemoveAcc], Len + 1} end, - {Key1, DeleteAcc2, Len2} = + {Store1, Key1, DeleteAcc2, RemoveAcc2, Len2} = if Len1 >= ?BATCH_SIZE -> %% We have no way of knowing how flushing the batch @@ -1686,14 +920,15 @@ prune_mnesia(State, Key, Files, DeleteAcc, Len) -> %% so have no choice but to start again. Although this %% will make recovery slower for large queues, we %% guarantee we can start up in constant memory - ok = prune_mnesia_flush_batch(DeleteAcc1), + Store2 = prune_mnesia_flush_batch(DeleteAcc1, RemoveAcc1, + Store), Key2 = mnesia:dirty_first(rabbit_disk_queue), - {Key2, [], 0}; + {Store2, Key2, [], [], 0}; true -> Key2 = mnesia:dirty_next(rabbit_disk_queue, Key), - {Key2, DeleteAcc1, Len1} + {Store, Key2, DeleteAcc1, RemoveAcc1, Len1} end, - prune_mnesia(State, Key1, Files1, DeleteAcc2, Len2). + prune_mnesia(Store1, Key1, DeleteAcc2, RemoveAcc2, Len2). 
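The batch-and-restart behaviour of prune_mnesia is subtle enough to warrant a short trace; a hedged walk-through, supposing ?BATCH_SIZE were 2 and rows k1..k4 were all prunable:

    %% pass 1: k1 and k2 fill the accumulators, the batch is flushed
    %%         (mnesia deletes plus one rabbit_msg_store:remove/2 call),
    %%         and the scan restarts from mnesia:dirty_first/1 because
    %%         the flush may have invalidated the cursor;
    %% pass 2: dirty_first/1 now yields k3; k3 and k4 are accumulated
    %%         and the final partial batch is flushed on reaching
    %%         '$end_of_table'.
    %% The accumulators never hold more than ?BATCH_SIZE keys, which is
    %% what buys the constant-memory recovery guarantee.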
extract_sequence_numbers(Sequences) -> true = @@ -1712,7 +947,7 @@ extract_sequence_numbers(Sequences) -> case ets:lookup(Sequences, Q) of [] -> ets:insert_new(Sequences, {Q, SeqId, NextWrite}); - [Orig = {Q, Read, Write}] -> + [Orig = {_, Read, Write}] -> Repl = {Q, lists:min([Read, SeqId]), lists:max([Write, NextWrite])}, case Orig == Repl of @@ -1767,213 +1002,8 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> end, shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). -load_messages(Left, [], State) -> - Num = list_to_integer(filename:rootname(Left)), - Offset = - case dets_ets_match_object(State, #msg_location { - file = Left, _ = '_' }) of - [] -> 0; - L -> - [ #msg_location { file = Left, - offset = MaxOffset, - total_size = TotalSize} | _ ] = - sort_msg_locations_by_offset(desc, L), - MaxOffset + TotalSize - end, - State #dqstate { current_file_num = Num, current_file_name = Left, - current_offset = Offset }; -load_messages(Left, [File|Files], - State = #dqstate { file_summary = FileSummary }) -> - {ok, Messages} = scan_file_for_valid_messages(File), - {ValidMessages, ValidTotalSize} = lists:foldl( - fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case length(mnesia:dirty_index_match_object - (rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, _ = '_' }, - msg_id)) of - 0 -> {VMAcc, VTSAcc}; - RefCount -> - true = dets_ets_insert_new( - State, #msg_location { - msg_id = MsgId, ref_count = RefCount, - file = File, offset = Offset, - total_size = TotalSize, - is_persistent = IsPersistent }), - {[Obj | VMAcc], VTSAcc + TotalSize} - end - end, {[], 0}, Messages), - %% foldl reverses lists, find_contiguous_block_prefix needs - %% msgs eldest first, so, ValidMessages is the right way round - {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), - Right = case Files of - [] -> undefined; - [F|_] -> F - end, - true = ets:insert_new(FileSummary, #file_summary { - file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - left = Left, right = Right }), - load_messages(File, Files, State). - -recover_crashed_compactions(Files, TmpFiles) -> - lists:foreach(fun (TmpFile) -> - ok = recover_crashed_compactions1(Files, TmpFile) end, - TmpFiles), - ok. - -verify_messages_in_mnesia(MsgIds) -> - lists:foreach( - fun (MsgId) -> - true = 0 < length(mnesia:dirty_index_match_object( - rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, _ = '_' }, - msg_id)) - end, MsgIds). - -scan_file_for_valid_messages_msg_ids(File) -> - {ok, Messages} = scan_file_for_valid_messages(File), - {ok, Messages, - [MsgId || {MsgId, _IsPersistent, _TotalSize, _FileOffset} <- Messages]}. - -recover_crashed_compactions1(Files, TmpFile) -> - NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, - true = lists:member(NonTmpRelatedFile, Files), - {ok, UncorruptedMessagesTmp, MsgIdsTmp} = - scan_file_for_valid_messages_msg_ids(TmpFile), - %% all of these messages should appear in the mnesia table, - %% otherwise they wouldn't have been copied out - verify_messages_in_mnesia(MsgIdsTmp), - {ok, UncorruptedMessages, MsgIds} = - scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile), - %% 1) It's possible that everything in the tmp file is also in the - %% main file such that the main file is (prefix ++ - %% tmpfile). This means that compaction failed immediately - %% prior to the final step of deleting the tmp file. 
Plan: just - %% delete the tmp file - %% 2) It's possible that everything in the tmp file is also in the - %% main file but with holes throughout (or just somthing like - %% main = (prefix ++ hole ++ tmpfile)). This means that - %% compaction wrote out the tmp file successfully and then - %% failed. Plan: just delete the tmp file and allow the - %% compaction to eventually be triggered later - %% 3) It's possible that everything in the tmp file is also in the - %% main file but such that the main file does not end with tmp - %% file (and there are valid messages in the suffix; main = - %% (prefix ++ tmpfile[with extra holes?] ++ suffix)). This - %% means that compaction failed as we were writing out the tmp - %% file. Plan: just delete the tmp file and allow the - %% compaction to eventually be triggered later - %% 4) It's possible that there are messages in the tmp file which - %% are not in the main file. This means that writing out the - %% tmp file succeeded, but then we failed as we were copying - %% them back over to the main file, after truncating the main - %% file. As the main file has already been truncated, it should - %% consist only of valid messages. Plan: Truncate the main file - %% back to before any of the files in the tmp file and copy - %% them over again - TmpPath = form_filename(TmpFile), - case is_sublist(MsgIdsTmp, MsgIds) of - true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file - %% note this also catches the case when the tmp file - %% is empty - ok = file:delete(TmpPath); - false -> - %% We're in case 4 above. We only care about the inital - %% msgs in main file that are not in the tmp file. If - %% there are no msgs in the tmp file then we would be in - %% the 'true' branch of this case, so we know the - %% lists:last call is safe. - EldestTmpMsgId = lists:last(MsgIdsTmp), - {MsgIds1, UncorruptedMessages1} - = case lists:splitwith( - fun (MsgId) -> MsgId /= EldestTmpMsgId end, MsgIds) of - {_MsgIds, []} -> %% no msgs from tmp in main - {MsgIds, UncorruptedMessages}; - {Dropped, [EldestTmpMsgId | Rest]} -> - %% Msgs in Dropped are in tmp, so forget them. - %% *cry*. Lists indexed from 1. - {Rest, lists:sublist(UncorruptedMessages, - 2 + length(Dropped), - length(Rest))} - end, - %% Check that everything in the main file prefix is a - %% valid message in mnesia - verify_messages_in_mnesia(MsgIds1), - %% The main file prefix should be contiguous - {Top, MsgIds1} = find_contiguous_block_prefix( - lists:reverse(UncorruptedMessages1)), - %% we should have that none of the messages in the prefix - %% are in the tmp file - true = is_disjoint(MsgIds1, MsgIdsTmp), - %% must open with read flag, otherwise will stomp over contents - {ok, MainHdl} = open_file(NonTmpRelatedFile, ?WRITE_MODE ++ [read]), - %% Wipe out any rubbish at the end of the file. Remember - %% the head of the list will be the highest entry in the - %% file. - [{_, _, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, - TmpSize = TmpTopOffset + TmpTopTotalSize, - %% Extend the main file as big as necessary in a single - %% move. 
If we run out of disk space, this truncate could - %% fail, but we still aren't risking losing data - ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), - {ok, TmpHdl} = open_file(TmpFile, ?READ_MODE), - {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), - ok = file:sync(MainHdl), - ok = file:close(MainHdl), - ok = file:close(TmpHdl), - ok = file:delete(TmpPath), - - {ok, _MainMessages, MsgIdsMain} = - scan_file_for_valid_messages_msg_ids(NonTmpRelatedFile), - %% check that everything in MsgIds1 is in MsgIdsMain - true = is_sublist(MsgIds1, MsgIdsMain), - %% check that everything in MsgIdsTmp is in MsgIdsMain - true = is_sublist(MsgIdsTmp, MsgIdsMain) - end, - ok. - -is_sublist(SmallerList, BiggerList) -> - lists:all(fun (Item) -> lists:member(Item, BiggerList) end, SmallerList). - -is_disjoint(SmallerList, BiggerList) -> - lists:all(fun (Item) -> not lists:member(Item, BiggerList) end, SmallerList). - -%% Takes the list in *ascending* order (i.e. eldest message -%% first). This is the opposite of what scan_file_for_valid_messages -%% produces. The list of msgs that is produced is youngest first. -find_contiguous_block_prefix([]) -> {0, []}; -find_contiguous_block_prefix(List) -> - find_contiguous_block_prefix(List, 0, []). - -find_contiguous_block_prefix([], ExpectedOffset, MsgIds) -> - {ExpectedOffset, MsgIds}; -find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, ExpectedOffset} - | Tail], ExpectedOffset, MsgIds) -> - ExpectedOffset1 = ExpectedOffset + TotalSize, - find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]); -find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> - {ExpectedOffset, MsgIds}. - -file_name_sort(A, B) -> - ANum = list_to_integer(filename:rootname(A)), - BNum = list_to_integer(filename:rootname(B)), - ANum < BNum. - -get_disk_queue_files() -> - DQFiles = filelib:wildcard("*" ++ ?FILE_EXTENSION, base_directory()), - DQFilesSorted = lists:sort(fun file_name_sort/2, DQFiles), - DQTFiles = filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, base_directory()), - DQTFilesSorted = lists:sort(fun file_name_sort/2, DQTFiles), - {DQFilesSorted, DQTFilesSorted}. - -scan_file_for_valid_messages(File) -> - case open_file(File, ?READ_MODE) of - {ok, Hdl} -> - Valid = rabbit_msg_file:scan(Hdl), - %% if something really bad's happened, the close could fail, - %% but ignore - file:close(Hdl), - Valid; - {error, enoent} -> {ok, []}; - {error, Reason} -> throw({error, {unable_to_scan_file, File, Reason}}) - end. +ref_count(MsgId) -> + length(mnesia:dirty_index_match_object( + rabbit_disk_queue, + #dq_msg_loc { msg_id = MsgId, _ = '_' }, + msg_id)). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl new file mode 100644 index 00000000..e4ccc1df --- /dev/null +++ b/src/rabbit_msg_store.erl @@ -0,0 +1,1128 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store). + +-export([init/6, write/4, read/2, is_persistent/2, remove/2, release/2, + needs_sync/2, sync/1, cleanup/1, cache_info/1, memory/1, + ets_bpr/1, to_disk_only_mode/1, to_ram_disk_mode/1]). + +%%---------------------------------------------------------------------------- + +-record(msstate, + {operation_mode, %% ram_disk | disk_only + dir, %% store directory + msg_location_dets, %% where are messages? + msg_location_ets, %% as above, but for ets version + file_summary, %% what's in the files? + current_file_num, %% current file name as number + current_file_name, %% current file name + current_file_handle, %% current file handle + current_offset, %% current offset within current file + current_dirty, %% has the current file been written to + %% since the last fsync? + file_size_limit, %% how big can our files get? + read_file_handle_cache, %% file handle cache for reading + last_sync_offset, %% current_offset at the last time we sync'd + message_cache, %% ets message cache + ets_bytes_per_record %% bytes per record in msg_location_ets + }). + +-record(msg_location, + {msg_id, ref_count, file, offset, total_size, is_persistent}). + +-record(file_summary, + {file, valid_total_size, contiguous_top, left, right}). + +-define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). +-define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). +-define(FILE_EXTENSION_DETS, ".dets"). + +-define(CACHE_ETS_NAME, rabbit_disk_queue_cache). +-define(CACHE_MAX_SIZE, 10485760). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read, read_ahead]). +-define(WRITE_MODE, [write, delayed_write]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(mode() :: 'ram_disk' | 'disk_only'). +-type(dets_table() :: any()). +-type(ets_table() :: any()). +-type(msg_id() :: any()). +-type(msg() :: any()). +-type(file_path() :: any()). +-type(io_device() :: any()). + +-type(msstate() :: #msstate { + operation_mode :: mode(), + dir :: file_path(), + msg_location_dets :: dets_table(), + msg_location_ets :: ets_table(), + file_summary :: ets_table(), + current_file_num :: non_neg_integer(), + current_file_name :: file_path(), + current_file_handle :: io_device(), + current_offset :: non_neg_integer(), + current_dirty :: boolean(), + file_size_limit :: non_neg_integer(), + read_file_handle_cache :: any(), + last_sync_offset :: non_neg_integer(), + message_cache :: ets_table(), + ets_bytes_per_record :: non_neg_integer() + }). + +-spec(init/6 :: ('ram_disk' | 'disk_only', file_path(), + non_neg_integer(), non_neg_integer(), + fun ((msg_id()) -> non_neg_integer()), non_neg_integer()) -> + msstate()). +-spec(write/4 :: (msg_id(), msg(), boolean(), msstate()) -> msstate()). 
+-spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found').
+-spec(is_persistent/2 :: (msg_id(), msstate()) -> boolean() | 'not_found').
+-spec(remove/2 :: ([msg_id()], msstate()) -> msstate()).
+-spec(release/2 :: ([msg_id()], msstate()) -> msstate()).
+-spec(needs_sync/2 :: ([msg_id()], msstate()) -> boolean()).
+-spec(sync/1 :: (msstate()) -> msstate()).
+-spec(cleanup/1 :: (msstate()) -> msstate()).
+-spec(cache_info/1 :: (msstate()) -> [{atom(), term()}]).
+-spec(memory/1 :: (msstate()) -> non_neg_integer()).
+-spec(ets_bpr/1 :: (msstate()) -> non_neg_integer()).
+-spec(to_disk_only_mode/1 :: (msstate()) -> msstate()).
+-spec(to_ram_disk_mode/1 :: (msstate()) -> msstate()).
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+%% The components:
+%%
+%% MsgLocation: this is a (d)ets table which contains:
+%%   {MsgId, RefCount, File, Offset, TotalSize, IsPersistent}
+%% FileSummary: this is an ets table which contains:
+%%   {File, ValidTotalSize, ContiguousTop, Left, Right}
+%%
+%% The basic idea is that messages are appended to the current file up
+%% until that file becomes too big (> file_size_limit). At that point,
+%% the file is closed and a new file is created on the _right_ of the
+%% old file which is used for new messages. Files are named
+%% numerically ascending, thus the file with the lowest name is the
+%% eldest file.
+%%
+%% We need to keep track of which messages are in which files (this is
+%% the MsgLocation table); how much useful data is in each file and
+%% which files are on the left and right of each other. This is the
+%% purpose of the FileSummary table.
+%%
+%% As messages are removed from files, holes appear in these
+%% files. The field ValidTotalSize contains the total amount of useful
+%% data left in the file, whilst ContiguousTop contains the amount of
+%% valid data right at the start of each file. These are needed for
+%% garbage collection.
+%%
+%% When we discover that either a file is now empty or that it can be
+%% combined with the useful data in either its left or right file, we
+%% compact the two files together. This keeps disk utilisation high
+%% and aids performance.
+%%
+%% Given the compaction between two files, the left file is considered
+%% the ultimate destination for the good data in the right file. If
+%% necessary, the good data in the left file which is fragmented
+%% throughout the file is written out to a temporary file, then read
+%% back in to form a contiguous chunk of good data at the start of the
+%% left file. Thus the left file is garbage collected and
+%% compacted. Then the good data from the right file is copied onto
+%% the end of the left file. MsgLocation and FileSummary tables are
+%% updated.
+%%
+%% On startup, we scan the files we discover, dealing with the
+%% possibilities of a crash having occurred during a compaction (this
+%% consists of tidyup - the compaction is deliberately designed such
+%% that data is duplicated on disk rather than risking it being lost),
+%% and rebuild the dets and ets tables (MsgLocation, FileSummary).
+%%
+%% MsgLocation is deliberately a dets table in order to ensure that we
+%% are not RAM constrained. However, for performance reasons, it is
+%% possible to call to_ram_disk_mode/1 which will convert MsgLocation
+%% to an ets table. This results in a massive performance improvement,
+%% at the expense of greater RAM usage. The idea is that when memory
+%% gets tight, we switch to disk_only mode but otherwise try to run in
+%% ram_disk mode.
+%%
+%% So, with this design, messages move to the left. Eventually, they
+%% should end up in a contiguous block on the left and are then never
+%% rewritten. But this isn't quite the case. If in a file there is one
+%% message that is being ignored, for some reason, and messages in the
+%% file to the right and in the current block are being read all the
+%% time then it will repeatedly be the case that the good data from
+%% both files can be combined and will be written out to a new
+%% file. Whenever this happens, our shunned message will be rewritten.
+%%
+%% So, provided that we combine messages in the right order,
+%% (i.e. left file, bottom to top, right file, bottom to top),
+%% eventually our shunned message will end up at the bottom of the
+%% left file. The compaction/combining algorithm is smart enough to
+%% read in good data from the left file that is scattered throughout
+%% (i.e. C and D in the below diagram), then truncate the file to just
+%% above B (i.e. truncate to the limit of the good contiguous region
+%% at the start of the file), then write C and D on top and then write
+%% E, F and G from the right file on top. Thus contiguous blocks of
+%% good data at the bottom of files are not rewritten (yes, this is
+%% the data the size of which is tracked by the ContiguousTop
+%% variable. Judicious use of a mirror is required).
+%%
+%% +-------+    +-------+         +-------+
+%% |   X   |    |   G   |         |   G   |
+%% +-------+    +-------+         +-------+
+%% |   D   |    |   X   |         |   F   |
+%% +-------+    +-------+         +-------+
+%% |   X   |    |   X   |         |   E   |
+%% +-------+    +-------+         +-------+
+%% |   C   |    |   F   |   ===>  |   D   |
+%% +-------+    +-------+         +-------+
+%% |   X   |    |   X   |         |   C   |
+%% +-------+    +-------+         +-------+
+%% |   B   |    |   X   |         |   B   |
+%% +-------+    +-------+         +-------+
+%% |   A   |    |   E   |         |   A   |
+%% +-------+    +-------+         +-------+
+%% left         right             left
+%%
+%% From this reasoning, we do have a bound on the number of times the
+%% message is rewritten. From when it is inserted, there can be no
+%% files inserted between it and the head of the queue, and the worst
+%% case is that every time it is rewritten, it moves one position lower
+%% in the file (for it to stay at the same position requires that
+%% there are no holes beneath it, which means truncate would be used
+%% and so it would not be rewritten at all). Thus this seems to
+%% suggest the limit is the number of messages ahead of it in the
+%% queue, though it's likely that that's pessimistic, given the
+%% requirements for compaction/combination of files.
+%%
+%% The other property that we have is the bound on the lowest
+%% utilisation, which should be 50% - worst case is that all files are
+%% fractionally over half full and can't be combined (equivalent is
+%% alternating full files and files with only one tiny message in
+%% them).
+%%
+%% Messages are reference-counted. When a message with the same id is
+%% written several times we only store it once, and only remove it
+%% from the store when it has been removed the same number of times.
+%%
+%% The reference counts do not persist. Therefore the initialisation
+%% function must be provided with a function that determines the
+%% initial reference count of any (recovered) message.
+%%
+%% Read messages with a reference count greater than one are entered
+%% into a message cache. The purpose of the cache is not especially
+%% performance, though it can help there too, but prevention of memory
+%% explosion.
It ensures that as messages with a high reference count +%% are read from several processes they are read back as the same +%% binary object rather than multiples of identical binary +%% objects. + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, + EtsBytesPerRecord) -> + + file:delete(msg_location_dets_file(Dir)), + + {ok, MsgLocationDets} = + dets:open_file(?MSG_LOC_NAME, + [{file, msg_location_dets_file(Dir)}, + {min_no_slots, 1024*1024}, + %% man says this should be <= 32M. But it works... + {max_no_slots, 30*1024*1024}, + {type, set}, + {keypos, 2} + ]), + + %% it would be better to have this as private, but dets:from_ets/2 + %% seems to blow up if it is set private - see bug21489 + MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, 2}]), + + InitName = "0" ++ ?FILE_EXTENSION, + HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, + ?BINARY_MODE ++ [read]), + State = + #msstate { operation_mode = Mode, + dir = Dir, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, + file_summary = ets:new( + ?FILE_SUMMARY_ETS_NAME, + [set, private, {keypos, 2}]), + current_file_num = 0, + current_file_name = InitName, + current_file_handle = undefined, + current_offset = 0, + current_dirty = false, + file_size_limit = FileSizeLimit, + read_file_handle_cache = HandleCache, + last_sync_offset = 0, + message_cache = ets:new(?CACHE_ETS_NAME, + [set, private]), + ets_bytes_per_record = EtsBytesPerRecord + }, + + Files = + sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), + TmpFiles = + sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), + ok = recover_crashed_compactions(RefCountFun, Dir, Files, TmpFiles), + %% There should be no more tmp files now, so go ahead and load the + %% whole lot + State1 = #msstate { current_file_name = CurrentName, + current_offset = Offset } = + load_messages(RefCountFun, Files, State), + + %% read is only needed so that we can seek + {ok, FileHdl} = open_file(Dir, CurrentName, ?WRITE_MODE ++ [read]), + {ok, Offset} = file:position(FileHdl, Offset), + + State1 #msstate { current_file_handle = FileHdl }. 
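Seen from the caller's side, the RefCountFun contract described in the comment block above looks as follows; a minimal sketch, reusing the disk queue's base_directory/0 and ref_count/1 from elsewhere in this patch, with the literal limits invented:

    %% Hypothetical call site; only the shape of the arguments matters.
    Store = rabbit_msg_store:init(
              ram_disk,          %% operation mode
              base_directory(),  %% store directory
              100 * 1024 * 1024, %% file_size_limit (made up)
              100,               %% read file handles limit (made up)
              fun ref_count/1,   %% initial ref count of a recovered message
              0),                %% ets_bytes_per_record: only consulted in
                                 %% disk_only mode, so a placeholder here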
+ +write(MsgId, Msg, IsPersistent, + State = #msstate { current_file_handle = CurHdl, + current_file_name = CurName, + current_offset = CurOffset, + file_summary = FileSummary }) -> + case dets_ets_lookup(State, MsgId) of + [] -> + %% New message, lots to do + {ok, TotalSize} = rabbit_msg_file:append( + CurHdl, MsgId, term_to_binary(Msg), + IsPersistent), + true = dets_ets_insert_new( + State, #msg_location { + msg_id = MsgId, ref_count = 1, file = CurName, + offset = CurOffset, total_size = TotalSize, + is_persistent = IsPersistent }), + [FSEntry = #file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined }] = + ets:lookup(FileSummary, CurName), + ValidTotalSize1 = ValidTotalSize + TotalSize, + ContiguousTop1 = if CurOffset =:= ContiguousTop -> + %% can't be any holes in this file + ValidTotalSize1; + true -> ContiguousTop + end, + true = ets:insert(FileSummary, FSEntry #file_summary { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), + NextOffset = CurOffset + TotalSize, + maybe_roll_to_new_file( + NextOffset, State #msstate {current_offset = NextOffset, + current_dirty = true}); + [StoreEntry = + #msg_location { msg_id = MsgId, ref_count = RefCount }] -> + %% We already know about it, just update counter + ok = dets_ets_insert(State, StoreEntry #msg_location { + ref_count = RefCount + 1 }), + State + end. + +read(MsgId, State) -> + Objs = dets_ets_lookup(State, MsgId), + case Objs of + [] -> + not_found; + [#msg_location { ref_count = RefCount, + file = File, + offset = Offset, + total_size = TotalSize }] -> + case fetch_and_increment_cache(MsgId, State) of + not_found -> + {{ok, {MsgId, MsgBody, _IsPersistent, _BodySize}}, State1} = + with_read_handle_at( + File, Offset, + fun(Hdl) -> + Res = case rabbit_msg_file:read( + Hdl, TotalSize) of + {ok, {MsgId, _, _, _}} = Obj -> Obj; + {ok, Rest} -> + throw({error, + {misread, + [{old_state, State}, + {file, File}, + {offset, Offset}, + {read, Rest}]}}) + end, + {Offset + TotalSize, Res} + end, State), + Message = binary_to_term(MsgBody), + ok = if RefCount > 1 -> + insert_into_cache(MsgId, Message, State1); + true -> ok + %% it's not in the cache and we + %% only have one reference to the + %% message. So don't bother + %% putting it in the cache. + end, + {Message, State1}; + {Message, _RefCount} -> + {Message, State} + end + end. + +is_persistent(MsgId, State) -> + Objs = dets_ets_lookup(State, MsgId), + case Objs of + [] -> + not_found; + [#msg_location { msg_id = MsgId, is_persistent = IsPersistent }] -> + IsPersistent + end. + +remove(MsgIds, State = #msstate { current_file_name = CurName }) -> + compact(sets:to_list( + lists:foldl( + fun (MsgId, Files1) -> + case remove_message(MsgId, State) of + {compact, File} -> + if CurName =:= File -> Files1; + true -> sets:add_element(File, Files1) + end; + no_compact -> Files1 + end + end, sets:new(), MsgIds)), + State). + +release(MsgIds, State) -> + lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), + State. + +needs_sync(_MsgIds, #msstate { current_dirty = false }) -> + false; +needs_sync(MsgIds, State = #msstate { current_file_name = CurFile, + last_sync_offset = SyncOffset }) -> + lists:any(fun (MsgId) -> + [#msg_location { msg_id = MsgId, file = File, + offset = Offset }] = + dets_ets_lookup(State, MsgId), + File =:= CurFile andalso Offset >= SyncOffset + end, MsgIds). 
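needs_sync/2 is the other half of the fsync batching: a committer asks whether any of its messages could still sit in the unsynced tail of the current file, and parks the transaction if so. The sketch below condenses the caller in rabbit_disk_queue's internal_tx_commit/5 earlier in this patch; maybe_commit/2 is a hypothetical name:

    %% Hedged sketch of the caller side: defer the commit whenever a
    %% crash could still lose one of the published messages.
    maybe_commit(Txn = {_Q, PubMsgIds, _AckSeqIds, _From},
                 State = #dqstate { store = Store, on_sync_txns = Txns }) ->
        case rabbit_msg_store:needs_sync(
               [MsgId || {MsgId, _IsDelivered} <- PubMsgIds], Store) of
            true  -> State #dqstate { on_sync_txns = [Txn | Txns] };
            false -> internal_do_tx_commit(Txn, State)
        end.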
+ +sync(State = #msstate { current_dirty = false }) -> + State; +sync(State = #msstate { current_file_handle = CurHdl, + current_offset = CurOffset }) -> + ok = file:sync(CurHdl), + State #msstate { current_dirty = false, last_sync_offset = CurOffset }. + +cleanup(State = #msstate { dir = Dir, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts, + file_summary = FileSummary, + current_file_handle = FileHdl, + read_file_handle_cache = HC }) -> + State1 = case FileHdl of + undefined -> State; + _ -> State2 = sync(State), + file:close(FileHdl), + State2 + end, + HC1 = rabbit_file_handle_cache:close_all(HC), + dets:close(MsgLocationDets), + file:delete(msg_location_dets_file(Dir)), + ets:delete(MsgLocationEts), + ets:delete(FileSummary), + State1 #msstate { msg_location_dets = undefined, + msg_location_ets = undefined, + file_summary = undefined, + current_file_handle = undefined, + current_dirty = false, + read_file_handle_cache = HC1 + }. + +cache_info(#msstate { message_cache = Cache }) -> + ets:info(Cache). + +memory(#msstate { operation_mode = ram_disk, + file_summary = FileSummary, + msg_location_ets = MsgLocationEts, + message_cache = Cache }) -> + erlang:system_info(wordsize) * + lists:sum([ets:info(Table, memory) || + Table <- [FileSummary, MsgLocationEts, Cache]]); +memory(#msstate { operation_mode = disk_only, + file_summary = FileSummary, + msg_location_dets = MsgLocationDets, + message_cache = Cache, + ets_bytes_per_record = EtsBytesPerRecord }) -> + erlang:system_info(wordsize) * + lists:sum([ets:info(Table, memory) || + Table <- [FileSummary, Cache]]) + + rabbit_misc:ceil(dets:info(MsgLocationDets, size) * EtsBytesPerRecord). + +ets_bpr(#msstate { operation_mode = disk_only, + ets_bytes_per_record = EtsBytesPerRecord }) -> + EtsBytesPerRecord; +ets_bpr(#msstate { operation_mode = ram_disk, + msg_location_ets = MsgLocationEts }) -> + erlang:system_info(wordsize) * ets:info(MsgLocationEts, memory) / + lists:max([1, ets:info(MsgLocationEts, size)]). + +to_disk_only_mode(State = #msstate { operation_mode = disk_only }) -> + State; +to_disk_only_mode(State = #msstate { operation_mode = ram_disk, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + ok = dets:from_ets(MsgLocationDets, MsgLocationEts), + true = ets:delete_all_objects(MsgLocationEts), + State #msstate { operation_mode = disk_only, + ets_bytes_per_record = ets_bpr(State) }. + +to_ram_disk_mode(State = #msstate { operation_mode = ram_disk }) -> + State; +to_ram_disk_mode(State = #msstate { operation_mode = disk_only, + msg_location_dets = MsgLocationDets, + msg_location_ets = MsgLocationEts }) -> + true = ets:from_dets(MsgLocationEts, MsgLocationDets), + ok = dets:delete_all_objects(MsgLocationDets), + State #msstate { operation_mode = ram_disk, + ets_bytes_per_record = undefined }. + +%%---------------------------------------------------------------------------- +%% general helper functions +%%---------------------------------------------------------------------------- + +form_filename(Dir, Name) -> + filename:join(Dir, Name). + +msg_location_dets_file(Dir) -> + form_filename(Dir, atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). + +open_file(Dir, File, Mode) -> + file:open(form_filename(Dir, File), ?BINARY_MODE ++ Mode). + +sort_file_names(Files) -> + lists:sort(fun (A, B) -> + ANum = list_to_integer(filename:rootname(A)), + BNum = list_to_integer(filename:rootname(B)), + ANum < BNum + end, Files). 
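One small point on sort_file_names/1: the sort key is the integer rootname, not the file name string, which matters as soon as file numbers reach two digits:

    %% Numeric ordering; a plain lexical sort would put "10.rdq"
    %% before "2.rdq".
    ["2.rdq", "9.rdq", "10.rdq"] = sort_file_names(["10.rdq", "2.rdq", "9.rdq"]).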
+ +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file:position(Hdl, FileSizeLimit), + ok = file:truncate(Hdl), + {ok, FinalPos} = file:position(Hdl, FinalPos), + ok. + +truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file:position(FileHdl, Lowpoint), + ok = file:truncate(FileHdl), + ok = preallocate(FileHdl, Highpoint, Lowpoint). + +with_read_handle_at(File, Offset, Fun, + State = #msstate { dir = Dir, + read_file_handle_cache = HC, + current_file_name = CurName, + current_dirty = IsDirty, + last_sync_offset = SyncOffset }) -> + State1 = if CurName =:= File andalso IsDirty andalso Offset >= SyncOffset -> + sync(State); + true -> State + end, + FilePath = form_filename(Dir, File), + {Result, HC1} = + rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), + {Result, State1 #msstate { read_file_handle_cache = HC1 }}. + +remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> + [StoreEntry = + #msg_location { msg_id = MsgId, ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize }] = + dets_ets_lookup(State, MsgId), + case RefCount of + 1 -> + ok = dets_ets_delete(State, MsgId), + ok = remove_cache_entry(MsgId, State), + [FSEntry = #file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop }] = + ets:lookup(FileSummary, File), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + ValidTotalSize1 = ValidTotalSize - TotalSize, + true = ets:insert(FileSummary, FSEntry #file_summary { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), + {compact, File}; + _ when 1 < RefCount -> + ok = decrement_cache(MsgId, State), + ok = dets_ets_insert(State, StoreEntry #msg_location { + ref_count = RefCount - 1 }), + no_compact + end. + +%%---------------------------------------------------------------------------- +%% message cache helper functions +%%---------------------------------------------------------------------------- + +remove_cache_entry(MsgId, #msstate { message_cache = Cache }) -> + true = ets:delete(Cache, MsgId), + ok. + +fetch_and_increment_cache(MsgId, #msstate { message_cache = Cache }) -> + case ets:lookup(Cache, MsgId) of + [] -> + not_found; + [{MsgId, Message, _RefCount}] -> + NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), + {Message, NewRefCount} + end. + +decrement_cache(MsgId, #msstate { message_cache = Cache }) -> + true = try case ets:update_counter(Cache, MsgId, {3, -1}) of + N when N =< 0 -> true = ets:delete(Cache, MsgId); + _N -> true + end + catch error:badarg -> + %% MsgId is not in there because although it's been + %% delivered, it's never actually been read (think: + %% persistent message in mixed queue) + true + end, + ok. + +insert_into_cache(MsgId, Message, #msstate { message_cache = Cache }) -> + case cache_is_full(Cache) of + true -> ok; + false -> true = ets:insert_new(Cache, {MsgId, Message, 1}), + ok + end. + +cache_is_full(Cache) -> + ets:info(Cache, memory) > ?CACHE_MAX_SIZE. + +%%---------------------------------------------------------------------------- +%% dets/ets agnosticism +%%---------------------------------------------------------------------------- + +dets_ets_lookup(#msstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, Key) -> + dets:lookup(MsgLocationDets, Key); +dets_ets_lookup(#msstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, Key) -> + ets:lookup(MsgLocationEts, Key). 
+
+dets_ets_delete(#msstate { msg_location_dets = MsgLocationDets,
+                           operation_mode = disk_only }, Key) ->
+    ok = dets:delete(MsgLocationDets, Key);
+dets_ets_delete(#msstate { msg_location_ets = MsgLocationEts,
+                           operation_mode = ram_disk }, Key) ->
+    true = ets:delete(MsgLocationEts, Key),
+    ok.
+
+dets_ets_insert(#msstate { msg_location_dets = MsgLocationDets,
+                           operation_mode = disk_only }, Obj) ->
+    ok = dets:insert(MsgLocationDets, Obj);
+dets_ets_insert(#msstate { msg_location_ets = MsgLocationEts,
+                           operation_mode = ram_disk }, Obj) ->
+    true = ets:insert(MsgLocationEts, Obj),
+    ok.
+
+dets_ets_insert_new(#msstate { msg_location_dets = MsgLocationDets,
+                               operation_mode = disk_only }, Obj) ->
+    true = dets:insert_new(MsgLocationDets, Obj);
+dets_ets_insert_new(#msstate { msg_location_ets = MsgLocationEts,
+                               operation_mode = ram_disk }, Obj) ->
+    true = ets:insert_new(MsgLocationEts, Obj).
+
+dets_ets_match_object(#msstate { msg_location_dets = MsgLocationDets,
+                                 operation_mode = disk_only }, Obj) ->
+    dets:match_object(MsgLocationDets, Obj);
+dets_ets_match_object(#msstate { msg_location_ets = MsgLocationEts,
+                                 operation_mode = ram_disk }, Obj) ->
+    ets:match_object(MsgLocationEts, Obj).
+
+%%----------------------------------------------------------------------------
+%% recovery
+%%----------------------------------------------------------------------------
+
+recover_crashed_compactions(RefCountFun, Dir, Files, TmpFiles) ->
+    lists:foreach(fun (TmpFile) ->
+                          ok = recover_crashed_compactions1(
+                                 RefCountFun, Dir, Files, TmpFile)
+                  end,
+                  TmpFiles),
+    ok.
+
+recover_crashed_compactions1(RefCountFun, Dir, Files, TmpFile) ->
+    NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION,
+    true = lists:member(NonTmpRelatedFile, Files),
+    {ok, UncorruptedMessagesTmp, MsgIdsTmp} =
+        scan_file_for_valid_messages_msg_ids(Dir, TmpFile),
+    %% all of these messages should be referenced, otherwise they
+    %% wouldn't have been copied out
+    verify_messages_referenced(RefCountFun, MsgIdsTmp),
+    {ok, UncorruptedMessages, MsgIds} =
+        scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFile),
+    %% 1) It's possible that everything in the tmp file is also in the
+    %%    main file such that the main file is (prefix ++
+    %%    tmpfile). This means that compaction failed immediately
+    %%    prior to the final step of deleting the tmp file. Plan: just
+    %%    delete the tmp file
+    %% 2) It's possible that everything in the tmp file is also in the
+    %%    main file but with holes throughout (or just something like
+    %%    main = (prefix ++ hole ++ tmpfile)). This means that
+    %%    compaction wrote out the tmp file successfully and then
+    %%    failed. Plan: just delete the tmp file and allow the
+    %%    compaction to eventually be triggered later
+    %% 3) It's possible that everything in the tmp file is also in the
+    %%    main file but such that the main file does not end with tmp
+    %%    file (and there are valid messages in the suffix; main =
+    %%    (prefix ++ tmpfile[with extra holes?] ++ suffix)). This
+    %%    means that compaction failed as we were writing out the tmp
+    %%    file. Plan: just delete the tmp file and allow the
+    %%    compaction to eventually be triggered later
+    %% 4) It's possible that there are messages in the tmp file which
+    %%    are not in the main file. This means that writing out the
+    %%    tmp file succeeded, but then we failed as we were copying
+    %%    them back over to the main file, after truncating the main
+    %%    file. As the main file has already been truncated, it should
+    %%    consist only of valid messages.
+    %%    Plan: Truncate the main file back to before any of the msgs
+    %%    in the tmp file and copy them over again
+    TmpPath = form_filename(Dir, TmpFile),
+    case is_sublist(MsgIdsTmp, MsgIds) of
+        true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file
+                %% note this also catches the case when the tmp file
+                %% is empty
+            ok = file:delete(TmpPath);
+        false ->
+            %% We're in case 4 above. We only care about the initial
+            %% msgs in the main file that are not in the tmp file. If
+            %% there are no msgs in the tmp file then we would be in
+            %% the 'true' branch of this case, so we know the
+            %% lists:last call is safe.
+            EldestTmpMsgId = lists:last(MsgIdsTmp),
+            {MsgIds1, UncorruptedMessages1}
+                = case lists:splitwith(
+                         fun (MsgId) -> MsgId /= EldestTmpMsgId end, MsgIds) of
+                      {_MsgIds, []} -> %% no msgs from tmp in main
+                          {MsgIds, UncorruptedMessages};
+                      {Dropped, [EldestTmpMsgId | Rest]} ->
+                          %% Msgs in Dropped are in tmp, so forget them.
+                          %% *cry*. Lists indexed from 1.
+                          {Rest, lists:sublist(UncorruptedMessages,
+                                               2 + length(Dropped),
+                                               length(Rest))}
+                  end,
+            %% Check that everything in the main file prefix is referenced
+            verify_messages_referenced(RefCountFun, MsgIds1),
+            %% The main file prefix should be contiguous
+            {Top, MsgIds1} = find_contiguous_block_prefix(
+                               lists:reverse(UncorruptedMessages1)),
+            %% none of the messages in the prefix should be in the
+            %% tmp file
+            true = is_disjoint(MsgIds1, MsgIdsTmp),
+            %% must open with the read flag, otherwise we will stomp
+            %% over the contents
+            {ok, MainHdl} = open_file(Dir, NonTmpRelatedFile,
+                                      ?WRITE_MODE ++ [read]),
+            %% Wipe out any rubbish at the end of the file. Remember
+            %% the head of the list will be the highest entry in the
+            %% file.
+            [{_, _, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp,
+            TmpSize = TmpTopOffset + TmpTopTotalSize,
+            %% Extend the main file as big as necessary in a single
+            %% move. If we run out of disk space, this truncate could
+            %% fail, but we still aren't risking losing data
+            ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize),
+            {ok, TmpHdl} = open_file(Dir, TmpFile, ?READ_MODE),
+            {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize),
+            ok = file:sync(MainHdl),
+            ok = file:close(MainHdl),
+            ok = file:close(TmpHdl),
+            ok = file:delete(TmpPath),
+
+            {ok, _MainMessages, MsgIdsMain} =
+                scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFile),
+            %% check that everything in MsgIds1 is in MsgIdsMain
+            true = is_sublist(MsgIds1, MsgIdsMain),
+            %% check that everything in MsgIdsTmp is in MsgIdsMain
+            true = is_sublist(MsgIdsTmp, MsgIdsMain)
+    end,
+    ok.
+
+is_sublist(SmallerL, BiggerL) ->
+    lists:all(fun (Item) -> lists:member(Item, BiggerL) end, SmallerL).
+
+is_disjoint(SmallerL, BiggerL) ->
+    lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL).
+
+verify_messages_referenced(RefCountFun, MsgIds) ->
+    lists:foreach(fun (MsgId) -> false = RefCountFun(MsgId) == 0 end, MsgIds).
+
+scan_file_for_valid_messages_msg_ids(Dir, File) ->
+    {ok, Messages} = scan_file_for_valid_messages(Dir, File),
+    {ok, Messages,
+     [MsgId || {MsgId, _IsPersistent, _TotalSize, _FileOffset} <- Messages]}.
+
+scan_file_for_valid_messages(Dir, File) ->
+    case open_file(Dir, File, ?READ_MODE) of
+        {ok, Hdl} ->
+            Valid = rabbit_msg_file:scan(Hdl),
+            %% if something really bad's happened, the close could fail,
+            %% but ignore
+            file:close(Hdl),
+            Valid;
+        {error, enoent} -> {ok, []};
+        {error, Reason} -> throw({error, {unable_to_scan_file, File, Reason}})
+    end.
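%% A worked example of the crash-recovery case analysis above, with
%% hypothetical msg ids: if the tmp file scans to [c, d] and the main
%% file scans to [a, b, c, d], then is_sublist([c, d], [a, b, c, d])
%% holds and we are in case 1, 2 or 3, so the tmp file is simply
%% deleted. If the main file instead scans to [a, b] (it was truncated
%% before the copy-back completed), the sublist test fails and we are
%% in case 4: the main file is re-extended and c and d are copied over
%% from the tmp file again.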
+ +%% Takes the list in *ascending* order (i.e. eldest message +%% first). This is the opposite of what scan_file_for_valid_messages +%% produces. The list of msgs that is produced is youngest first. +find_contiguous_block_prefix([]) -> {0, []}; +find_contiguous_block_prefix(List) -> + find_contiguous_block_prefix(List, 0, []). + +find_contiguous_block_prefix([], ExpectedOffset, MsgIds) -> + {ExpectedOffset, MsgIds}; +find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, ExpectedOffset} + | Tail], ExpectedOffset, MsgIds) -> + ExpectedOffset1 = ExpectedOffset + TotalSize, + find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]); +find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> + {ExpectedOffset, MsgIds}. + +load_messages(RefCountFun, [], State) -> + CurrentFile = State #msstate.current_file_name, + load_messages(RefCountFun, undefined, [CurrentFile], State); +load_messages(RefCountFun, Files, State) -> + load_messages(RefCountFun, undefined, Files, State). + +load_messages(_RefCountFun, Left, [], State) -> + Num = list_to_integer(filename:rootname(Left)), + Offset = + case dets_ets_match_object(State, #msg_location { + file = Left, _ = '_' }) of + [] -> 0; + L -> + [ #msg_location { file = Left, + offset = MaxOffset, + total_size = TotalSize} | _ ] = + sort_msg_locations_by_offset(desc, L), + MaxOffset + TotalSize + end, + State #msstate { current_file_num = Num, current_file_name = Left, + current_offset = Offset }; +load_messages(RefCountFun, Left, [File|Files], + State = #msstate { dir = Dir, file_summary = FileSummary }) -> + {ok, Messages} = scan_file_for_valid_messages(Dir, File), + {ValidMessages, ValidTotalSize} = lists:foldl( + fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case RefCountFun(MsgId) of + 0 -> {VMAcc, VTSAcc}; + RefCount -> + true = dets_ets_insert_new( + State, #msg_location { + msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize, + is_persistent = IsPersistent }), + {[Obj | VMAcc], VTSAcc + TotalSize} + end + end, {[], 0}, Messages), + %% foldl reverses lists, find_contiguous_block_prefix needs + %% msgs eldest first, so, ValidMessages is the right way round + {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), + Right = case Files of + [] -> undefined; + [F|_] -> F + end, + true = ets:insert_new(FileSummary, #file_summary { + file = File, valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + left = Left, right = Right }), + load_messages(RefCountFun, File, Files, State). 
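%% A worked example for find_contiguous_block_prefix/1, with
%% hypothetical msg ids, sizes and offsets: given, eldest first,
%%
%%     [{a, true, 10, 0}, {b, false, 20, 10}, {c, true, 5, 40}]
%%
%% a and b are contiguous (offset 0 + size 10 = offset 10), but c
%% starts at 40 where 30 was expected, so the result is {30, [b, a]}:
%% the size of the contiguous prefix together with its msg ids,
%% youngest first.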
+ +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation +%%---------------------------------------------------------------------------- + +maybe_roll_to_new_file(Offset, + State = #msstate { dir = Dir, + file_size_limit = FileSizeLimit, + current_file_name = CurName, + current_file_handle = CurHdl, + current_file_num = CurNum, + file_summary = FileSummary + } + ) when Offset >= FileSizeLimit -> + State1 = sync(State), + ok = file:close(CurHdl), + NextNum = CurNum + 1, + NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, + {ok, NextHdl} = open_file(Dir, NextName, ?WRITE_MODE), + true = ets:update_element(FileSummary, CurName, + {#file_summary.right, NextName}), + true = ets:insert_new( + FileSummary, #file_summary { + file = NextName, valid_total_size = 0, contiguous_top = 0, + left = CurName, right = undefined }), + State2 = State1 #msstate { current_file_name = NextName, + current_file_handle = NextHdl, + current_file_num = NextNum, + current_offset = 0, + last_sync_offset = 0 + }, + compact([CurName], State2); +maybe_roll_to_new_file(_, State) -> + State. + +compact(Files, State) -> + %% smallest number, hence eldest, hence left-most, first + SortedFiles = sort_file_names(Files), + %% foldl reverses, so now youngest/right-most first + RemainingFiles = lists:foldl(fun (File, Acc) -> + delete_empty_files(File, Acc, State) + end, [], SortedFiles), + lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). + +%% At this stage, we simply know that the file has had msgs removed +%% from it. However, we don't know if we need to merge it left (which +%% is what we would prefer), or merge it right. If we merge left, then +%% this file is the source, and the left file is the destination. If +%% we merge right then this file is the destination and the right file +%% is the source. +combine_file(File, State = #msstate { file_summary = FileSummary, + current_file_name = CurName }) -> + %% the file we're looking at may no longer exist as it may have + %% been deleted within the current GC run + case ets:lookup(FileSummary, File) of + [] -> State; + [FSEntry = #file_summary { left = Left, right = Right }] -> + GoRight = + fun() -> + case Right of + undefined -> State; + _ when not (CurName == Right) -> + [FSRight] = ets:lookup(FileSummary, Right), + {_, State1} = adjust_meta_and_combine( + FSEntry, FSRight, State), + State1; + _ -> State + end + end, + case Left of + undefined -> + GoRight(); + _ -> [FSLeft] = ets:lookup(FileSummary, Left), + case adjust_meta_and_combine(FSLeft, FSEntry, State) of + {true, State1} -> State1; + {false, State} -> GoRight() + end + end + end. 
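%% An illustration of the merge preference above, assuming a chain of
%% files 0.rdq <-> 1.rdq <-> 2.rdq: compacting 1.rdq first tries
%% adjust_meta_and_combine on (0.rdq, 1.rdq), i.e. a merge left; only
%% if their combined valid data would exceed the file size limit does
%% GoRight() try (1.rdq, 2.rdq), and even that is skipped when 2.rdq
%% is the current write file.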
+ +adjust_meta_and_combine( + LeftObj = #file_summary { + file = LeftFile, valid_total_size = LeftValidData, right = RightFile }, + RightObj = #file_summary { + file = RightFile, valid_total_size = RightValidData, left = LeftFile, + right = RightRight }, + State = #msstate { file_size_limit = FileSizeLimit, + file_summary = FileSummary }) -> + TotalValidData = LeftValidData + RightValidData, + if FileSizeLimit >= TotalValidData -> + State1 = combine_files(RightObj, LeftObj, State), + %% this could fail if RightRight is undefined + ets:update_element(FileSummary, RightRight, + {#file_summary.left, LeftFile}), + true = ets:insert(FileSummary, LeftObj #file_summary { + valid_total_size = TotalValidData, + contiguous_top = TotalValidData, + right = RightRight }), + true = ets:delete(FileSummary, RightFile), + {true, State1}; + true -> {false, State} + end. + +sort_msg_locations_by_offset(Dir, List) -> + Comp = case Dir of + asc -> fun erlang:'<'/2; + desc -> fun erlang:'>'/2 + end, + lists:sort(fun (#msg_location { offset = OffA }, + #msg_location { offset = OffB }) -> + Comp(OffA, OffB) + end, List). + +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, + State = #msstate { dir = Dir }) -> + State1 = close_file(Source, close_file(Destination, State)), + {ok, SourceHdl} = open_file(Dir, Source, ?READ_MODE), + {ok, DestinationHdl} = open_file(Dir, Destination, + ?READ_MODE ++ ?WRITE_MODE), + ExpectedSize = SourceValid + DestinationValid, + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source + if DestinationContiguousTop =:= DestinationValid -> + ok = truncate_and_extend_file(DestinationHdl, + DestinationValid, ExpectedSize); + true -> + Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_MODE ++ ?WRITE_MODE), + Worklist = + lists:dropwhile( + fun (#msg_location { offset = Offset }) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == + %% DestinationContiguousTop because if it + %% was then DestinationContiguousTop would + %% have been extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I suspect + %% that the list should be naturally sorted + %% as we require, however, we need to + %% enforce it anyway + end, sort_msg_locations_by_offset( + asc, dets_ets_match_object( + State1, #msg_location { + file = Destination, _ = '_' }))), + ok = copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination, State1), + TmpSize = DestinationValid - DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage from + %% Destination, and MsgLocationDets has been updated to + %% reflect compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file:position(TmpHdl, 0), + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + ok = 
file:sync(DestinationHdl), + ok = file:close(TmpHdl), + ok = file:delete(form_filename(Dir, Tmp)) + end, + SourceWorkList = + sort_msg_locations_by_offset( + asc, dets_ets_match_object(State1, #msg_location { + file = Source, _ = '_' })), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State1), + %% tidy up + ok = file:close(SourceHdl), + ok = file:close(DestinationHdl), + ok = file:delete(form_filename(Dir, Source)), + State1. + +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, State) -> + {FinalOffset, BlockStart1, BlockEnd1} = + lists:foldl( + fun (StoreEntry = #msg_location { offset = Offset, + total_size = TotalSize }, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the DestinationFile. + %% Offset, BlockStart and BlockEnd are in the SourceFile + %% update MsgLocationDets to reflect change of file and offset + ok = dets_ets_insert(State, StoreEntry #msg_location { + file = Destination, + offset = CurOffset }), + NextOffset = CurOffset + TotalSize, + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {NextOffset, Offset, Offset + TotalSize}; + Offset =:= BlockEnd -> + %% extend the current block because the next + %% msg follows straight on + {NextOffset, BlockStart, BlockEnd + TotalSize}; + true -> + %% found a gap, so actually do the work for + %% the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file:position(SourceHdl, BlockStart), + {ok, BSize} = + file:copy(SourceHdl, DestinationHdl, BSize), + {NextOffset, Offset, Offset + TotalSize} + end + end, {InitOffset, undefined, undefined}, WorkList), + %% do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = file:position(SourceHdl, BlockStart1), + {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), + ok = file:sync(DestinationHdl), + ok. + +close_file(File, State = #msstate { dir = Dir, read_file_handle_cache = HC }) -> + HC1 = rabbit_file_handle_cache:close_file(form_filename(Dir, File), HC), + State #msstate { read_file_handle_cache = HC1 }. + +delete_empty_files(File, Acc, + #msstate { dir = Dir, file_summary = FileSummary }) -> + [#file_summary { valid_total_size = ValidData, + left = Left, right = Right }] = + ets:lookup(FileSummary, File), + case ValidData of + %% we should NEVER find the current file in here hence right + %% should always be a file, not undefined + 0 -> + case {Left, Right} of + {undefined, _} when not is_atom(Right) -> + %% the eldest file is empty. + true = ets:update_element( + FileSummary, Right, + {#file_summary.left, undefined}); + {_, _} when not (is_atom(Right)) -> + true = ets:update_element(FileSummary, Right, + {#file_summary.left, Left}), + true = + ets:update_element(FileSummary, Left, + {#file_summary.right, Right}) + end, + true = ets:delete(FileSummary, File), + ok = file:delete(form_filename(Dir, File)), + Acc; + _ -> [File|Acc] + end. -- cgit v1.2.1 From 3d84698fca3083ee53f12c0c248bd321c668ef24 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 05:07:53 +0100 Subject: do not return body size from rabbit_msg_file:read it's not used anywhere and was cluttering the api Also, make type sigs more meaningful and do not include rabbit.hrl, thus underlining the general nature of this module. 
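For illustration, a caller that previously matched
{ok, {MsgId, MsgBody, Persistent, BodySize}} now simply matches the
three-tuple, along the lines of

    {ok, {MsgId, MsgBody, IsPersistent}} = rabbit_msg_file:read(Hdl, TotalSize)

(a sketch; the call site updated below is rabbit_msg_store:read/2).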
--- src/rabbit_msg_file.erl | 23 +++++++++++++---------- src/rabbit_msg_store.erl | 4 ++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 0b3b5af8..d5b891b6 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -35,8 +35,6 @@ %%---------------------------------------------------------------------------- --include("rabbit.hrl"). - -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(WRITE_OK_SIZE_BITS, 8). @@ -48,14 +46,19 @@ -ifdef(use_specs). --spec(append/4 :: (io_device(), msg_id(), binary(), boolean()) -> - ({'ok', non_neg_integer()} | {'error', any()})). --spec(read/2 :: (io_device(), non_neg_integer()) -> - ({'ok', {msg_id(), binary(), boolean(), non_neg_integer()}} | - {'error', any()})). +-type(io_device() :: any()). +-type(msg_id() :: any()). +-type(msg() :: binary()). +-type(msg_attrs() :: boolean()). +-type(position() :: non_neg_integer()). +-type(msg_size() :: non_neg_integer()). + +-spec(append/4 :: (io_device(), msg_id(), msg(), msg_attrs()) -> + ({'ok', msg_size()} | {'error', any()})). +-spec(read/2 :: (io_device(), msg_size()) -> + ({'ok', {msg_id(), msg(), msg_attrs()}} | {'error', any()})). -spec(scan/1 :: (io_device()) -> - {'ok', [{msg_id(), boolean(), non_neg_integer(), - non_neg_integer()}]}). + {'ok', [{msg_id(), msg_attrs(), msg_size(), position()}]}). -endif. @@ -93,7 +96,7 @@ read(FileHdl, TotalSize) -> ?WRITE_OK_TRANSIENT -> false; ?WRITE_OK_PERSISTENT -> true end, - {ok, {binary_to_term(MsgId), MsgBody, Persistent, BodySize}}; + {ok, {binary_to_term(MsgId), MsgBody, Persistent}}; KO -> KO end. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e4ccc1df..e45c9a63 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -369,13 +369,13 @@ read(MsgId, State) -> total_size = TotalSize }] -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgId, MsgBody, _IsPersistent, _BodySize}}, State1} = + {{ok, {MsgId, MsgBody, _IsPersistent}}, State1} = with_read_handle_at( File, Offset, fun(Hdl) -> Res = case rabbit_msg_file:read( Hdl, TotalSize) of - {ok, {MsgId, _, _, _}} = Obj -> Obj; + {ok, {MsgId, _, _}} = Obj -> Obj; {ok, Rest} -> throw({error, {misread, -- cgit v1.2.1 From 3fab3c99e35761a5b88dad7f7b35d03ffe2e1919 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 06:00:54 +0100 Subject: perform term/binary conversion of msg body in rabbit_msg_file thus further generalising rabbit_msg_file --- src/rabbit_msg_file.erl | 20 +++++++++++--------- src/rabbit_msg_store.erl | 22 ++++++++++------------ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index d5b891b6..254d987d 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -48,7 +48,7 @@ -type(io_device() :: any()). -type(msg_id() :: any()). --type(msg() :: binary()). +-type(msg() :: any()). -type(msg_attrs() :: boolean()). -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). 
@@ -64,11 +64,12 @@ %%---------------------------------------------------------------------------- -append(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> - BodySize = size(MsgBody), - MsgIdBin = term_to_binary(MsgId), +append(FileHdl, MsgId, MsgBody, IsPersistent) -> + MsgBodyBin = term_to_binary(MsgBody), + BodyBinSize = size(MsgBodyBin), + MsgIdBin = term_to_binary(MsgId), MsgIdBinSize = size(MsgIdBin), - Size = BodySize + MsgIdBinSize, + Size = BodyBinSize + MsgIdBinSize, StopByte = case IsPersistent of true -> ?WRITE_OK_PERSISTENT; false -> ?WRITE_OK_TRANSIENT @@ -76,7 +77,7 @@ append(FileHdl, MsgId, MsgBody, IsPersistent) when is_binary(MsgBody) -> case file:write(FileHdl, <>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; KO -> KO @@ -89,14 +90,15 @@ read(FileHdl, TotalSize) -> {ok, <>} -> - BodySize = Size - MsgIdBinSize, - <> = Rest, Persistent = case StopByte of ?WRITE_OK_TRANSIENT -> false; ?WRITE_OK_PERSISTENT -> true end, - {ok, {binary_to_term(MsgId), MsgBody, Persistent}}; + {ok, {binary_to_term(MsgIdBin), binary_to_term(MsgBodyBin), + Persistent}}; KO -> KO end. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e45c9a63..427a6695 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -326,8 +326,7 @@ write(MsgId, Msg, IsPersistent, [] -> %% New message, lots to do {ok, TotalSize} = rabbit_msg_file:append( - CurHdl, MsgId, term_to_binary(Msg), - IsPersistent), + CurHdl, MsgId, Msg, IsPersistent), true = dets_ets_insert_new( State, #msg_location { msg_id = MsgId, ref_count = 1, file = CurName, @@ -369,7 +368,7 @@ read(MsgId, State) -> total_size = TotalSize }] -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgId, MsgBody, _IsPersistent}}, State1} = + {{ok, {MsgId, Msg, _IsPersistent}}, State1} = with_read_handle_at( File, Offset, fun(Hdl) -> @@ -386,18 +385,17 @@ read(MsgId, State) -> end, {Offset + TotalSize, Res} end, State), - Message = binary_to_term(MsgBody), ok = if RefCount > 1 -> - insert_into_cache(MsgId, Message, State1); + insert_into_cache(MsgId, Msg, State1); true -> ok %% it's not in the cache and we %% only have one reference to the %% message. So don't bother %% putting it in the cache. end, - {Message, State1}; - {Message, _RefCount} -> - {Message, State} + {Msg, State1}; + {Msg, _RefCount} -> + {Msg, State} end end. @@ -602,9 +600,9 @@ fetch_and_increment_cache(MsgId, #msstate { message_cache = Cache }) -> case ets:lookup(Cache, MsgId) of [] -> not_found; - [{MsgId, Message, _RefCount}] -> + [{MsgId, Msg, _RefCount}] -> NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), - {Message, NewRefCount} + {Msg, NewRefCount} end. decrement_cache(MsgId, #msstate { message_cache = Cache }) -> @@ -620,10 +618,10 @@ decrement_cache(MsgId, #msstate { message_cache = Cache }) -> end, ok. -insert_into_cache(MsgId, Message, #msstate { message_cache = Cache }) -> +insert_into_cache(MsgId, Msg, #msstate { message_cache = Cache }) -> case cache_is_full(Cache) of true -> ok; - false -> true = ets:insert_new(Cache, {MsgId, Message, 1}), + false -> true = ets:insert_new(Cache, {MsgId, Msg, 1}), ok end. -- cgit v1.2.1 From 3a12c75a825173952694509dbe36415204e0fb0f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 06:22:08 +0100 Subject: resolve msg_id type confusion rabbit_mixed_queue and rabbit_disk_queue see guids, not non_neg_integers. 
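To illustrate, the type now lives with its users rather than in
rabbit.hrl: rabbit_amqqueue and rabbit_channel declare

    -type(msg_id() :: non_neg_integer()).

while rabbit_disk_queue and rabbit_mixed_queue declare

    -type(msg_id() :: guid()).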
--- include/rabbit.hrl | 3 --- src/rabbit_amqqueue.erl | 2 ++ src/rabbit_channel.erl | 3 +++ src/rabbit_disk_queue.erl | 1 + src/rabbit_mixed_queue.erl | 14 ++++++++------ 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 25a36732..c17ac7eb 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -157,9 +157,6 @@ txn :: maybe(txn()), sender :: pid(), message :: message()}). -%% this really should be an abstract type --type(msg_id() :: non_neg_integer()). --type(msg() :: {queue_name(), pid(), msg_id(), bool(), message()}). -type(listener() :: #listener{node :: erlang_node(), protocol :: atom(), diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index ad0a0f0c..41286cf7 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -56,6 +56,8 @@ -ifdef(use_specs). +-type(msg_id() :: non_neg_integer()). +-type(msg() :: {queue_name(), pid(), msg_id(), bool(), message()}). -type(qstats() :: {'ok', queue_name(), non_neg_integer(), non_neg_integer()}). -type(qlen() :: {'ok', non_neg_integer()}). -type(qfun(A) :: fun ((amqqueue()) -> A)). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 397659c1..c178826b 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -54,6 +54,9 @@ -ifdef(use_specs). +-type(msg_id() :: non_neg_integer()). +-type(msg() :: {queue_name(), pid(), msg_id(), bool(), message()}). + -spec(start_link/5 :: (channel_number(), pid(), pid(), username(), vhost()) -> pid()). -spec(do/2 :: (pid(), amqp_method()) -> 'ok'). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ad5d8fb1..6beccf3a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -89,6 +89,7 @@ -ifdef(use_specs). +-type(msg_id() :: guid()). -type(seq_id() :: non_neg_integer()). -type(ack_tag() :: {msg_id(), seq_id()}). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 2bb9c09a..da94d893 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -67,21 +67,23 @@ memory_size :: (non_neg_integer() | 'undefined'), prefetcher :: (pid() | 'undefined') }). --type(acktag() :: ( 'no_on_disk' | { non_neg_integer(), non_neg_integer() })). +-type(msg_id() :: guid()). +-type(seq_id() :: non_neg_integer()). +-type(ack_tag() :: ( 'no_on_disk' | {msg_id(), seq_id()} )). -type(okmqs() :: {'ok', mqstate()}). -spec(init/2 :: (queue_name(), boolean()) -> okmqs()). -spec(publish/2 :: (message(), mqstate()) -> okmqs()). -spec(publish_delivered/2 :: (message(), mqstate()) -> - {'ok', acktag(), mqstate()}). + {'ok', ack_tag(), mqstate()}). -spec(fetch/1 :: (mqstate()) -> - {('empty' | {message(), boolean(), acktag(), non_neg_integer()}), + {('empty' | {message(), boolean(), ack_tag(), non_neg_integer()}), mqstate()}). --spec(ack/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). +-spec(ack/2 :: ([{message(), ack_tag()}], mqstate()) -> okmqs()). -spec(tx_publish/2 :: (message(), mqstate()) -> okmqs()). --spec(tx_commit/3 :: ([message()], [acktag()], mqstate()) -> okmqs()). +-spec(tx_commit/3 :: ([message()], [ack_tag()], mqstate()) -> okmqs()). -spec(tx_rollback/2 :: ([message()], mqstate()) -> okmqs()). --spec(requeue/2 :: ([{message(), acktag()}], mqstate()) -> okmqs()). +-spec(requeue/2 :: ([{message(), ack_tag()}], mqstate()) -> okmqs()). -spec(purge/1 :: (mqstate()) -> okmqs()). -spec(delete_queue/1 :: (mqstate()) -> {'ok', mqstate()}). -spec(len/1 :: (mqstate()) -> non_neg_integer()). 
-- cgit v1.2.1 From 8288df7a3641ad9447501a447f019ac61ed1deff Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 07:22:14 +0100 Subject: generalise persistent flag to message attributes in rabbit_msg_file --- src/rabbit_msg_file.erl | 93 +++++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 50 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 254d987d..f14656cf 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -38,9 +38,8 @@ -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(WRITE_OK_SIZE_BITS, 8). --define(WRITE_OK_TRANSIENT, 255). --define(WRITE_OK_PERSISTENT, 254). --define(FILE_PACKING_ADJUSTMENT, (1 + (2* (?INTEGER_SIZE_BYTES)))). +-define(WRITE_OK_MARKER, 255). +-define(FILE_PACKING_ADJUSTMENT, (1 + (3 * (?INTEGER_SIZE_BYTES)))). %%---------------------------------------------------------------------------- @@ -49,7 +48,7 @@ -type(io_device() :: any()). -type(msg_id() :: any()). -type(msg() :: any()). --type(msg_attrs() :: boolean()). +-type(msg_attrs() :: any()). -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). @@ -64,21 +63,19 @@ %%---------------------------------------------------------------------------- -append(FileHdl, MsgId, MsgBody, IsPersistent) -> - MsgBodyBin = term_to_binary(MsgBody), - BodyBinSize = size(MsgBodyBin), - MsgIdBin = term_to_binary(MsgId), - MsgIdBinSize = size(MsgIdBin), - Size = BodyBinSize + MsgIdBinSize, - StopByte = case IsPersistent of - true -> ?WRITE_OK_PERSISTENT; - false -> ?WRITE_OK_TRANSIENT - end, +append(FileHdl, MsgId, MsgBody, MsgAttrs) -> + [MsgIdBin, MsgBodyBin, MsgAttrsBin] = Bins = + [term_to_binary(X) || X <- [MsgId, MsgBody, MsgAttrs]], + [MsgIdBinSize, MsgBodyBinSize, MsgAttrsBinSize] = Sizes = + [size(B) || B <- Bins], + Size = lists:sum(Sizes), case file:write(FileHdl, <>) of + MsgAttrsBin:MsgAttrsBinSize/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; KO -> KO end. @@ -89,16 +86,16 @@ read(FileHdl, TotalSize) -> case file:read(FileHdl, TotalSize) of {ok, <>} -> - BodyBinSize = Size - MsgIdBinSize, - <> = Rest, - Persistent = case StopByte of - ?WRITE_OK_TRANSIENT -> false; - ?WRITE_OK_PERSISTENT -> true - end, - {ok, {binary_to_term(MsgIdBin), binary_to_term(MsgBodyBin), - Persistent}}; + BodyBinSize = Size - MsgIdBinSize - MsgAttrsBinSize, + <> = Rest, + [MsgId, MsgBody, MsgAttrs] = + [binary_to_term(B) || B <- [MsgIdBin, MsgBodyBin, MsgAttrsBin]], + {ok, {MsgId, MsgBody, MsgAttrs}}; KO -> KO end. @@ -109,22 +106,23 @@ scan(FileHdl, Offset, Acc) -> eof -> {ok, Acc}; {corrupted, NextOffset} -> scan(FileHdl, NextOffset, Acc); - {ok, {MsgId, IsPersistent, TotalSize, NextOffset}} -> + {ok, {MsgId, MsgAttrs, TotalSize, NextOffset}} -> scan(FileHdl, NextOffset, - [{MsgId, IsPersistent, TotalSize, Offset} | Acc]); + [{MsgId, MsgAttrs, TotalSize, Offset} | Acc]); _KO -> %% bad message, but we may still have recovered some valid messages {ok, Acc} end. 
read_next(FileHdl, Offset) -> - TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, - case file:read(FileHdl, TwoIntegers) of + ThreeIntegers = 3 * ?INTEGER_SIZE_BYTES, + case file:read(FileHdl, ThreeIntegers) of {ok, - <>} -> - case {Size, MsgIdBinSize} of - {0, _} -> eof; %% Nothing we can do other than stop - {_, 0} -> + <>} -> + if Size == 0 -> eof; %% Nothing we can do other than stop + MsgIdBinSize == 0 orelse MsgAttrsBinSize == 0 -> %% current message corrupted, try skipping past it ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT, case file:position(FileHdl, {cur, Size + 1}) of @@ -132,21 +130,24 @@ read_next(FileHdl, Offset) -> {ok, _SomeOtherPos} -> eof; %% seek failed, so give up KO -> KO end; - {_, _} -> %% all good, let's continue - case file:read(FileHdl, MsgIdBinSize) of - {ok, <>} -> + true -> %% all good, let's continue + HeaderSize = MsgIdBinSize + MsgAttrsBinSize, + case file:read(FileHdl, HeaderSize) of + {ok, <>} -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Offset + TotalSize - 1, case file:position( - FileHdl, {cur, Size - MsgIdBinSize}) of + FileHdl, {cur, Size - HeaderSize}) of {ok, ExpectedAbsPos} -> NextOffset = ExpectedAbsPos + 1, - case read_stop_byte(FileHdl) of - {ok, Persistent} -> - MsgId = binary_to_term(MsgIdBin), - {ok, {MsgId, Persistent, + case file:read(FileHdl, 1) of + {ok, <>} -> + {ok, {binary_to_term(MsgIdBin), + binary_to_term(MsgAttrsBin), TotalSize, NextOffset}}; - corrupted -> + {ok, _SomeOtherData} -> {corrupted, NextOffset}; KO -> KO end; @@ -160,11 +161,3 @@ read_next(FileHdl, Offset) -> end; Other -> Other end. - -read_stop_byte(FileHdl) -> - case file:read(FileHdl, 1) of - {ok, <>} -> {ok, false}; - {ok, <>} -> {ok, true}; - {ok, _SomeOtherData} -> corrupted; - KO -> KO - end. -- cgit v1.2.1 From dccced0385d13efec926d730b2fff1bac0069a9e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 07:43:59 +0100 Subject: generalise persistent flag to message attributes in rabbit_msg_store This is just a renaming exercise, but it turns rabbit_msg_store into a general purpose message store. --- src/rabbit_disk_queue.erl | 4 ++-- src/rabbit_msg_store.erl | 36 +++++++++++++++++------------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 6beccf3a..b786f036 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -544,7 +544,7 @@ internal_fetch_attributes(Q, MarkDelivered, Advance, case next(Q, MarkDelivered, Advance, State) of empty -> empty; {MsgId, IsDelivered, AckTag, Remaining} -> - IsPersistent = rabbit_msg_store:is_persistent(MsgId, Store), + IsPersistent = rabbit_msg_store:attrs(MsgId, Store), {MsgId, IsPersistent, IsDelivered, AckTag, Remaining} end. @@ -902,7 +902,7 @@ prune_mnesia(Store, Key, DeleteAcc, RemoveAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), {DeleteAcc1, RemoveAcc1, Len1} = - case rabbit_msg_store:is_persistent(MsgId, Store) of + case rabbit_msg_store:attrs(MsgId, Store) of not_found -> %% msg hasn't been found on disk, delete it {[{Q, SeqId} | DeleteAcc], RemoveAcc, Len + 1}; diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 427a6695..a1e9e17a 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_store). 
--export([init/6, write/4, read/2, is_persistent/2, remove/2, release/2, +-export([init/6, write/4, read/2, attrs/2, remove/2, release/2, needs_sync/2, sync/1, cleanup/1, cache_info/1, memory/1, ets_bpr/1, to_disk_only_mode/1, to_ram_disk_mode/1]). @@ -57,7 +57,7 @@ }). -record(msg_location, - {msg_id, ref_count, file, offset, total_size, is_persistent}). + {msg_id, ref_count, file, offset, total_size, attrs}). -record(file_summary, {file, valid_total_size, contiguous_top, left, right}). @@ -84,6 +84,7 @@ -type(ets_table() :: any()). -type(msg_id() :: any()). -type(msg() :: any()). +-type(msg_attrs() :: any()). -type(file_path() :: any()). -type(io_device() :: any()). @@ -109,9 +110,9 @@ non_neg_integer(), non_neg_integer(), fun ((msg_id()) -> non_neg_integer()), non_neg_integer()) -> msstate()). --spec(write/4 :: (msg_id(), msg(), boolean(), msstate()) -> msstate()). +-spec(write/4 :: (msg_id(), msg(), msg_attrs(), msstate()) -> msstate()). -spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found'). --spec(is_persistent/2 :: (msg_id(), msstate()) -> boolean() | 'not_found'). +-spec(attrs/2 :: (msg_id(), msstate()) -> msg_attrs() | 'not_found'). -spec(remove/2 :: ([msg_id()], msstate()) -> msstate()). -spec(release/2 :: ([msg_id()], msstate()) -> msstate()). -spec(needs_sync/2 :: ([msg_id()], msstate()) -> boolean()). @@ -130,7 +131,7 @@ %% The components: %% %% MsgLocation: this is a (d)ets table which contains: -%% {MsgId, RefCount, File, Offset, TotalSize, IsPersistent} +%% {MsgId, RefCount, File, Offset, TotalSize, Attrs} %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} %% @@ -317,7 +318,7 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, State1 #msstate { current_file_handle = FileHdl }. -write(MsgId, Msg, IsPersistent, +write(MsgId, Msg, Attrs, State = #msstate { current_file_handle = CurHdl, current_file_name = CurName, current_offset = CurOffset, @@ -325,13 +326,12 @@ write(MsgId, Msg, IsPersistent, case dets_ets_lookup(State, MsgId) of [] -> %% New message, lots to do - {ok, TotalSize} = rabbit_msg_file:append( - CurHdl, MsgId, Msg, IsPersistent), + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg, Attrs), true = dets_ets_insert_new( State, #msg_location { msg_id = MsgId, ref_count = 1, file = CurName, offset = CurOffset, total_size = TotalSize, - is_persistent = IsPersistent }), + attrs = Attrs }), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, right = undefined }] = @@ -368,7 +368,7 @@ read(MsgId, State) -> total_size = TotalSize }] -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgId, Msg, _IsPersistent}}, State1} = + {{ok, {MsgId, Msg, _Attrs}}, State1} = with_read_handle_at( File, Offset, fun(Hdl) -> @@ -399,13 +399,11 @@ read(MsgId, State) -> end end. -is_persistent(MsgId, State) -> +attrs(MsgId, State) -> Objs = dets_ets_lookup(State, MsgId), case Objs of - [] -> - not_found; - [#msg_location { msg_id = MsgId, is_persistent = IsPersistent }] -> - IsPersistent + [] -> not_found; + [#msg_location { msg_id = MsgId, attrs = Attrs }] -> Attrs end. remove(MsgIds, State = #msstate { current_file_name = CurName }) -> @@ -790,7 +788,7 @@ verify_messages_referenced(RefCountFun, MsgIds) -> scan_file_for_valid_messages_msg_ids(Dir, File) -> {ok, Messages} = scan_file_for_valid_messages(Dir, File), {ok, Messages, - [MsgId || {MsgId, _IsPersistent, _TotalSize, _FileOffset} <- Messages]}. 
+ [MsgId || {MsgId, _Attrs, _TotalSize, _FileOffset} <- Messages]}. scan_file_for_valid_messages(Dir, File) -> case open_file(Dir, File, ?READ_MODE) of @@ -813,7 +811,7 @@ find_contiguous_block_prefix(List) -> find_contiguous_block_prefix([], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}; -find_contiguous_block_prefix([{MsgId, _IsPersistent, TotalSize, ExpectedOffset} +find_contiguous_block_prefix([{MsgId, _Attrs, TotalSize, ExpectedOffset} | Tail], ExpectedOffset, MsgIds) -> ExpectedOffset1 = ExpectedOffset + TotalSize, find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]); @@ -845,7 +843,7 @@ load_messages(RefCountFun, Left, [File|Files], State = #msstate { dir = Dir, file_summary = FileSummary }) -> {ok, Messages} = scan_file_for_valid_messages(Dir, File), {ValidMessages, ValidTotalSize} = lists:foldl( - fun (Obj = {MsgId, IsPersistent, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + fun (Obj = {MsgId, Attrs, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case RefCountFun(MsgId) of 0 -> {VMAcc, VTSAcc}; RefCount -> @@ -854,7 +852,7 @@ load_messages(RefCountFun, Left, [File|Files], msg_id = MsgId, ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize, - is_persistent = IsPersistent }), + attrs = Attrs }), {[Obj | VMAcc], VTSAcc + TotalSize} end end, {[], 0}, Messages), -- cgit v1.2.1 From 581a435bdee03b7da8a3bad9c202933feef317e0 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 08:05:46 +0100 Subject: refactoring of delete_empty_files giving it a name that describes what it does, and extracting recursion vestiges into caller. --- src/rabbit_msg_store.erl | 49 +++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index a1e9e17a..e2e13ec8 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -907,9 +907,13 @@ compact(Files, State) -> %% smallest number, hence eldest, hence left-most, first SortedFiles = sort_file_names(Files), %% foldl reverses, so now youngest/right-most first - RemainingFiles = lists:foldl(fun (File, Acc) -> - delete_empty_files(File, Acc, State) - end, [], SortedFiles), + RemainingFiles = + lists:foldl(fun (File, Acc) -> + case delete_file_if_empty(File, State) of + true -> Acc; + false -> [File | Acc] + end + end, [], SortedFiles), lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). %% At this stage, we simply know that the file has had msgs removed @@ -1095,30 +1099,29 @@ close_file(File, State = #msstate { dir = Dir, read_file_handle_cache = HC }) -> HC1 = rabbit_file_handle_cache:close_file(form_filename(Dir, File), HC), State #msstate { read_file_handle_cache = HC1 }. -delete_empty_files(File, Acc, - #msstate { dir = Dir, file_summary = FileSummary }) -> +delete_file_if_empty(File, + #msstate { dir = Dir, file_summary = FileSummary }) -> [#file_summary { valid_total_size = ValidData, left = Left, right = Right }] = ets:lookup(FileSummary, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined - 0 -> - case {Left, Right} of - {undefined, _} when not is_atom(Right) -> - %% the eldest file is empty. 
- true = ets:update_element( - FileSummary, Right, - {#file_summary.left, undefined}); - {_, _} when not (is_atom(Right)) -> - true = ets:update_element(FileSummary, Right, - {#file_summary.left, Left}), - true = - ets:update_element(FileSummary, Left, - {#file_summary.right, Right}) - end, - true = ets:delete(FileSummary, File), - ok = file:delete(form_filename(Dir, File)), - Acc; - _ -> [File|Acc] + 0 -> case {Left, Right} of + {undefined, _} when not is_atom(Right) -> + %% the eldest file is empty. + true = ets:update_element( + FileSummary, Right, + {#file_summary.left, undefined}); + {_, _} when not (is_atom(Right)) -> + true = ets:update_element(FileSummary, Right, + {#file_summary.left, Left}), + true = + ets:update_element(FileSummary, Left, + {#file_summary.right, Right}) + end, + true = ets:delete(FileSummary, File), + ok = file:delete(form_filename(Dir, File)), + true; + _ -> false end. -- cgit v1.2.1 From a4a5849c228cf62f2be4d6a1823ba1ca4933993f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 09:44:39 +0100 Subject: shut up dialyzer --- src/rabbit_mixed_queue.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index da94d893..b2a03b58 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -53,6 +53,7 @@ ). -define(TO_DISK_MAX_FLUSH_SIZE, 100000). +-define(MAGIC_MARKER, <<"$magic_marker">>). %%---------------------------------------------------------------------------- @@ -654,7 +655,8 @@ on_disk(mixed, _IsDurable, _IsPersistent) -> false. publish_magic_marker_message(Q) -> Msg = rabbit_basic:message( - none, internal, [], <<>>, rabbit_guid:guid(), true), + rabbit_misc:r(<<"/">>, exchange, <<>>), ?MAGIC_MARKER, + [], <<>>, rabbit_guid:guid(), true), ok = rabbit_disk_queue:publish(Q, ensure_binary_properties(Msg), false). fetch_ack_magic_marker_message(Q) -> @@ -664,8 +666,7 @@ fetch_ack_magic_marker_message(Q) -> {ok, Length}. is_magic_marker_message( - #basic_message { exchange_name = none, routing_key = internal, - is_persistent = true }) -> + #basic_message { routing_key = ?MAGIC_MARKER, is_persistent = true }) -> true; is_magic_marker_message(_) -> false. -- cgit v1.2.1 From 249131e48b1e08c5c27bbba3e765acf3862cdc3c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 16:48:55 +0100 Subject: rename prune_mnesia to prune since we prune more than just mnesia --- src/rabbit_disk_queue.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b786f036..b863f608 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -249,7 +249,7 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> Store = rabbit_msg_store:init(Mode, base_directory(), FileSizeLimit, ReadFileHandlesLimit, fun ref_count/1, EtsBPR), - Store1 = prune_mnesia(Store), + Store1 = prune(Store), ok = del_index(), Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), @@ -885,20 +885,20 @@ del_index() -> E1 -> E1 end. -prune_mnesia_flush_batch(DeleteAcc, RemoveAcc, Store) -> +prune_flush_batch(DeleteAcc, RemoveAcc, Store) -> lists:foldl(fun (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) end, ok, DeleteAcc), rabbit_msg_store:remove(RemoveAcc, Store). -prune_mnesia(Store) -> - prune_mnesia(Store, mnesia:dirty_first(rabbit_disk_queue), [], [], 0). +prune(Store) -> + prune(Store, mnesia:dirty_first(rabbit_disk_queue), [], [], 0). 
-prune_mnesia(Store, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> +prune(Store, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> Store; -prune_mnesia(Store, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> - prune_mnesia_flush_batch(DeleteAcc, RemoveAcc, Store); -prune_mnesia(Store, Key, DeleteAcc, RemoveAcc, Len) -> +prune(Store, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> + prune_flush_batch(DeleteAcc, RemoveAcc, Store); +prune(Store, Key, DeleteAcc, RemoveAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), {DeleteAcc1, RemoveAcc1, Len1} = @@ -921,7 +921,7 @@ prune_mnesia(Store, Key, DeleteAcc, RemoveAcc, Len) -> %% so have no choice but to start again. Although this %% will make recovery slower for large queues, we %% guarantee we can start up in constant memory - Store2 = prune_mnesia_flush_batch(DeleteAcc1, RemoveAcc1, + Store2 = prune_flush_batch(DeleteAcc1, RemoveAcc1, Store), Key2 = mnesia:dirty_first(rabbit_disk_queue), {Store2, Key2, [], [], 0}; @@ -929,7 +929,7 @@ prune_mnesia(Store, Key, DeleteAcc, RemoveAcc, Len) -> Key2 = mnesia:dirty_next(rabbit_disk_queue, Key), {Store, Key2, DeleteAcc1, RemoveAcc1, Len1} end, - prune_mnesia(Store1, Key1, DeleteAcc2, RemoveAcc2, Len2). + prune(Store1, Key1, DeleteAcc2, RemoveAcc2, Len2). extract_sequence_numbers(Sequences) -> true = -- cgit v1.2.1 From 01e69ce9002ecf1b2f27738a2cf1a72ecb834c72 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 7 Sep 2009 17:41:13 +0100 Subject: refactoring: move dets/ets match into sort_msg_locations_by_offset --- src/rabbit_msg_store.erl | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e2e13ec8..357c4867 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -827,16 +827,12 @@ load_messages(RefCountFun, Files, State) -> load_messages(_RefCountFun, Left, [], State) -> Num = list_to_integer(filename:rootname(Left)), Offset = - case dets_ets_match_object(State, #msg_location { - file = Left, _ = '_' }) of + case sort_msg_locations_by_offset(desc, Left, State) of [] -> 0; - L -> - [ #msg_location { file = Left, - offset = MaxOffset, - total_size = TotalSize} | _ ] = - sort_msg_locations_by_offset(desc, L), + [#msg_location { offset = MaxOffset, + total_size = TotalSize } | _] -> MaxOffset + TotalSize - end, + end, State #msstate { current_file_num = Num, current_file_name = Left, current_offset = Offset }; load_messages(RefCountFun, Left, [File|Files], @@ -975,7 +971,7 @@ adjust_meta_and_combine( true -> {false, State} end. -sort_msg_locations_by_offset(Dir, List) -> +sort_msg_locations_by_offset(Dir, File, State) -> Comp = case Dir of asc -> fun erlang:'<'/2; desc -> fun erlang:'>'/2 @@ -983,7 +979,8 @@ sort_msg_locations_by_offset(Dir, List) -> lists:sort(fun (#msg_location { offset = OffA }, #msg_location { offset = OffB }) -> Comp(OffA, OffB) - end, List). + end, dets_ets_match_object( + State, #msg_location { file = File, _ = '_' })). 
combine_files(#file_summary { file = Source, valid_total_size = SourceValid, @@ -1023,10 +1020,7 @@ combine_files(#file_summary { file = Source, %% that the list should be naturally sorted %% as we require, however, we need to %% enforce it anyway - end, sort_msg_locations_by_offset( - asc, dets_ets_match_object( - State1, #msg_location { - file = Destination, _ = '_' }))), + end, sort_msg_locations_by_offset(asc, Destination, State1)), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State1), @@ -1044,10 +1038,7 @@ combine_files(#file_summary { file = Source, ok = file:close(TmpHdl), ok = file:delete(form_filename(Dir, Tmp)) end, - SourceWorkList = - sort_msg_locations_by_offset( - asc, dets_ets_match_object(State1, #msg_location { - file = Source, _ = '_' })), + SourceWorkList = sort_msg_locations_by_offset(asc, Source, State1), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up -- cgit v1.2.1 From 4858a7dbbaa8a79fb38be73fa31f332f1fde6e77 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 8 Sep 2009 11:18:26 +0100 Subject: change guid to a binary, using the md5 of term_to_binary The main motivation is to reduce the memory and on-disk footprint of the guid from ~34 bytes to 16. But it turns out that this actually results in a speed improvement of a few percent as well, even for non-persistent messaging, presumably due to the memory management effects and the fact that 16 byte binaries are easier to copy between processes than the deep(ish) original guid structure. --- include/rabbit.hrl | 2 +- src/rabbit_guid.erl | 4 ++-- src/rabbit_msg_file.erl | 37 ++++++++++++++++++------------------- src/rabbit_msg_store.erl | 2 +- src/rabbit_tests.erl | 38 ++++++++++++++++++++++---------------- 5 files changed, 44 insertions(+), 39 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index c17ac7eb..095044e7 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -89,7 +89,7 @@ -type(file_open_mode() :: any()). %% this is really an abstract type, but dialyzer does not support them --type(guid() :: any()). +-type(guid() :: binary()). -type(txn() :: guid()). -type(pkey() :: guid()). -type(r(Kind) :: diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index 45816b85..5053d188 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -99,7 +99,7 @@ guid() -> {S, I} -> {S, I+1} end, put(guid, G), - G. + erlang:md5(term_to_binary(G)). %% generate a readable string representation of a guid. Note that any %% monotonicity of the guid is not preserved in the encoding. @@ -110,7 +110,7 @@ string_guid(Prefix) -> %% %% TODO: once debian stable and EPEL have moved from R11B-2 to %% R11B-4 or later we should change this to use base64. - Prefix ++ "-" ++ ssl_base64:encode(erlang:md5(term_to_binary(guid()))). + Prefix ++ "-" ++ ssl_base64:encode(guid()). binstring_guid(Prefix) -> list_to_binary(string_guid(Prefix)). diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index f14656cf..46128612 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -46,7 +46,7 @@ -ifdef(use_specs). -type(io_device() :: any()). --type(msg_id() :: any()). +-type(msg_id() :: binary()). -type(msg() :: any()). -type(msg_attrs() :: any()). -type(position() :: non_neg_integer()). 
@@ -63,16 +63,16 @@ %%---------------------------------------------------------------------------- -append(FileHdl, MsgId, MsgBody, MsgAttrs) -> - [MsgIdBin, MsgBodyBin, MsgAttrsBin] = Bins = - [term_to_binary(X) || X <- [MsgId, MsgBody, MsgAttrs]], - [MsgIdBinSize, MsgBodyBinSize, MsgAttrsBinSize] = Sizes = - [size(B) || B <- Bins], +append(FileHdl, MsgId, MsgBody, MsgAttrs) when is_binary(MsgId) -> + MsgBodyBin = term_to_binary(MsgBody), + MsgAttrsBin = term_to_binary(MsgAttrs), + [MsgIdSize, MsgBodyBinSize, MsgAttrsBinSize] = Sizes = + [size(B) || B <- [MsgId, MsgBodyBin, MsgAttrsBin]], Size = lists:sum(Sizes), case file:write(FileHdl, <>) of @@ -85,17 +85,16 @@ read(FileHdl, TotalSize) -> SizeWriteOkBytes = Size + 1, case file:read(FileHdl, TotalSize) of {ok, <>} -> - BodyBinSize = Size - MsgIdBinSize - MsgAttrsBinSize, - <> = Rest, - [MsgId, MsgBody, MsgAttrs] = - [binary_to_term(B) || B <- [MsgIdBin, MsgBodyBin, MsgAttrsBin]], - {ok, {MsgId, MsgBody, MsgAttrs}}; + {ok, {MsgId, + binary_to_term(MsgBodyBin), binary_to_term(MsgAttrsBin)}}; KO -> KO end. @@ -119,10 +118,10 @@ read_next(FileHdl, Offset) -> case file:read(FileHdl, ThreeIntegers) of {ok, <>} -> if Size == 0 -> eof; %% Nothing we can do other than stop - MsgIdBinSize == 0 orelse MsgAttrsBinSize == 0 -> + MsgIdSize == 0 orelse MsgAttrsBinSize == 0 -> %% current message corrupted, try skipping past it ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT, case file:position(FileHdl, {cur, Size + 1}) of @@ -131,9 +130,9 @@ read_next(FileHdl, Offset) -> KO -> KO end; true -> %% all good, let's continue - HeaderSize = MsgIdBinSize + MsgAttrsBinSize, + HeaderSize = MsgIdSize + MsgAttrsBinSize, case file:read(FileHdl, HeaderSize) of - {ok, <>} -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Offset + TotalSize - 1, @@ -144,7 +143,7 @@ read_next(FileHdl, Offset) -> case file:read(FileHdl, 1) of {ok, <>} -> - {ok, {binary_to_term(MsgIdBin), + {ok, {MsgId, binary_to_term(MsgAttrsBin), TotalSize, NextOffset}}; {ok, _SomeOtherData} -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 357c4867..da904193 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -82,7 +82,7 @@ -type(mode() :: 'ram_disk' | 'disk_only'). -type(dets_table() :: any()). -type(ets_table() :: any()). --type(msg_id() :: any()). +-type(msg_id() :: binary()). -type(msg() :: any()). -type(msg_attrs() :: any()). -type(file_path() :: any()). diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 039e9aa4..1f2187bc 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -848,8 +848,11 @@ benchmark_disk_queue() -> passed. rdq_message(MsgId, MsgBody, IsPersistent) -> - rabbit_basic:message(x, <<>>, [], MsgBody, MsgId, IsPersistent). + rabbit_basic:message(x, <<>>, [], MsgBody, term_to_binary(MsgId), + IsPersistent). +rdq_match_message(Msg, MsgId, MsgBody, Size) when is_number(MsgId) -> + rdq_match_message(Msg, term_to_binary(MsgId), MsgBody, Size); rdq_match_message( #basic_message { guid = MsgId, content = #content { payload_fragments_rev = [MsgBody] }}, @@ -860,13 +863,17 @@ rdq_match_messages(#basic_message { guid = MsgId, content = #content { payload_f #basic_message { guid = MsgId, content = #content { payload_fragments_rev = MsgBody }}) -> ok. +commit_list(List, MsgCount) -> + lists:zip([term_to_binary(MsgId) || MsgId <- List], + lists:duplicate(MsgCount, false)). 
+ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> Startup = rdq_virgin(), rdq_start(), QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), - CommitList = lists:zip(List, lists:duplicate(MsgCount, false)), + CommitList = commit_list(List, MsgCount), {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) @@ -905,7 +912,7 @@ rdq_stress_gc(MsgCount) -> MsgSizeBytes = 256*1024, Msg = <<0:(8*MsgSizeBytes)>>, % 256KB List = lists:seq(1, MsgCount), - CommitList = lists:zip(List, lists:duplicate(MsgCount, false)), + CommitList = commit_list(List, MsgCount), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List], rabbit_disk_queue:tx_commit(q, CommitList, []), StartChunk = round(MsgCount / 20), % 5% @@ -948,7 +955,7 @@ rdq_test_startup_with_queue_gaps() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - CommitAll = lists:zip(All, lists:duplicate(Total, false)), + CommitAll = commit_list(All, Total), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, true)) || N <- All], rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), @@ -1005,7 +1012,7 @@ rdq_test_redeliver() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - CommitAll = lists:zip(All, lists:duplicate(Total, false)), + CommitAll = commit_list(All, Total), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), @@ -1058,7 +1065,7 @@ rdq_test_purge() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - CommitAll = lists:zip(All, lists:duplicate(Total, false)), + CommitAll = commit_list(All, Total), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), @@ -1170,12 +1177,10 @@ rdq_test_mixed_queue_modes() -> rdq_test_mode_conversion_mid_txn() -> Payload = <<0:(8*256)>>, MsgIdsA = lists:seq(0,9), - MsgsA = [ rabbit_basic:message(x, <<>>, [], Payload, MsgId, - (0 == MsgId rem 2)) - || MsgId <- MsgIdsA ], + MsgsA = [ rdq_message(MsgId, Payload, (0 == MsgId rem 2)) + || MsgId <- MsgIdsA ], MsgIdsB = lists:seq(10,20), - MsgsB = [ rabbit_basic:message(x, <<>>, [], Payload, MsgId, - (0 == MsgId rem 2)) + MsgsB = [ rdq_message(MsgId, Payload, (0 == MsgId rem 2)) || MsgId <- MsgIdsB ], rdq_virgin(), @@ -1229,7 +1234,8 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - {AckTags, MS8} = lists:foldl( fun (Msg, {Acc, MS7}) -> - Rem = Len1 - (Msg #basic_message.guid) - 1, + MsgId = binary_to_term(Msg #basic_message.guid), + Rem = Len1 - MsgId - 1, {{Msg1, false, AckTag, Rem}, MS7a} = rabbit_mixed_queue:fetch(MS7), ok = rdq_match_messages(Msg, Msg1), @@ -1243,7 +1249,8 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - {AckTags, MS8} = lists:foldl( fun (Msg, {Acc, MS7}) -> - Rem = Len0 - (Msg #basic_message.guid) - 1, + MsgId = binary_to_term(Msg #basic_message.guid), + Rem = Len0 - MsgId - 1, {{Msg1, false, AckTag, Rem}, MS7a} = rabbit_mixed_queue:fetch(MS7), ok = rdq_match_messages(Msg, Msg1), @@ -1266,9 +1273,8 @@ rdq_test_disk_queue_modes() -> Total = 1000, Half1 = lists:seq(1,round(Total/2)), Half2 = lists:seq(1 + round(Total/2), Total), - CommitHalf1 = lists:zip(Half1, lists:duplicate(round(Total/2), false)), - CommitHalf2 = lists:zip(Half2, lists:duplicate - (Total - round(Total/2), false)), + 
CommitHalf1 = commit_list(Half1, round(Total/2)), + CommitHalf2 = commit_list(Half2, Total - round(Total/2)), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half1], ok = rabbit_disk_queue:tx_commit(q, CommitHalf1, []), io:format("Publish done~n", []), -- cgit v1.2.1 From afaea9ac1e22eaa7607c0ab418e94a114accf6f6 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 8 Sep 2009 13:21:49 +0100 Subject: api tweak: pass durable queues in list rather than set lists are more pleasant than sets in APIs, plus in our use we have a list to start with. --- src/rabbit.erl | 4 +--- src/rabbit_disk_queue.erl | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index f3008a93..3993de50 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -155,10 +155,8 @@ start(normal, []) -> ok = maybe_insert_default_data(), ok = rabbit_exchange:recover(), {ok, DurableQueues} = rabbit_amqqueue:recover(), - DurableQueueNames = - sets:from_list([ Q #amqqueue.name || Q <- DurableQueues ]), ok = rabbit_disk_queue:delete_non_durable_queues( - DurableQueueNames) + [ Q #amqqueue.name || Q <- DurableQueues ]) end}, {"builtin applications", fun () -> diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b863f608..c2622184 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -112,7 +112,7 @@ -spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). -spec(purge/1 :: (queue_name()) -> non_neg_integer()). -spec(delete_queue/1 :: (queue_name()) -> 'ok'). --spec(delete_non_durable_queues/1 :: (set()) -> 'ok'). +-spec(delete_non_durable_queues/1 :: ([queue_name()]) -> 'ok'). -spec(len/1 :: (queue_name()) -> non_neg_integer()). -spec(foldl/3 :: (fun ((message(), ack_tag(), boolean(), A) -> A), A, queue_name()) -> A). @@ -792,9 +792,10 @@ internal_delete_queue(Q, State) -> internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> + DurableQueueSet = sets:from_list(DurableQueues), ets:foldl( fun ({Q, _Read, _Write}, {ok, State1}) -> - case sets:is_element(Q, DurableQueues) of + case sets:is_element(Q, DurableQueueSet) of true -> {ok, State1}; false -> internal_delete_queue(Q, State1) end -- cgit v1.2.1 From 64ef2d0e175a6ca1110c0ff6f0f5d5814033eedf Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 8 Sep 2009 14:01:28 +0100 Subject: minor tidying up: use symbolic record indices instead of numeric ones --- src/rabbit_msg_store.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index da904193..241453c9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -270,24 +270,26 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, %% man says this should be <= 32M. But it works... 
{max_no_slots, 30*1024*1024}, {type, set}, - {keypos, 2} + {keypos, #msg_location.msg_id} ]), %% it would be better to have this as private, but dets:from_ets/2 %% seems to blow up if it is set private - see bug21489 - MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, 2}]), + MsgLocationEts = ets:new(?MSG_LOC_NAME, + [set, protected, {keypos, #msg_location.msg_id}]), InitName = "0" ++ ?FILE_EXTENSION, HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, ?BINARY_MODE ++ [read]), + FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, + [set, private, {keypos, #file_summary.file}]), + MessageCache = ets:new(?CACHE_ETS_NAME, [set, private]), State = #msstate { operation_mode = Mode, dir = Dir, msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, - file_summary = ets:new( - ?FILE_SUMMARY_ETS_NAME, - [set, private, {keypos, 2}]), + file_summary = FileSummary, current_file_num = 0, current_file_name = InitName, current_file_handle = undefined, @@ -296,8 +298,7 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, file_size_limit = FileSizeLimit, read_file_handle_cache = HandleCache, last_sync_offset = 0, - message_cache = ets:new(?CACHE_ETS_NAME, - [set, private]), + message_cache = MessageCache, ets_bytes_per_record = EtsBytesPerRecord }, -- cgit v1.2.1 From 5025c31d2a099fbb0b05bd9974abee71cdf8af03 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 8 Sep 2009 15:26:54 +0100 Subject: refer to files by their number not their name This makes matching faster and keeps record sizes smaller. It also means we can get rid of one bit of state. --- src/rabbit_msg_store.erl | 172 +++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 88 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 241453c9..d5959f9f 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -43,8 +43,7 @@ msg_location_dets, %% where are messages? msg_location_ets, %% as above, but for ets version file_summary, %% what's in the files? 
- current_file_num, %% current file name as number - current_file_name, %% current file name + current_file, %% current file name as number current_file_handle, %% current file handle current_offset, %% current offset within current file current_dirty, %% has the current file been written to @@ -94,8 +93,7 @@ msg_location_dets :: dets_table(), msg_location_ets :: ets_table(), file_summary :: ets_table(), - current_file_num :: non_neg_integer(), - current_file_name :: file_path(), + current_file :: non_neg_integer(), current_file_handle :: io_device(), current_offset :: non_neg_integer(), current_dirty :: boolean(), @@ -278,7 +276,7 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, MsgLocationEts = ets:new(?MSG_LOC_NAME, [set, protected, {keypos, #msg_location.msg_id}]), - InitName = "0" ++ ?FILE_EXTENSION, + InitFile = 0, HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, ?BINARY_MODE ++ [read]), FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, @@ -290,8 +288,7 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, msg_location_dets = MsgLocationDets, msg_location_ets = MsgLocationEts, file_summary = FileSummary, - current_file_num = 0, - current_file_name = InitName, + current_file = InitFile, current_file_handle = undefined, current_offset = 0, current_dirty = false, @@ -302,26 +299,27 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, ets_bytes_per_record = EtsBytesPerRecord }, - Files = + FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), - TmpFiles = + TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), - ok = recover_crashed_compactions(RefCountFun, Dir, Files, TmpFiles), + ok = recover_crashed_compactions(RefCountFun, Dir, FileNames, TmpFileNames), %% There should be no more tmp files now, so go ahead and load the %% whole lot - State1 = #msstate { current_file_name = CurrentName, - current_offset = Offset } = + Files = [filename_to_num(FileName) || FileName <- FileNames], + State1 = #msstate { current_file = CurFile, current_offset = Offset } = load_messages(RefCountFun, Files, State), %% read is only needed so that we can seek - {ok, FileHdl} = open_file(Dir, CurrentName, ?WRITE_MODE ++ [read]), + {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), + ?WRITE_MODE ++ [read]), {ok, Offset} = file:position(FileHdl, Offset), State1 #msstate { current_file_handle = FileHdl }. 
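[Collapsing current_file_num and current_file_name into a single integer works because the name is always derivable from the number. A round-trip check using the two helpers defined just below, and assuming ?FILE_EXTENSION is ".rdq" as in the original disk queue module:

    "7.rdq" = filenum_to_name(7),
    7 = filename_to_num("7.rdq"),
    7 = filename_to_num(filenum_to_name(7)).
]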
write(MsgId, Msg, Attrs, State = #msstate { current_file_handle = CurHdl, - current_file_name = CurName, + current_file = CurFile, current_offset = CurOffset, file_summary = FileSummary }) -> case dets_ets_lookup(State, MsgId) of @@ -330,13 +328,13 @@ write(MsgId, Msg, Attrs, {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg, Attrs), true = dets_ets_insert_new( State, #msg_location { - msg_id = MsgId, ref_count = 1, file = CurName, + msg_id = MsgId, ref_count = 1, file = CurFile, offset = CurOffset, total_size = TotalSize, attrs = Attrs }), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, right = undefined }] = - ets:lookup(FileSummary, CurName), + ets:lookup(FileSummary, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> %% can't be any holes in this file @@ -380,7 +378,7 @@ read(MsgId, State) -> throw({error, {misread, [{old_state, State}, - {file, File}, + {file_num, File}, {offset, Offset}, {read, Rest}]}}) end, @@ -407,13 +405,13 @@ attrs(MsgId, State) -> [#msg_location { msg_id = MsgId, attrs = Attrs }] -> Attrs end. -remove(MsgIds, State = #msstate { current_file_name = CurName }) -> +remove(MsgIds, State = #msstate { current_file = CurFile }) -> compact(sets:to_list( lists:foldl( fun (MsgId, Files1) -> case remove_message(MsgId, State) of {compact, File} -> - if CurName =:= File -> Files1; + if CurFile =:= File -> Files1; true -> sets:add_element(File, Files1) end; no_compact -> Files1 @@ -427,8 +425,8 @@ release(MsgIds, State) -> needs_sync(_MsgIds, #msstate { current_dirty = false }) -> false; -needs_sync(MsgIds, State = #msstate { current_file_name = CurFile, - last_sync_offset = SyncOffset }) -> +needs_sync(MsgIds, State = #msstate { current_file = CurFile, + last_sync_offset = SyncOffset }) -> lists:any(fun (MsgId) -> [#msg_location { msg_id = MsgId, file = File, offset = Offset }] = @@ -520,21 +518,21 @@ to_ram_disk_mode(State = #msstate { operation_mode = disk_only, %% general helper functions %%---------------------------------------------------------------------------- -form_filename(Dir, Name) -> - filename:join(Dir, Name). +form_filename(Dir, Name) -> filename:join(Dir, Name). + +filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. + +filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). msg_location_dets_file(Dir) -> form_filename(Dir, atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). -open_file(Dir, File, Mode) -> - file:open(form_filename(Dir, File), ?BINARY_MODE ++ Mode). +open_file(Dir, FileName, Mode) -> + file:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode). -sort_file_names(Files) -> - lists:sort(fun (A, B) -> - ANum = list_to_integer(filename:rootname(A)), - BNum = list_to_integer(filename:rootname(B)), - ANum < BNum - end, Files). +sort_file_names(FileNames) -> + lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, + FileNames). 
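[A quick illustration of why sort_file_names/1 compares the parsed integers rather than sorting the names directly: lexicographic order breaks down as soon as file numbers reach two digits.

    %% plain string sort puts "10.rdq" between "1.rdq" and "2.rdq":
    ["1.rdq", "10.rdq", "2.rdq"] = lists:sort(["2.rdq", "10.rdq", "1.rdq"]),
    %% numeric sort restores the intended eldest-first order:
    ["1.rdq", "2.rdq", "10.rdq"] =
        sort_file_names(["2.rdq", "10.rdq", "1.rdq"]).
]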
preallocate(Hdl, FileSizeLimit, FinalPos) -> {ok, FileSizeLimit} = file:position(Hdl, FileSizeLimit), @@ -550,14 +548,14 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> with_read_handle_at(File, Offset, Fun, State = #msstate { dir = Dir, read_file_handle_cache = HC, - current_file_name = CurName, + current_file = CurFile, current_dirty = IsDirty, last_sync_offset = SyncOffset }) -> - State1 = if CurName =:= File andalso IsDirty andalso Offset >= SyncOffset -> + State1 = if CurFile == File andalso IsDirty andalso Offset >= SyncOffset -> sync(State); true -> State end, - FilePath = form_filename(Dir, File), + FilePath = form_filename(Dir, filenum_to_name(File)), {Result, HC1} = rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), {Result, State1 #msstate { read_file_handle_cache = HC1 }}. @@ -672,24 +670,24 @@ dets_ets_match_object(#msstate { msg_location_ets = MsgLocationEts, %% recovery %%---------------------------------------------------------------------------- -recover_crashed_compactions(RefCountFun, Dir, Files, TmpFiles) -> - lists:foreach(fun (TmpFile) -> +recover_crashed_compactions(RefCountFun, Dir, FileNames, TmpFileNames) -> + lists:foreach(fun (TmpFileName) -> ok = recover_crashed_compactions1( - RefCountFun, Dir, Files, TmpFile) + RefCountFun, Dir, FileNames, TmpFileName) end, - TmpFiles), + TmpFileNames), ok. -recover_crashed_compactions1(RefCountFun, Dir, Files, TmpFile) -> - NonTmpRelatedFile = filename:rootname(TmpFile) ++ ?FILE_EXTENSION, - true = lists:member(NonTmpRelatedFile, Files), +recover_crashed_compactions1(RefCountFun, Dir, FileNames, TmpFileName) -> + NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, + true = lists:member(NonTmpRelatedFileName, FileNames), {ok, UncorruptedMessagesTmp, MsgIdsTmp} = - scan_file_for_valid_messages_msg_ids(Dir, TmpFile), + scan_file_for_valid_messages_msg_ids(Dir, TmpFileName), %% all of these messages should be referenced %% otherwise they wouldn't have been copied out verify_messages_referenced(RefCountFun, MsgIdsTmp), {ok, UncorruptedMessages, MsgIds} = - scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFile), + scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -716,7 +714,7 @@ recover_crashed_compactions1(RefCountFun, Dir, Files, TmpFile) -> %% consist only of valid messages. Plan: Truncate the main file %% back to before any of the files in the tmp file and copy %% them over again - TmpPath = form_filename(Dir, TmpFile), + TmpPath = form_filename(Dir, TmpFileName), case is_sublist(MsgIdsTmp, MsgIds) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file %% note this also catches the case when the tmp file @@ -750,7 +748,7 @@ recover_crashed_compactions1(RefCountFun, Dir, Files, TmpFile) -> %% are in the tmp file true = is_disjoint(MsgIds1, MsgIdsTmp), %% must open with read flag, otherwise will stomp over contents - {ok, MainHdl} = open_file(Dir, NonTmpRelatedFile, + {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName, ?WRITE_MODE ++ [read]), %% Wipe out any rubbish at the end of the file. Remember %% the head of the list will be the highest entry in the @@ -761,7 +759,7 @@ recover_crashed_compactions1(RefCountFun, Dir, Files, TmpFile) -> %% move. 
If we run out of disk space, this truncate could %% fail, but we still aren't risking losing data ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), - {ok, TmpHdl} = open_file(Dir, TmpFile, ?READ_MODE), + {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_MODE), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:sync(MainHdl), ok = file:close(MainHdl), @@ -769,7 +767,8 @@ recover_crashed_compactions1(RefCountFun, Dir, Files, TmpFile) -> ok = file:delete(TmpPath), {ok, _MainMessages, MsgIdsMain} = - scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFile), + scan_file_for_valid_messages_msg_ids( + Dir, NonTmpRelatedFileName), %% check that everything in MsgIds1 is in MsgIdsMain true = is_sublist(MsgIds1, MsgIdsMain), %% check that everything in MsgIdsTmp is in MsgIdsMain @@ -786,13 +785,13 @@ is_disjoint(SmallerL, BiggerL) -> verify_messages_referenced(RefCountFun, MsgIds) -> lists:foreach(fun (MsgId) -> false = RefCountFun(MsgId) == 0 end, MsgIds). -scan_file_for_valid_messages_msg_ids(Dir, File) -> - {ok, Messages} = scan_file_for_valid_messages(Dir, File), +scan_file_for_valid_messages_msg_ids(Dir, FileName) -> + {ok, Messages} = scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [MsgId || {MsgId, _Attrs, _TotalSize, _FileOffset} <- Messages]}. -scan_file_for_valid_messages(Dir, File) -> - case open_file(Dir, File, ?READ_MODE) of +scan_file_for_valid_messages(Dir, FileName) -> + case open_file(Dir, FileName, ?READ_MODE) of {ok, Hdl} -> Valid = rabbit_msg_file:scan(Hdl), %% if something really bad's happened, the close could fail, @@ -800,7 +799,8 @@ scan_file_for_valid_messages(Dir, File) -> file:close(Hdl), Valid; {error, enoent} -> {ok, []}; - {error, Reason} -> throw({error, {unable_to_scan_file, File, Reason}}) + {error, Reason} -> throw({error, + {unable_to_scan_file, FileName, Reason}}) end. %% Takes the list in *ascending* order (i.e. eldest message @@ -820,13 +820,12 @@ find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. load_messages(RefCountFun, [], State) -> - CurrentFile = State #msstate.current_file_name, - load_messages(RefCountFun, undefined, [CurrentFile], State); + CurFile = State #msstate.current_file, + load_messages(RefCountFun, undefined, [CurFile], State); load_messages(RefCountFun, Files, State) -> load_messages(RefCountFun, undefined, Files, State). 
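[find_contiguous_block_prefix above is the recovery-time check that a file prefix contains no holes. A self-contained sketch of the same idea, simplified to {MsgId, TotalSize, Offset} triples (the real version also accumulates the msg ids it passes):

    contiguous_top(Messages) -> contiguous_top(Messages, 0).

    %% each message must start exactly where the previous one ended
    contiguous_top([{_MsgId, TotalSize, Offset} | Rest], Offset) ->
        contiguous_top(Rest, Offset + TotalSize);
    contiguous_top(_MessagesAfterGap, ExpectedOffset) ->
        ExpectedOffset.

    %% contiguous_top([{a, 10, 0}, {b, 5, 10}, {c, 7, 20}]) =:= 15,
    %% since the third message starts at 20, leaving a hole at 15.
]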
load_messages(_RefCountFun, Left, [], State) -> - Num = list_to_integer(filename:rootname(Left)), Offset = case sort_msg_locations_by_offset(desc, Left, State) of [] -> 0; @@ -834,11 +833,10 @@ load_messages(_RefCountFun, Left, [], State) -> total_size = TotalSize } | _] -> MaxOffset + TotalSize end, - State #msstate { current_file_num = Num, current_file_name = Left, - current_offset = Offset }; + State #msstate { current_file = Left, current_offset = Offset }; load_messages(RefCountFun, Left, [File|Files], State = #msstate { dir = Dir, file_summary = FileSummary }) -> - {ok, Messages} = scan_file_for_valid_messages(Dir, File), + {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, Attrs, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case RefCountFun(MsgId) of @@ -873,36 +871,31 @@ load_messages(RefCountFun, Left, [File|Files], maybe_roll_to_new_file(Offset, State = #msstate { dir = Dir, file_size_limit = FileSizeLimit, - current_file_name = CurName, current_file_handle = CurHdl, - current_file_num = CurNum, - file_summary = FileSummary - } - ) when Offset >= FileSizeLimit -> + current_file = CurFile, + file_summary = FileSummary }) + when Offset >= FileSizeLimit -> State1 = sync(State), ok = file:close(CurHdl), - NextNum = CurNum + 1, - NextName = integer_to_list(NextNum) ++ ?FILE_EXTENSION, - {ok, NextHdl} = open_file(Dir, NextName, ?WRITE_MODE), - true = ets:update_element(FileSummary, CurName, - {#file_summary.right, NextName}), + NextFile = CurFile + 1, + {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), + true = ets:update_element(FileSummary, CurFile, + {#file_summary.right, NextFile}), true = ets:insert_new( FileSummary, #file_summary { - file = NextName, valid_total_size = 0, contiguous_top = 0, - left = CurName, right = undefined }), - State2 = State1 #msstate { current_file_name = NextName, - current_file_handle = NextHdl, - current_file_num = NextNum, - current_offset = 0, - last_sync_offset = 0 - }, - compact([CurName], State2); + file = NextFile, valid_total_size = 0, contiguous_top = 0, + left = CurFile, right = undefined }), + State2 = State1 #msstate { current_file_handle = NextHdl, + current_file = NextFile, + current_offset = 0, + last_sync_offset = 0 }, + compact([CurFile], State2); maybe_roll_to_new_file(_, State) -> State. compact(Files, State) -> %% smallest number, hence eldest, hence left-most, first - SortedFiles = sort_file_names(Files), + SortedFiles = lists:sort(Files), %% foldl reverses, so now youngest/right-most first RemainingFiles = lists:foldl(fun (File, Acc) -> @@ -920,7 +913,7 @@ compact(Files, State) -> %% we merge right then this file is the destination and the right file %% is the source. 
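[Since combine_file below turns entirely on the two file_summary size fields, a worked example with invented numbers may help. Suppose a file holds live messages at offsets 0 and 70, each 30 bytes, after a 40-byte message at offset 30 was removed:

    %% live messages as {Offset, TotalSize} pairs (illustrative):
    Live = [{0, 30}, {70, 30}],
    60 = lists:sum([Size || {_Offset, Size} <- Live]), % valid_total_size
    %% contiguous_top is 30 here: the file is hole-free only up to the
    %% start of the removed message. When contiguous_top equals
    %% valid_total_size a file has no holes at all, so a merge can
    %% append to it directly instead of going via a tmp file.
]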
combine_file(File, State = #msstate { file_summary = FileSummary, - current_file_name = CurName }) -> + current_file = CurFile }) -> %% the file we're looking at may no longer exist as it may have %% been deleted within the current GC run case ets:lookup(FileSummary, File) of @@ -930,7 +923,7 @@ combine_file(File, State = #msstate { file_summary = FileSummary, fun() -> case Right of undefined -> State; - _ when not (CurName == Right) -> + _ when not (CurFile == Right) -> [FSRight] = ets:lookup(FileSummary, Right), {_, State1} = adjust_meta_and_combine( FSEntry, FSRight, State), @@ -991,9 +984,11 @@ combine_files(#file_summary { file = Source, contiguous_top = DestinationContiguousTop, right = Source }, State = #msstate { dir = Dir }) -> - State1 = close_file(Source, close_file(Destination, State)), - {ok, SourceHdl} = open_file(Dir, Source, ?READ_MODE), - {ok, DestinationHdl} = open_file(Dir, Destination, + SourceName = filenum_to_name(Source), + DestinationName = filenum_to_name(Destination), + State1 = close_file(SourceName, close_file(DestinationName, State)), + {ok, SourceHdl} = open_file(Dir, SourceName, ?READ_MODE), + {ok, DestinationHdl} = open_file(Dir, DestinationName, ?READ_MODE ++ ?WRITE_MODE), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't @@ -1006,7 +1001,7 @@ combine_files(#file_summary { file = Source, ok = truncate_and_extend_file(DestinationHdl, DestinationValid, ExpectedSize); true -> - Tmp = filename:rootname(Destination) ++ ?FILE_EXTENSION_TMP, + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_MODE ++ ?WRITE_MODE), Worklist = lists:dropwhile( @@ -1045,7 +1040,7 @@ combine_files(#file_summary { file = Source, %% tidy up ok = file:close(SourceHdl), ok = file:close(DestinationHdl), - ok = file:delete(form_filename(Dir, Source)), + ok = file:delete(form_filename(Dir, SourceName)), State1. copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, @@ -1087,8 +1082,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, ok = file:sync(DestinationHdl), ok. -close_file(File, State = #msstate { dir = Dir, read_file_handle_cache = HC }) -> - HC1 = rabbit_file_handle_cache:close_file(form_filename(Dir, File), HC), +close_file(FileName, + State = #msstate { dir = Dir, read_file_handle_cache = HC }) -> + HC1 = rabbit_file_handle_cache:close_file(form_filename(Dir, FileName), HC), State #msstate { read_file_handle_cache = HC1 }. delete_file_if_empty(File, @@ -1113,7 +1109,7 @@ delete_file_if_empty(File, {#file_summary.right, Right}) end, true = ets:delete(FileSummary, File), - ok = file:delete(form_filename(Dir, File)), + ok = file:delete(form_filename(Dir, filenum_to_name(File))), true; _ -> false end. -- cgit v1.2.1 From 94679cd0b53f7cb9abc11fe024d728b32d4181ab Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 10 Sep 2009 00:04:14 +0100 Subject: initialise msg_store ref counts via a generator This is more flexible than the previous ref_count function, allowing the ref counts to be obtained w/o consuming any memory at the supplying end in a variety of scenarios. We use the dq_msg_loc ets/dets table to store the ref counts. That table is later updated with the full details of the messages (their file and position, etc). At the end we prune any entries that have a ref count but no associated file - i.e. the referenced message couldn't be found on disk. 
This change should also fix the "All replicas on diskfull nodes are not active yet" error observed in bug 21530 since we no longer need the indices on the rabbit_disk_queue mnesia table which we identified as the most likely cause of that error. --- src/rabbit_disk_queue.erl | 30 +++---------- src/rabbit_msg_store.erl | 107 +++++++++++++++++++++++++++++++--------------- 2 files changed, 80 insertions(+), 57 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index c2622184..b7ed868b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -245,12 +245,11 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = detect_shutdown_state_and_adjust_delivered_flags(), - ok = add_index(), Store = rabbit_msg_store:init(Mode, base_directory(), FileSizeLimit, ReadFileHandlesLimit, - fun ref_count/1, EtsBPR), + fun msg_ref_gen/1, msg_ref_gen_init(), + EtsBPR), Store1 = prune(Store), - ok = del_index(), Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), ok = extract_sequence_numbers(Sequences), @@ -870,21 +869,12 @@ mark_message_delivered(Key, N) -> end, mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). -add_index() -> - case mnesia:add_table_index(rabbit_disk_queue, msg_id) of - {atomic, ok} -> ok; - {aborted,{already_exists,rabbit_disk_queue,_}} -> ok; - E -> E - end. +msg_ref_gen_init() -> mnesia:dirty_first(rabbit_disk_queue). -del_index() -> - case mnesia:del_table_index(rabbit_disk_queue, msg_id) of - {atomic, ok} -> ok; - %% hmm, something weird must be going on, but it's probably - %% not the end of the world - {aborted, {no_exists, rabbit_disk_queue,_}} -> ok; - E1 -> E1 - end. +msg_ref_gen('$end_of_table') -> finished; +msg_ref_gen(Key) -> + [Obj] = mnesia:dirty_read(rabbit_disk_queue, Key), + {Obj #dq_msg_loc.msg_id, 1, mnesia:dirty_next(rabbit_disk_queue, Key)}. prune_flush_batch(DeleteAcc, RemoveAcc, Store) -> lists:foldl(fun (Key, ok) -> @@ -1003,9 +993,3 @@ shuffle_up(Q, BaseSeqId, SeqId, Gap) -> 0 end, shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). - -ref_count(MsgId) -> - length(mnesia:dirty_index_match_object( - rabbit_disk_queue, - #dq_msg_loc { msg_id = MsgId, _ = '_' }, - msg_id)). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index d5959f9f..b745acbf 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_store). --export([init/6, write/4, read/2, attrs/2, remove/2, release/2, +-export([init/7, write/4, read/2, attrs/2, remove/2, release/2, needs_sync/2, sync/1, cleanup/1, cache_info/1, memory/1, ets_bpr/1, to_disk_only_mode/1, to_ram_disk_mode/1]). @@ -104,9 +104,10 @@ ets_bytes_per_record :: non_neg_integer() }). --spec(init/6 :: ('ram_disk' | 'disk_only', file_path(), +-spec(init/7 :: ('ram_disk' | 'disk_only', file_path(), non_neg_integer(), non_neg_integer(), - fun ((msg_id()) -> non_neg_integer()), non_neg_integer()) -> + (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), + A, non_neg_integer()) -> msstate()). -spec(write/4 :: (msg_id(), msg(), msg_attrs(), msstate()) -> msstate()). -spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found'). @@ -241,8 +242,8 @@ %% from the store when it has been removed the same number of times. %% %% The reference counts do not persist. Therefore the initialisation -%% function must be provided with a function that determines the -%% initial reference count of any (recovered) message. 
+%% function must be provided with a generator that produces ref count +%% deltas for all recovered messages. %% %% Read messages with a reference count greater than one are entered %% into a message cache. The purpose of the cache is not especially @@ -256,8 +257,8 @@ %% public API %%---------------------------------------------------------------------------- -init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, - EtsBytesPerRecord) -> +init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, + MsgRefDeltaGen, MsgRefDeltaGenInit, EtsBytesPerRecord) -> file:delete(msg_location_dets_file(Dir)), @@ -298,17 +299,18 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, RefCountFun, message_cache = MessageCache, ets_bytes_per_record = EtsBytesPerRecord }, - + + ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), - ok = recover_crashed_compactions(RefCountFun, Dir, FileNames, TmpFileNames), + ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames, State), %% There should be no more tmp files now, so go ahead and load the %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], State1 = #msstate { current_file = CurFile, current_offset = Offset } = - load_messages(RefCountFun, Files, State), + load_messages(Files, State), %% read is only needed so that we can seek {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), @@ -666,26 +668,66 @@ dets_ets_match_object(#msstate { msg_location_ets = MsgLocationEts, operation_mode = ram_disk }, Obj) -> ets:match_object(MsgLocationEts, Obj). +dets_ets_select_delete(#msstate { msg_location_dets = MsgLocationDets, + operation_mode = disk_only }, MatchSpec) -> + dets:select_delete(MsgLocationDets, MatchSpec); +dets_ets_select_delete(#msstate { msg_location_ets = MsgLocationEts, + operation_mode = ram_disk }, MatchSpec) -> + ets:select_delete(MsgLocationEts, MatchSpec). + %%---------------------------------------------------------------------------- %% recovery %%---------------------------------------------------------------------------- -recover_crashed_compactions(RefCountFun, Dir, FileNames, TmpFileNames) -> +count_msg_refs(Gen, Seed, State) -> + case Gen(Seed) of + finished -> ok; + {_MsgId, 0, Next} -> count_msg_refs(Gen, Next, State); + {MsgId, Delta, Next} -> + case dets_ets_lookup(State, MsgId) of + [] -> true = dets_ets_insert_new( + State, #msg_location { msg_id = MsgId, + ref_count = Delta }); + [StoreEntry = #msg_location { msg_id = MsgId, + ref_count = RefCount }] -> + NewRefCount = RefCount + Delta, + case NewRefCount of + 0 -> ok = dets_ets_delete(State, MsgId); + _ -> ok = dets_ets_insert( + State, StoreEntry #msg_location { + ref_count = NewRefCount }) + end + end, + count_msg_refs(Gen, Next, State) + end. + +verify_messages_referenced(State, MsgIds) -> + lists:foreach(fun (MsgId) -> [_] = dets_ets_lookup(State, MsgId) end, + MsgIds). + +prune_stale_refs(State) -> + MatchHead = #msg_location { file = undefined, _ = '_' }, + case dets_ets_select_delete(State, [{MatchHead, [], [true]}]) of + N when is_number(N) -> ok; + Other -> Other + end. + +recover_crashed_compactions(Dir, FileNames, TmpFileNames, State) -> lists:foreach(fun (TmpFileName) -> ok = recover_crashed_compactions1( - RefCountFun, Dir, FileNames, TmpFileName) + Dir, FileNames, TmpFileName, State) end, TmpFileNames), ok. 
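[The generator contract consumed by count_msg_refs above is deliberately small: handed a seed, it must return finished or {MsgId, RefCountDelta, NextSeed}. A minimal generator over a plain list (purely illustrative) satisfies it:

    %% the seed is simply the unconsumed tail of the list
    list_ref_gen_init(Pairs) -> Pairs.

    list_ref_gen([]) -> finished;
    list_ref_gen([{MsgId, Delta} | Rest]) -> {MsgId, Delta, Rest}.

    %% ok = count_msg_refs(fun list_ref_gen/1,
    %%                     list_ref_gen_init([{<<"a">>, 2}, {<<"b">>, 1}]),
    %%                     State)

The disk queue's msg_ref_gen/msg_ref_gen_init pair is the same shape, with the mnesia table cursor as the seed, which is what lets the ref counts be built without materialising the whole table in memory.]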
-recover_crashed_compactions1(RefCountFun, Dir, FileNames, TmpFileName) -> +recover_crashed_compactions1(Dir, FileNames, TmpFileName, State) -> NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFileName, FileNames), {ok, UncorruptedMessagesTmp, MsgIdsTmp} = scan_file_for_valid_messages_msg_ids(Dir, TmpFileName), %% all of these messages should be referenced %% otherwise they wouldn't have been copied out - verify_messages_referenced(RefCountFun, MsgIdsTmp), + verify_messages_referenced(State, MsgIdsTmp), {ok, UncorruptedMessages, MsgIds} = scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the @@ -740,7 +782,7 @@ recover_crashed_compactions1(RefCountFun, Dir, FileNames, TmpFileName) -> length(Rest))} end, %% Check that everything in the main file prefix is referenced - verify_messages_referenced(RefCountFun, MsgIds1), + verify_messages_referenced(State, MsgIds1), %% The main file prefix should be contiguous {Top, MsgIds1} = find_contiguous_block_prefix( lists:reverse(UncorruptedMessages1)), @@ -782,9 +824,6 @@ is_sublist(SmallerL, BiggerL) -> is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). -verify_messages_referenced(RefCountFun, MsgIds) -> - lists:foreach(fun (MsgId) -> false = RefCountFun(MsgId) == 0 end, MsgIds). - scan_file_for_valid_messages_msg_ids(Dir, FileName) -> {ok, Messages} = scan_file_for_valid_messages(Dir, FileName), {ok, Messages, @@ -819,13 +858,14 @@ find_contiguous_block_prefix([{MsgId, _Attrs, TotalSize, ExpectedOffset} find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. -load_messages(RefCountFun, [], State) -> +load_messages([], State) -> CurFile = State #msstate.current_file, - load_messages(RefCountFun, undefined, [CurFile], State); -load_messages(RefCountFun, Files, State) -> - load_messages(RefCountFun, undefined, Files, State). + load_messages(undefined, [CurFile], State); +load_messages(Files, State) -> + load_messages(undefined, Files, State). 
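[The load_messages refactor continues below, where the final clause now calls prune_stale_refs to drop index entries that received a ref count but were never found in any file. That function drives select_delete with an ets match specification; a standalone illustration of the same spec, with an illustrative table, assuming the #msg_location record is in scope:

    T = ets:new(illustration, [set, {keypos, #msg_location.msg_id}]),
    true = ets:insert(T, #msg_location { msg_id = <<"a">>,
                                         ref_count = 1,
                                         file = undefined }),
    true = ets:insert(T, #msg_location { msg_id = <<"b">>,
                                         ref_count = 1,
                                         file = 0 }),
    MatchHead = #msg_location { file = undefined, _ = '_' },
    %% a spec is a list of {Head, Guards, Body} triples; a body of
    %% [true] marks the matched object for deletion
    1 = ets:select_delete(T, [{MatchHead, [], [true]}]),
    [<<"b">>] = [M#msg_location.msg_id || M <- ets:tab2list(T)].
]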
-load_messages(_RefCountFun, Left, [], State) -> +load_messages(Left, [], State) -> + ok = prune_stale_refs(State), Offset = case sort_msg_locations_by_offset(desc, Left, State) of [] -> 0; @@ -834,11 +874,19 @@ load_messages(RefCountFun, Left, [File|Files], MaxOffset + TotalSize end, State #msstate { current_file = Left, current_offset = Offset }; -load_messages(RefCountFun, Left, [File|Files], +load_messages(Left, [File|Files], State = #msstate { dir = Dir, file_summary = FileSummary }) -> {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, Attrs, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case RefCountFun(MsgId) of - 0 -> {VMAcc, VTSAcc}; - RefCount -> - true = dets_ets_insert_new( - State, #msg_location { - msg_id = MsgId, ref_count = RefCount, - file = File, offset = Offset, - total_size = TotalSize, - attrs = Attrs }), + case dets_ets_lookup(State, MsgId) of + [] -> {VMAcc, VTSAcc}; + [StoreEntry] -> + ok = dets_ets_insert( + State, StoreEntry #msg_location { + file = File, offset = Offset, + total_size = TotalSize, + attrs = Attrs }), {[Obj | VMAcc], VTSAcc + TotalSize} end end, {[], 0}, Messages), @@ -862,7 +901,7 @@ load_messages(RefCountFun, Left, [File|Files], file = File, valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, left = Left, right = Right }), - load_messages(RefCountFun, File, Files, State). + load_messages(File, Files, State). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation -- cgit v1.2.1 From d4af34148e2c4b2d53578719cccdd118655f6541 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 10 Sep 2009 03:41:03 +0100 Subject: drop disk_queue/msg_store disk_only mode and all the mode switching and memory management logic that goes with it. The 2G limitation of dets makes the disk_only mode not worthwhile. In the process I refactored the msg_location access in msg_store s.t. it shouldn't be much effort to plug in a different index store in the future. Also some minor tweaks and tidying up here and there. --- src/rabbit_disk_queue.erl | 197 +++------------------------ src/rabbit_msg_store.erl | 336 ++++++++++++++-------------------------------- src/rabbit_tests.erl | 43 ------ 3 files changed, 119 insertions(+), 457 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index b7ed868b..8991939d 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -37,7 +37,6 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([handle_pre_hibernate/1]). -export([publish/3, fetch/1, phantom_fetch/1, ack/2, tx_publish/1, tx_commit/3, tx_rollback/1, requeue/2, purge/1, delete_queue/1, @@ -45,10 +44,9 @@ prefetch/1 ]). --export([filesync/0, cache_info/0]). +-export([filesync/0]). --export([stop/0, stop_and_obliterate/0, set_mode/1, to_disk_only_mode/0, - to_ram_disk_mode/0]). +-export([stop/0, stop_and_obliterate/0]). %%---------------------------------------------------------------------------- -define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). -define(BATCH_SIZE, 10000). --define(DISK_ONLY_MODE_FILE, "disk_only_stats.dat"). -define(SHUTDOWN_MESSAGE_KEY, {internal_token, shutdown}). -define(SHUTDOWN_MESSAGE, @@ -68,7 +65,6 @@ is_delivered = never }). --define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in millisecs -define(SYNC_INTERVAL, 5).
%% milliseconds -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). @@ -76,13 +72,10 @@ -define(SERVER, ?MODULE). -record(dqstate, - {operation_mode, %% ram_disk | disk_only - store, %% message store + {store, %% message store sequences, %% next read and write for each q on_sync_txns, %% list of commiters to run on sync (reversed) - commit_timer_ref, %% TRef for our interval timer - memory_report_timer_ref, %% TRef for the memory report timer - mnesia_bytes_per_record %% bytes per record in mnesia in ram_disk mode + commit_timer_ref %% TRef for our interval timer }). %%---------------------------------------------------------------------------- @@ -118,11 +111,7 @@ A, queue_name()) -> A). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). --spec(to_disk_only_mode/0 :: () -> 'ok'). --spec(to_ram_disk_mode/0 :: () -> 'ok'). -spec(filesync/0 :: () -> 'ok'). --spec(cache_info/0 :: () -> [{atom(), term()}]). --spec(set_mode/1 :: ('oppressed' | 'liberated') -> 'ok'). -endif. @@ -187,21 +176,9 @@ stop() -> stop_and_obliterate() -> gen_server2:call(?SERVER, stop_vaporise, infinity). -to_disk_only_mode() -> - gen_server2:pcall(?SERVER, 9, to_disk_only_mode, infinity). - -to_ram_disk_mode() -> - gen_server2:pcall(?SERVER, 9, to_ram_disk_mode, infinity). - filesync() -> gen_server2:pcall(?SERVER, 9, filesync). -cache_info() -> - gen_server2:call(?SERVER, cache_info, infinity). - -set_mode(Mode) -> - gen_server2:pcast(?SERVER, 10, {set_mode, Mode}). - %%---------------------------------------------------------------------------- %% gen_server behaviour %%---------------------------------------------------------------------------- @@ -216,54 +193,23 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> %% brutal_kill. %% Otherwise, the gen_server will be immediately terminated. process_flag(trap_exit, true), - ok = rabbit_memory_manager:register - (self(), true, rabbit_disk_queue, set_mode, []), - ok = filelib:ensure_dir(form_filename("nothing")), - Node = node(), - {Mode, MnesiaBPR, EtsBPR} = - case lists:member(Node, mnesia:table_info(rabbit_disk_queue, - disc_copies)) of - true -> - %% memory manager assumes we start oppressed. 
As we're - %% not, make sure it knows about it, by reporting zero - %% memory usage, which ensures it'll tell us to become - %% liberated - rabbit_memory_manager:report_memory( - self(), 0, false), - {ram_disk, undefined, undefined}; - false -> - Path = form_filename(?DISK_ONLY_MODE_FILE), - case rabbit_misc:read_term_file(Path) of - {ok, [{MnesiaBPR1, EtsBPR1}]} -> - {disk_only, MnesiaBPR1, EtsBPR1}; - {error, Reason} -> - throw({error, {cannot_read_disk_only_mode_file, Path, - Reason}}) - end - end, + ok = filelib:ensure_dir(form_filename("nothing")), ok = detect_shutdown_state_and_adjust_delivered_flags(), - Store = rabbit_msg_store:init(Mode, base_directory(), - FileSizeLimit, ReadFileHandlesLimit, - fun msg_ref_gen/1, msg_ref_gen_init(), - EtsBPR), - Store1 = prune(Store), + Store = prune(rabbit_msg_store:init(base_directory(), + FileSizeLimit, ReadFileHandlesLimit, + fun msg_ref_gen/1, msg_ref_gen_init())), Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), ok = extract_sequence_numbers(Sequences), - State = - #dqstate { operation_mode = Mode, - store = Store1, - sequences = Sequences, - on_sync_txns = [], - commit_timer_ref = undefined, - memory_report_timer_ref = undefined, - mnesia_bytes_per_record = MnesiaBPR - }, - {ok, start_memory_timer(State), hibernate, + State = #dqstate { store = Store, + sequences = Sequences, + on_sync_txns = [], + commit_timer_ref = undefined }, + {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({fetch, Q}, _From, State) -> @@ -294,25 +240,14 @@ handle_call({foldl, Fun, Init, Q}, _From, State) -> reply(Result, State1); handle_call(stop, _From, State) -> {stop, normal, ok, State}; %% gen_server now calls terminate -handle_call(stop_vaporise, _From, State = #dqstate { operation_mode = Mode }) -> +handle_call(stop_vaporise, _From, State) -> State1 = shutdown(State), {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), - {atomic, ok} = case Mode of - ram_disk -> {atomic, ok}; - disk_only -> mnesia:change_table_copy_type( - rabbit_disk_queue, node(), disc_copies) - end, lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), {stop, normal, ok, State1}; %% gen_server now calls terminate -handle_call(to_disk_only_mode, _From, State) -> - reply(ok, to_disk_only_mode(State)); -handle_call(to_ram_disk_mode, _From, State) -> - reply(ok, to_ram_disk_mode(State)); handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), - reply(ok, State1); -handle_call(cache_info, _From, State = #dqstate { store = Store }) -> - reply(rabbit_msg_store:cache_info(Store), State). + reply(ok, State1). handle_cast({publish, Q, Message, IsDelivered}, State) -> {ok, _MsgSeqId, State1} = internal_publish(Q, Message, IsDelivered, State), @@ -332,11 +267,6 @@ handle_cast({requeue, Q, MsgSeqIds}, State) -> handle_cast({requeue_next_n, Q, N}, State) -> {ok, State1} = internal_requeue_next_n(Q, N, State), noreply(State1); -handle_cast({set_mode, Mode}, State) -> - noreply((case Mode of - oppressed -> fun to_disk_only_mode/1; - liberated -> fun to_ram_disk_mode/1 - end)(State)); handle_cast({prefetch, Q, From}, State) -> {Result, State1} = internal_fetch_body(Q, record_delivery, peek_queue, State), @@ -352,22 +282,12 @@ handle_cast({prefetch, Q, From}, State) -> end, noreply(State1). -handle_info(report_memory, State) -> - %% call noreply1/2, not noreply/1/2, as we don't want to restart the - %% memory_report_timer_ref. 
- %% By unsetting the timer, we force a report on the next normal message - noreply1(State #dqstate { memory_report_timer_ref = undefined }); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; handle_info(timeout, State) -> %% must have commit_timer set, so timeout was 0, and we're not hibernating noreply(sync(State)). -handle_pre_hibernate(State) -> - %% don't use noreply/1 or noreply1/1 as they'll restart the memory timer - ok = report_memory(true, State), - {hibernate, stop_memory_timer(State)}. - terminate(_Reason, State) -> State1 = shutdown(State), store_safe_shutdown(), @@ -376,7 +296,7 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { sequences = undefined }) -> State; shutdown(State = #dqstate { sequences = Sequences, store = Store }) -> - State1 = stop_commit_timer(stop_memory_timer(State)), + State1 = stop_commit_timer(State), Store1 = rabbit_msg_store:cleanup(Store), ets:delete(Sequences), State1 #dqstate { sequences = undefined, store = Store1 }. @@ -384,100 +304,19 @@ shutdown(State = #dqstate { sequences = Sequences, store = Store }) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -%%---------------------------------------------------------------------------- -%% memory management helper functions -%%---------------------------------------------------------------------------- - -stop_memory_timer(State = #dqstate { memory_report_timer_ref = undefined }) -> - State; -stop_memory_timer(State = #dqstate { memory_report_timer_ref = TRef }) -> - {ok, cancel} = timer:cancel(TRef), - State #dqstate { memory_report_timer_ref = undefined }. - -start_memory_timer(State = #dqstate { memory_report_timer_ref = undefined }) -> - ok = report_memory(false, State), - {ok, TRef} = timer:send_after(?MINIMUM_MEMORY_REPORT_TIME_INTERVAL, - report_memory), - State #dqstate { memory_report_timer_ref = TRef }; -start_memory_timer(State) -> - State. - -%% Scaling this by 2.5 is a magic number. Found by trial and error to -%% work ok. We are deliberately over reporting so that we run out of -%% memory sooner rather than later, because the transition to disk -%% only modes transiently can take quite a lot of memory. -report_memory(Hibernating, State) -> - Bytes = memory_use(State), - rabbit_memory_manager:report_memory(self(), trunc(2.5 * Bytes), - Hibernating). - -memory_use(#dqstate { operation_mode = ram_disk, - store = Store, - sequences = Sequences }) -> - WordSize = erlang:system_info(wordsize), - rabbit_msg_store:memory(Store) + - WordSize * ets:info(Sequences, memory) + - WordSize * mnesia:table_info(rabbit_disk_queue, memory); -memory_use(#dqstate { operation_mode = disk_only, - store = Store, - sequences = Sequences, - mnesia_bytes_per_record = MnesiaBytesPerRecord }) -> - WordSize = erlang:system_info(wordsize), - rabbit_msg_store:memory(Store) + - WordSize * ets:info(Sequences, memory) + - rabbit_misc:ceil( - mnesia:table_info(rabbit_disk_queue, size) * MnesiaBytesPerRecord). 
- -to_disk_only_mode(State = #dqstate { operation_mode = disk_only }) -> - State; -to_disk_only_mode(State = #dqstate { operation_mode = ram_disk, - store = Store }) -> - rabbit_log:info("Converting disk queue to disk only mode~n", []), - MnesiaBPR = erlang:system_info(wordsize) * - mnesia:table_info(rabbit_disk_queue, memory) / - lists:max([1, mnesia:table_info(rabbit_disk_queue, size)]), - EtsBPR = rabbit_msg_store:ets_bpr(Store), - {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), - disc_only_copies), - Store1 = rabbit_msg_store:to_disk_only_mode(Store), - Path = form_filename(?DISK_ONLY_MODE_FILE), - case rabbit_misc:write_term_file(Path, [{MnesiaBPR, EtsBPR}]) of - ok -> ok; - {error, Reason} -> - throw({error, {cannot_create_disk_only_mode_file, Path, Reason}}) - end, - garbage_collect(), - State #dqstate { operation_mode = disk_only, - store = Store1, - mnesia_bytes_per_record = MnesiaBPR }. - -to_ram_disk_mode(State = #dqstate { operation_mode = ram_disk }) -> - State; -to_ram_disk_mode(State = #dqstate { operation_mode = disk_only, - store = Store }) -> - rabbit_log:info("Converting disk queue to ram disk mode~n", []), - {atomic, ok} = mnesia:change_table_copy_type(rabbit_disk_queue, node(), - disc_copies), - Store1 = rabbit_msg_store:to_ram_disk_mode(Store), - ok = file:delete(form_filename(?DISK_ONLY_MODE_FILE)), - garbage_collect(), - State #dqstate { operation_mode = ram_disk, - store = Store1, - mnesia_bytes_per_record = undefined }. - %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- noreply(State) -> - noreply1(start_memory_timer(State)). + noreply1(State). noreply1(State) -> {State1, Timeout} = next_state(State), {noreply, State1, Timeout}. reply(Reply, State) -> - reply1(Reply, start_memory_timer(State)). + reply1(Reply, State). reply1(Reply, State) -> {State1, Timeout} = next_state(State), diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b745acbf..5b7afb6c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,17 +31,14 @@ -module(rabbit_msg_store). --export([init/7, write/4, read/2, attrs/2, remove/2, release/2, - needs_sync/2, sync/1, cleanup/1, cache_info/1, memory/1, - ets_bpr/1, to_disk_only_mode/1, to_ram_disk_mode/1]). +-export([init/5, write/4, read/2, attrs/2, remove/2, release/2, + needs_sync/2, sync/1, cleanup/1]). %%---------------------------------------------------------------------------- -record(msstate, - {operation_mode, %% ram_disk | disk_only - dir, %% store directory - msg_location_dets, %% where are messages? - msg_location_ets, %% as above, but for ets version + {dir, %% store directory + msg_locations, %% where are messages? file_summary, %% what's in the files? current_file, %% current file name as number current_file_handle, %% current file handle @@ -51,8 +48,7 @@ file_size_limit, %% how big can our files get? read_file_handle_cache, %% file handle cache for reading last_sync_offset, %% current_offset at the last time we sync'd - message_cache, %% ets message cache - ets_bytes_per_record %% bytes per record in msg_location_ets + message_cache %% ets message cache }). -record(msg_location, @@ -78,8 +74,6 @@ -ifdef(use_specs). --type(mode() :: 'ram_disk' | 'disk_only'). --type(dets_table() :: any()). -type(ets_table() :: any()). -type(msg_id() :: binary()). -type(msg() :: any()). @@ -88,10 +82,8 @@ -type(io_device() :: any()). 
-type(msstate() :: #msstate { - operation_mode :: mode(), dir :: file_path(), - msg_location_dets :: dets_table(), - msg_location_ets :: ets_table(), + msg_locations :: ets_table(), file_summary :: ets_table(), current_file :: non_neg_integer(), current_file_handle :: io_device(), @@ -100,15 +92,13 @@ file_size_limit :: non_neg_integer(), read_file_handle_cache :: any(), last_sync_offset :: non_neg_integer(), - message_cache :: ets_table(), - ets_bytes_per_record :: non_neg_integer() + message_cache :: ets_table() }). --spec(init/7 :: ('ram_disk' | 'disk_only', file_path(), +-spec(init/5 :: (file_path(), non_neg_integer(), non_neg_integer(), (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), - A, non_neg_integer()) -> - msstate()). + A) -> msstate()). -spec(write/4 :: (msg_id(), msg(), msg_attrs(), msstate()) -> msstate()). -spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found'). -spec(attrs/2 :: (msg_id(), msstate()) -> msg_attrs() | 'not_found'). @@ -117,11 +107,6 @@ -spec(needs_sync/2 :: ([msg_id()], msstate()) -> boolean()). -spec(sync/1 :: (msstate()) -> msstate()). -spec(cleanup/1 :: (msstate()) -> msstate()). --spec(cache_info/1 :: (msstate()) -> [{atom(), term()}]). --spec(memory/1 :: (msstate()) -> non_neg_integer()). --spec(ets_bpr/1 :: (msstate()) -> non_neg_integer()). --spec(to_disk_only_mode/1 :: (msstate()) -> msstate()). --spec(to_ram_disk_mode/1 :: (msstate()) -> msstate()). -endif. @@ -129,7 +114,7 @@ %% The components: %% -%% MsgLocation: this is a (d)ets table which contains: +%% MsgLocation: this is a ets table which contains: %% {MsgId, RefCount, File, Offset, TotalSize, Attrs} %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} @@ -171,15 +156,7 @@ %% possibilites of a crash have occured during a compaction (this %% consists of tidyup - the compaction is deliberately designed such %% that data is duplicated on disk rather than risking it being lost), -%% and rebuild the dets and ets tables (MsgLocation, FileSummary). -%% -%% MsgLocation is deliberately a dets table in order to ensure that we -%% are not RAM constrained. However, for performance reasons, it is -%% possible to call to_ram_disk_mode/0 which will convert MsgLocation -%% to an ets table. This results in a massive performance improvement, -%% at the expense of greater RAM usage. The idea is that when memory -%% gets tight, we switch to disk_only mode but otherwise try to run in -%% ram_disk mode. +%% and rebuild the ets tables (MsgLocation, FileSummary). %% %% So, with this design, messages move to the left. Eventually, they %% should end up in a contiguous block on the left and are then never @@ -257,25 +234,11 @@ %% public API %%---------------------------------------------------------------------------- -init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, - MsgRefDeltaGen, MsgRefDeltaGenInit, EtsBytesPerRecord) -> - - file:delete(msg_location_dets_file(Dir)), - - {ok, MsgLocationDets} = - dets:open_file(?MSG_LOC_NAME, - [{file, msg_location_dets_file(Dir)}, - {min_no_slots, 1024*1024}, - %% man says this should be <= 32M. But it works... 
- {max_no_slots, 30*1024*1024}, - {type, set}, - {keypos, #msg_location.msg_id} - ]), +init(Dir, FileSizeLimit, ReadFileHandlesLimit, + MsgRefDeltaGen, MsgRefDeltaGenInit) -> - %% it would be better to have this as private, but dets:from_ets/2 - %% seems to blow up if it is set private - see bug21489 - MsgLocationEts = ets:new(?MSG_LOC_NAME, - [set, protected, {keypos, #msg_location.msg_id}]), + MsgLocations = ets:new(?MSG_LOC_NAME, + [set, private, {keypos, #msg_location.msg_id}]), InitFile = 0, HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, @@ -284,10 +247,8 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, [set, private, {keypos, #file_summary.file}]), MessageCache = ets:new(?CACHE_ETS_NAME, [set, private]), State = - #msstate { operation_mode = Mode, - dir = Dir, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, + #msstate { dir = Dir, + msg_locations = MsgLocations, file_summary = FileSummary, current_file = InitFile, current_file_handle = undefined, @@ -296,8 +257,7 @@ init(Mode, Dir, FileSizeLimit, ReadFileHandlesLimit, file_size_limit = FileSizeLimit, read_file_handle_cache = HandleCache, last_sync_offset = 0, - message_cache = MessageCache, - ets_bytes_per_record = EtsBytesPerRecord + message_cache = MessageCache }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), @@ -324,15 +284,14 @@ write(MsgId, Msg, Attrs, current_file = CurFile, current_offset = CurOffset, file_summary = FileSummary }) -> - case dets_ets_lookup(State, MsgId) of - [] -> + case index_lookup(MsgId, State) of + not_found -> %% New message, lots to do {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg, Attrs), - true = dets_ets_insert_new( - State, #msg_location { - msg_id = MsgId, ref_count = 1, file = CurFile, - offset = CurOffset, total_size = TotalSize, - attrs = Attrs }), + ok = index_insert(#msg_location { + msg_id = MsgId, ref_count = 1, file = CurFile, + offset = CurOffset, total_size = TotalSize, + attrs = Attrs }, State), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, right = undefined }] = @@ -350,23 +309,20 @@ write(MsgId, Msg, Attrs, maybe_roll_to_new_file( NextOffset, State #msstate {current_offset = NextOffset, current_dirty = true}); - [StoreEntry = - #msg_location { msg_id = MsgId, ref_count = RefCount }] -> + StoreEntry = #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter - ok = dets_ets_insert(State, StoreEntry #msg_location { - ref_count = RefCount + 1 }), + ok = index_update(StoreEntry #msg_location { + ref_count = RefCount + 1 }, State), State end. read(MsgId, State) -> - Objs = dets_ets_lookup(State, MsgId), - case Objs of - [] -> - not_found; - [#msg_location { ref_count = RefCount, - file = File, - offset = Offset, - total_size = TotalSize }] -> + case index_lookup(MsgId, State) of + not_found -> not_found; + #msg_location { ref_count = RefCount, + file = File, + offset = Offset, + total_size = TotalSize } -> case fetch_and_increment_cache(MsgId, State) of not_found -> {{ok, {MsgId, Msg, _Attrs}}, State1} = @@ -401,10 +357,9 @@ read(MsgId, State) -> end. attrs(MsgId, State) -> - Objs = dets_ets_lookup(State, MsgId), - case Objs of - [] -> not_found; - [#msg_location { msg_id = MsgId, attrs = Attrs }] -> Attrs + case index_lookup(MsgId, State) of + not_found -> not_found; + #msg_location { attrs = Attrs } -> Attrs end. 
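[In the hunks that follow, needs_sync/2 is rewritten to consult the index directly. Its core predicate is worth isolating: a message's bytes can only be lost in a crash if they sit in the current file at or past the last synced offset. A sketch under those assumptions (this helper is illustrative, not part of the module):

    %% true iff the message's bytes may not have hit the disk yet
    needs_sync_p(File, Offset, CurFile, SyncOffset) ->
        File =:= CurFile andalso Offset >= SyncOffset.

    %% needs_sync_p(3, 1024, 3, 512) -> true   (written after last sync)
    %% needs_sync_p(3,  256, 3, 512) -> false  (already synced)
    %% needs_sync_p(2, 9999, 3, 512) -> false  (older files are synced)
]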
remove(MsgIds, State = #msstate { current_file = CurFile }) -> @@ -430,9 +385,8 @@ needs_sync(_MsgIds, #msstate { current_dirty = false }) -> needs_sync(MsgIds, State = #msstate { current_file = CurFile, last_sync_offset = SyncOffset }) -> lists:any(fun (MsgId) -> - [#msg_location { msg_id = MsgId, file = File, - offset = Offset }] = - dets_ets_lookup(State, MsgId), + #msg_location { file = File, offset = Offset } = + index_lookup(MsgId, State), File =:= CurFile andalso Offset >= SyncOffset end, MsgIds). @@ -443,9 +397,7 @@ sync(State = #msstate { current_file_handle = CurHdl, ok = file:sync(CurHdl), State #msstate { current_dirty = false, last_sync_offset = CurOffset }. -cleanup(State = #msstate { dir = Dir, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts, +cleanup(State = #msstate { msg_locations = MsgLocations, file_summary = FileSummary, current_file_handle = FileHdl, read_file_handle_cache = HC }) -> @@ -456,66 +408,15 @@ cleanup(State = #msstate { dir = Dir, State2 end, HC1 = rabbit_file_handle_cache:close_all(HC), - dets:close(MsgLocationDets), - file:delete(msg_location_dets_file(Dir)), - ets:delete(MsgLocationEts), + ets:delete(MsgLocations), ets:delete(FileSummary), - State1 #msstate { msg_location_dets = undefined, - msg_location_ets = undefined, + State1 #msstate { msg_locations = undefined, file_summary = undefined, current_file_handle = undefined, current_dirty = false, read_file_handle_cache = HC1 }. -cache_info(#msstate { message_cache = Cache }) -> - ets:info(Cache). - -memory(#msstate { operation_mode = ram_disk, - file_summary = FileSummary, - msg_location_ets = MsgLocationEts, - message_cache = Cache }) -> - erlang:system_info(wordsize) * - lists:sum([ets:info(Table, memory) || - Table <- [FileSummary, MsgLocationEts, Cache]]); -memory(#msstate { operation_mode = disk_only, - file_summary = FileSummary, - msg_location_dets = MsgLocationDets, - message_cache = Cache, - ets_bytes_per_record = EtsBytesPerRecord }) -> - erlang:system_info(wordsize) * - lists:sum([ets:info(Table, memory) || - Table <- [FileSummary, Cache]]) + - rabbit_misc:ceil(dets:info(MsgLocationDets, size) * EtsBytesPerRecord). - -ets_bpr(#msstate { operation_mode = disk_only, - ets_bytes_per_record = EtsBytesPerRecord }) -> - EtsBytesPerRecord; -ets_bpr(#msstate { operation_mode = ram_disk, - msg_location_ets = MsgLocationEts }) -> - erlang:system_info(wordsize) * ets:info(MsgLocationEts, memory) / - lists:max([1, ets:info(MsgLocationEts, size)]). - -to_disk_only_mode(State = #msstate { operation_mode = disk_only }) -> - State; -to_disk_only_mode(State = #msstate { operation_mode = ram_disk, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> - ok = dets:from_ets(MsgLocationDets, MsgLocationEts), - true = ets:delete_all_objects(MsgLocationEts), - State #msstate { operation_mode = disk_only, - ets_bytes_per_record = ets_bpr(State) }. - -to_ram_disk_mode(State = #msstate { operation_mode = ram_disk }) -> - State; -to_ram_disk_mode(State = #msstate { operation_mode = disk_only, - msg_location_dets = MsgLocationDets, - msg_location_ets = MsgLocationEts }) -> - true = ets:from_dets(MsgLocationEts, MsgLocationDets), - ok = dets:delete_all_objects(MsgLocationDets), - State #msstate { operation_mode = ram_disk, - ets_bytes_per_record = undefined }. 
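
For readers skimming the deletions: the two mode-switch functions above amounted to a bulk copy between the dets table and its ets twin using a pair of OTP built-ins, after which one side was emptied. The retired mechanism, reduced to its essentials (standalone sketch, table names hypothetical):

    %% ram_disk -> disk_only: push the index out to disk
    flip_to_disk(DetsTab, EtsTab) ->
        ok = dets:from_ets(DetsTab, EtsTab),    %% bulk copy ets -> dets
        true = ets:delete_all_objects(EtsTab),  %% free the RAM copy
        ok.

    %% disk_only -> ram_disk: pull the index back into RAM
    flip_to_ram(DetsTab, EtsTab) ->
        true = ets:from_dets(EtsTab, DetsTab),  %% bulk copy dets -> ets
        ok = dets:delete_all_objects(DetsTab),
        ok.

With a single private ets table there is exactly one authoritative copy of each location record, so the flip, and the memory-accounting heuristics that supported it (memory/1, ets_bpr/1), lose their reason to exist.
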
- %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- @@ -526,9 +427,6 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). -msg_location_dets_file(Dir) -> - form_filename(Dir, atom_to_list(?MSG_LOC_NAME) ++ ?FILE_EXTENSION_DETS). - open_file(Dir, FileName, Mode) -> file:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode). @@ -563,13 +461,12 @@ with_read_handle_at(File, Offset, Fun, {Result, State1 #msstate { read_file_handle_cache = HC1 }}. remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> - [StoreEntry = - #msg_location { msg_id = MsgId, ref_count = RefCount, file = File, - offset = Offset, total_size = TotalSize }] = - dets_ets_lookup(State, MsgId), + StoreEntry = #msg_location { ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize } = + index_lookup(MsgId, State), case RefCount of 1 -> - ok = dets_ets_delete(State, MsgId), + ok = index_delete(MsgId, State), ok = remove_cache_entry(MsgId, State), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop }] = @@ -582,8 +479,8 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> {compact, File}; _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), - ok = dets_ets_insert(State, StoreEntry #msg_location { - ref_count = RefCount - 1 }), + ok = index_update(StoreEntry #msg_location { + ref_count = RefCount - 1 }, State), no_compact end. @@ -628,52 +525,39 @@ cache_is_full(Cache) -> ets:info(Cache, memory) > ?CACHE_MAX_SIZE. %%---------------------------------------------------------------------------- -%% dets/ets agnosticism +%% index %%---------------------------------------------------------------------------- -dets_ets_lookup(#msstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Key) -> - dets:lookup(MsgLocationDets, Key); -dets_ets_lookup(#msstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Key) -> - ets:lookup(MsgLocationEts, Key). - -dets_ets_delete(#msstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Key) -> - ok = dets:delete(MsgLocationDets, Key); -dets_ets_delete(#msstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Key) -> - true = ets:delete(MsgLocationEts, Key), +index_lookup(Key, #msstate { msg_locations = MsgLocations }) -> + case ets:lookup(MsgLocations, Key) of + [] -> not_found; + [Entry] -> Entry + end. + +index_insert(Obj, #msstate { msg_locations = MsgLocations }) -> + true = ets:insert_new(MsgLocations, Obj), ok. -dets_ets_insert(#msstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Obj) -> - ok = dets:insert(MsgLocationDets, Obj); -dets_ets_insert(#msstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Obj) -> - true = ets:insert(MsgLocationEts, Obj), +index_update(Obj, #msstate { msg_locations = MsgLocations }) -> + true = ets:insert(MsgLocations, Obj), ok. -dets_ets_insert_new(#msstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Obj) -> - true = dets:insert_new(MsgLocationDets, Obj); -dets_ets_insert_new(#msstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Obj) -> - true = ets:insert_new(MsgLocationEts, Obj). 
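
Two details of the replacement operations deserve a note: index_insert/2 asserts true = ets:insert_new(...), which crashes the caller if the key already exists, while index_update/2 uses plain ets:insert/2 and overwrites silently; and the by-file operations defined just below build their match heads from a record pattern in which `_ = '_'` sets every field not named to the '_' wildcard. Both in one self-contained sketch, with a hypothetical record standing in for #msg_location{}:

    -record(loc, {id, file, count}).

    %% illustration only, not part of the patch
    index_semantics_demo() ->
        T = ets:new(demo, [set, private, {keypos, #loc.id}]),
        true  = ets:insert_new(T, #loc { id = m1, file = 0, count = 1 }),
        false = ets:insert_new(T, #loc { id = m1, file = 0, count = 9 }), %% dup refused
        true  = ets:insert(T, #loc { id = m1, file = 0, count = 2 }),     %% overwrite
        true  = ets:insert_new(T, #loc { id = m2, file = 1, count = 1 }),
        MatchHead = #loc { file = 0, _ = '_' }, %% '_' fills unnamed fields
        1 = ets:select_delete(T, [{MatchHead, [], [true]}]),
        [#loc { id = m2, file = 1, count = 1 }] = ets:tab2list(T),
        true = ets:delete(T).
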
- -dets_ets_match_object(#msstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, Obj) -> - dets:match_object(MsgLocationDets, Obj); -dets_ets_match_object(#msstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, Obj) -> - ets:match_object(MsgLocationEts, Obj). - -dets_ets_select_delete(#msstate { msg_location_dets = MsgLocationDets, - operation_mode = disk_only }, MatchSpec) -> - dets:select_delete(MsgLocationDets, MatchSpec); -dets_ets_select_delete(#msstate { msg_location_ets = MsgLocationEts, - operation_mode = ram_disk }, MatchSpec) -> - ets:select_delete(MsgLocationEts, MatchSpec). +index_delete(Key, #msstate { msg_locations = MsgLocations }) -> + true = ets:delete(MsgLocations, Key), + ok. + +index_search_by_file(File, #msstate { msg_locations = MsgLocations }) -> + lists:sort(fun (#msg_location { offset = OffA }, + #msg_location { offset = OffB }) -> + OffA < OffB + end, ets:match_object(MsgLocations, + #msg_location { file = File, _ = '_' })). + + +index_delete_by_file(File, #msstate { msg_locations = MsgLocations }) -> + MatchHead = #msg_location { file = File, _ = '_' }, + ets:select_delete(MsgLocations, [{MatchHead, [], [true]}]), + ok. %%---------------------------------------------------------------------------- %% recovery @@ -684,33 +568,27 @@ count_msg_refs(Gen, Seed, State) -> finished -> ok; {_MsgId, 0, Next} -> count_msg_refs(Gen, Next, State); {MsgId, Delta, Next} -> - case dets_ets_lookup(State, MsgId) of - [] -> true = dets_ets_insert_new( - State, #msg_location { msg_id = MsgId, - ref_count = Delta }); - [StoreEntry = #msg_location { msg_id = MsgId, - ref_count = RefCount }] -> + case index_lookup(MsgId, State) of + not_found -> + ok = index_insert(#msg_location { msg_id = MsgId, + ref_count = Delta }, + State); + StoreEntry = #msg_location { ref_count = RefCount } -> NewRefCount = RefCount + Delta, case NewRefCount of - 0 -> ok = dets_ets_delete(State, MsgId); - _ -> ok = dets_ets_insert( - State, StoreEntry #msg_location { - ref_count = NewRefCount }) + 0 -> ok = index_delete(MsgId, State); + _ -> ok = index_update(StoreEntry #msg_location { + ref_count = NewRefCount }, + State) end end, count_msg_refs(Gen, Next, State) end. verify_messages_referenced(State, MsgIds) -> - lists:foreach(fun (MsgId) -> [_] = dets_ets_lookup(State, MsgId) end, - MsgIds). - -prune_stale_refs(State) -> - MatchHead = #msg_location { file = undefined, _ = '_' }, - case dets_ets_select_delete(State, [{MatchHead, [], [true]}]) of - N when is_number(N) -> ok; - Other -> Other - end. + lists:foreach(fun (MsgId) -> + #msg_location {} = index_lookup(MsgId, State) + end, MsgIds). recover_crashed_compactions(Dir, FileNames, TmpFileNames, State) -> lists:foreach(fun (TmpFileName) -> @@ -865,9 +743,9 @@ load_messages(Files, State) -> load_messages(undefined, Files, State). 
load_messages(Left, [], State) -> - ok = prune_stale_refs(State), + ok = index_delete_by_file(undefined, State), Offset = - case sort_msg_locations_by_offset(desc, Left, State) of + case lists:reverse(index_search_by_file(Left, State)) of [] -> 0; [#msg_location { offset = MaxOffset, total_size = TotalSize } | _] -> @@ -879,14 +757,13 @@ load_messages(Left, [File|Files], {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, Attrs, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case dets_ets_lookup(State, MsgId) of - [] -> {VMAcc, VTSAcc}; - [StoreEntry] -> - ok = dets_ets_insert( - State, StoreEntry #msg_location { - file = File, offset = Offset, - total_size = TotalSize, - attrs = Attrs }), + case index_lookup(MsgId, State) of + not_found -> {VMAcc, VTSAcc}; + StoreEntry -> + ok = index_update(StoreEntry #msg_location { + file = File, offset = Offset, + total_size = TotalSize, + attrs = Attrs }, State), {[Obj | VMAcc], VTSAcc + TotalSize} end end, {[], 0}, Messages), @@ -1004,17 +881,6 @@ adjust_meta_and_combine( true -> {false, State} end. -sort_msg_locations_by_offset(Dir, File, State) -> - Comp = case Dir of - asc -> fun erlang:'<'/2; - desc -> fun erlang:'>'/2 - end, - lists:sort(fun (#msg_location { offset = OffA }, - #msg_location { offset = OffB }) -> - Comp(OffA, OffB) - end, dets_ets_match_object( - State, #msg_location { file = File, _ = '_' })). - combine_files(#file_summary { file = Source, valid_total_size = SourceValid, left = Destination }, @@ -1055,7 +921,7 @@ combine_files(#file_summary { file = Source, %% that the list should be naturally sorted %% as we require, however, we need to %% enforce it anyway - end, sort_msg_locations_by_offset(asc, Destination, State1)), + end, index_search_by_file(Destination, State1)), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State1), @@ -1073,7 +939,7 @@ combine_files(#file_summary { file = Source, ok = file:close(TmpHdl), ok = file:delete(form_filename(Dir, Tmp)) end, - SourceWorkList = sort_msg_locations_by_offset(asc, Source, State1), + SourceWorkList = index_search_by_file(Source, State1), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up @@ -1092,9 +958,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, %% CurOffset is in the DestinationFile. %% Offset, BlockStart and BlockEnd are in the SourceFile %% update MsgLocationDets to reflect change of file and offset - ok = dets_ets_insert(State, StoreEntry #msg_location { - file = Destination, - offset = CurOffset }), + ok = index_update(StoreEntry #msg_location { + file = Destination, + offset = CurOffset }, State), NextOffset = CurOffset + TotalSize, if BlockStart =:= undefined -> %% base case, called only for the first list elem diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1f2187bc..1e50696a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -828,7 +828,6 @@ test_disk_queue() -> passed = rdq_test_purge(), passed = rdq_test_mixed_queue_modes(), passed = rdq_test_mode_conversion_mid_txn(), - passed = rdq_test_disk_queue_modes(), rdq_virgin(), passed. @@ -1266,47 +1265,6 @@ rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) - 0 = rabbit_mixed_queue:len(MS11), passed. 
-rdq_test_disk_queue_modes() -> - rdq_virgin(), - rdq_start(), - Msg = <<0:(8*256)>>, - Total = 1000, - Half1 = lists:seq(1,round(Total/2)), - Half2 = lists:seq(1 + round(Total/2), Total), - CommitHalf1 = commit_list(Half1, round(Total/2)), - CommitHalf2 = commit_list(Half2, Total - round(Total/2)), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half1], - ok = rabbit_disk_queue:tx_commit(q, CommitHalf1, []), - io:format("Publish done~n", []), - ok = rabbit_disk_queue:to_disk_only_mode(), - io:format("To Disk Only done~n", []), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- Half2], - ok = rabbit_disk_queue:tx_commit(q, CommitHalf2, []), - Seqs = [begin - Remaining = Total - N, - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- Half1], - io:format("Deliver first half done~n", []), - ok = rabbit_disk_queue:to_ram_disk_mode(), - io:format("To RAM Disk done~n", []), - Seqs2 = [begin - Remaining = Total - N, - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- Half2], - io:format("Deliver second half done~n", []), - ok = rabbit_disk_queue:tx_commit(q, [], Seqs), - ok = rabbit_disk_queue:to_disk_only_mode(), - ok = rabbit_disk_queue:tx_commit(q, [], Seqs2), - empty = rabbit_disk_queue:fetch(q), - rdq_stop(), - passed. - rdq_time_commands(Funcs) -> lists:foreach(fun (F) -> F() end, Funcs). @@ -1319,7 +1277,6 @@ rdq_virgin() -> rdq_start() -> {ok, _} = rabbit_disk_queue:start_link(), - ok = rabbit_disk_queue:to_ram_disk_mode(), ok. rdq_stop() -> -- cgit v1.2.1 From fede187da537abe0bd44a3174f61fc766af6b6d8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Sep 2009 05:26:16 +0100 Subject: fix typo --- src/rabbit_mixed_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index b2a03b58..3015f6dc 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -70,7 +70,7 @@ }). -type(msg_id() :: guid()). -type(seq_id() :: non_neg_integer()). --type(ack_tag() :: ( 'no_on_disk' | {msg_id(), seq_id()} )). +-type(ack_tag() :: ( 'not_on_disk' | {msg_id(), seq_id()} )). -type(okmqs() :: {'ok', mqstate()}). -spec(init/2 :: (queue_name(), boolean()) -> okmqs()). -- cgit v1.2.1 From 3643fb75e2dbd6e76a561f17bb74d1ebe4ff0ee8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Sep 2009 07:53:16 +0100 Subject: do not call msg_store:attrs/2 from disk_queue:{phantom_fetch,prefetch} It turns out that we don't actually need the 'persistent' attribute. So this saves us a potentially expensive interaction with the msg_store. --- src/rabbit_disk_queue.erl | 27 +++++++++------------------ src/rabbit_mixed_queue.erl | 4 ++-- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 8991939d..02c20e30 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -94,7 +94,7 @@ {message(), boolean(), ack_tag(), non_neg_integer()})). -spec(phantom_fetch/1 :: (queue_name()) -> ('empty' | - {msg_id(), boolean(), boolean(), ack_tag(), non_neg_integer()})). + {msg_id(), boolean(), ack_tag(), non_neg_integer()})). -spec(prefetch/1 :: (queue_name()) -> 'ok'). -spec(ack/2 :: (queue_name(), [ack_tag()]) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). 
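
Decoded, the slimmed-down phantom_fetch result is {MsgId, IsDelivered, AckTag, Remaining}: the persistence boolean is gone, the remaining boolean() is the delivery marker, and the final non_neg_integer() is the number of messages still on the queue after the fetch. A hypothetical call site, for orientation (the function name is invented; phantom_fetch/1 and ack/2 are the real API above):

    take_next(Q) ->
        case rabbit_disk_queue:phantom_fetch(Q) of
            empty ->
                empty;
            {MsgId, IsDelivered, AckTag, Remaining} ->
                %% AckTag is what later goes to rabbit_disk_queue:ack/2;
                %% Remaining is the queue length after this fetch
                ok = rabbit_disk_queue:ack(Q, [AckTag]),
                {MsgId, IsDelivered, Remaining}
        end.
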
@@ -213,11 +213,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({fetch, Q}, _From, State) -> - {Result, State1} = - internal_fetch_body(Q, record_delivery, pop_queue, State), + {Result, State1} = internal_fetch_body(Q, pop_queue, State), reply(Result, State1); handle_call({phantom_fetch, Q}, _From, State) -> - Result = internal_fetch_attributes(Q, record_delivery, pop_queue, State), + Result = internal_fetch_attributes(Q, record_delivery, State), reply(Result, State); handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> State1 = @@ -268,8 +267,7 @@ handle_cast({requeue_next_n, Q, N}, State) -> {ok, State1} = internal_requeue_next_n(Q, N, State), noreply(State1); handle_cast({prefetch, Q, From}, State) -> - {Result, State1} = - internal_fetch_body(Q, record_delivery, peek_queue, State), + {Result, State1} = internal_fetch_body(Q, peek_queue, State), case rabbit_misc:with_exit_handler( fun () -> false end, fun () -> @@ -277,7 +275,7 @@ handle_cast({prefetch, Q, From}, State) -> true end) of true -> - internal_fetch_attributes(Q, ignore_delivery, pop_queue, State1); + internal_fetch_attributes(Q, ignore_delivery, State1); false -> ok end, noreply(State1). @@ -367,9 +365,8 @@ sync(State = #dqstate { store = Store, on_sync_txns = Txns }) -> %% internal functions %%---------------------------------------------------------------------------- -internal_fetch_body(Q, MarkDelivered, Advance, - State = #dqstate { store = Store }) -> - case next(Q, MarkDelivered, Advance, State) of +internal_fetch_body(Q, Advance, State = #dqstate { store = Store }) -> + case next(Q, record_delivery, Advance, State) of empty -> {empty, State}; {MsgId, IsDelivered, AckTag, Remaining} -> {Message, Store1} = rabbit_msg_store:read(MsgId, Store), @@ -377,14 +374,8 @@ internal_fetch_body(Q, MarkDelivered, Advance, {{Message, IsDelivered, AckTag, Remaining}, State1} end. -internal_fetch_attributes(Q, MarkDelivered, Advance, - State = #dqstate { store = Store }) -> - case next(Q, MarkDelivered, Advance, State) of - empty -> empty; - {MsgId, IsDelivered, AckTag, Remaining} -> - IsPersistent = rabbit_msg_store:attrs(MsgId, Store), - {MsgId, IsPersistent, IsDelivered, AckTag, Remaining} - end. +internal_fetch_attributes(Q, MarkDelivered, State) -> + next(Q, MarkDelivered, pop_queue, State). next(Q, MarkDelivered, Advance, #dqstate { sequences = Sequences }) -> case sequence_lookup(Sequences, Q) of diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 3015f6dc..ddae4da0 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -155,7 +155,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, %% must call phantom_fetch otherwise the msg remains at the head %% of the queue. 
This is synchronous, but unavoidable as we need %% the AckTag - {MsgId, IsPersistent, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), + {MsgId, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), {ok, AckTag, State1}; publish_delivered(Msg, State = #mqstate { length = 0 }) -> Msg1 = ensure_binary_properties(Msg), @@ -175,7 +175,7 @@ fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, AckTag = case IsDurable andalso IsPersistent of true -> - {MsgId, IsPersistent, IsDelivered, AckTag1, _PRem} + {MsgId, IsDelivered, AckTag1, _PRem} = rabbit_disk_queue:phantom_fetch(Q), AckTag1; false -> -- cgit v1.2.1 From e835129ed076e851b159b193b88cc777f3f147d2 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Sep 2009 15:58:06 +0100 Subject: refactoring: get rid of MnesiaDelete flag to remove_messages --- src/rabbit_disk_queue.erl | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 02c20e30..af5d808a 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -425,22 +425,16 @@ internal_foldl(Q, WriteSeqId, Fun, State = #dqstate { store = Store }, Acc1, ReadSeqId + 1). internal_ack(Q, MsgSeqIds, State) -> - remove_messages(Q, MsgSeqIds, true, State). + remove_messages(Q, MsgSeqIds, State). -%% Q is only needed if MnesiaDelete /= false -remove_messages(Q, MsgSeqIds, MnesiaDelete, - State = #dqstate { store = Store } ) -> +remove_messages(Q, MsgSeqIds, State = #dqstate { store = Store } ) -> MsgIds = lists:foldl( fun ({MsgId, SeqId}, MsgIdAcc) -> - ok = case MnesiaDelete of - true -> mnesia:dirty_delete(rabbit_disk_queue, - {Q, SeqId}); - _ -> ok - end, + ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}), [MsgId | MsgIdAcc] end, [], MsgSeqIds), Store1 = rabbit_msg_store:remove(MsgIds, Store), - {ok, State #dqstate { store = Store1}}. + {ok, State #dqstate { store = Store1 }}. internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, guid = MsgId, @@ -481,7 +475,7 @@ internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, end, {ok, InitWriteSeqId}, PubMsgIds), WriteSeqId1 end), - {ok, State1} = remove_messages(Q, AckSeqIds, true, State), + {ok, State1} = remove_messages(Q, AckSeqIds, State), true = case PubMsgIds of [] -> true; _ -> ets:insert(Sequences, {Q, InitReadSeqId, WriteSeqId}) @@ -501,11 +495,9 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}), {ok, {MsgId, WriteSeqId}, State1}. -internal_tx_rollback(MsgIds, State) -> - %% we don't need seq ids because we're not touching mnesia, - %% because seqids were never assigned - MsgSeqIds = lists:zip(MsgIds, lists:duplicate(length(MsgIds), undefined)), - remove_messages(undefined, MsgSeqIds, false, State). +internal_tx_rollback(MsgIds, State = #dqstate { store = Store }) -> + Store1 = rabbit_msg_store:remove(MsgIds, Store), + {ok, State #dqstate { store = Store1 }}. internal_requeue(_Q, [], State) -> {ok, State}; @@ -599,7 +591,7 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> {true, {MsgId, SeqId}, SeqId + 1} end, ReadSeqId), true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), - {ok, State1} = remove_messages(Q, MsgSeqIds, true, State), + {ok, State1} = remove_messages(Q, MsgSeqIds, State), {ok, WriteSeqId - ReadSeqId, State1} end. 
@@ -612,12 +604,11 @@ internal_delete_queue(Q, State) -> Objs = mnesia:dirty_match_object( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, '_'}, _ = '_' }), - MsgSeqIds = - lists:map( - fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, - msg_id = MsgId }) -> - {MsgId, SeqId} end, Objs), - remove_messages(Q, MsgSeqIds, true, State2). + MsgSeqIds = lists:map(fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, + msg_id = MsgId }) -> + {MsgId, SeqId} + end, Objs), + remove_messages(Q, MsgSeqIds, State2). internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> -- cgit v1.2.1 From 00d7b1ce78fb8d6d34a7575ff0c1a852ea8ae797 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 23 Sep 2009 11:43:59 +0100 Subject: get rid of message cache limit limiting the size of the message cache is pointless. Firstly, the ets memory info does not include binaries. Secondly, the cache is only holding onto messages which current queue processes are holding onto, so we are not actually leaking any memory, and the only cost is the cost of the cache entries themselves, which should be small. --- src/rabbit_msg_store.erl | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 5b7afb6c..f9d27e12 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -62,9 +62,7 @@ -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(FILE_EXTENSION_DETS, ".dets"). - -define(CACHE_ETS_NAME, rabbit_disk_queue_cache). --define(CACHE_MAX_SIZE, 10485760). -define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read, read_ahead]). @@ -515,14 +513,8 @@ decrement_cache(MsgId, #msstate { message_cache = Cache }) -> ok. insert_into_cache(MsgId, Msg, #msstate { message_cache = Cache }) -> - case cache_is_full(Cache) of - true -> ok; - false -> true = ets:insert_new(Cache, {MsgId, Msg, 1}), - ok - end. - -cache_is_full(Cache) -> - ets:info(Cache, memory) > ?CACHE_MAX_SIZE. + true = ets:insert_new(Cache, {MsgId, Msg, 1}), + ok. %%---------------------------------------------------------------------------- %% index -- cgit v1.2.1 From 4ec521c5d18e90a1820b797ea0027068191df4c7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 23 Sep 2009 12:00:05 +0100 Subject: remove verify_messages_referenced check This can actually fail, e.g. when a message got ack'ed but the corresponding mnesia delete in dq hasn't been flushed yet. load_messages and the pruning in dq take care of this situation. --- src/rabbit_msg_store.erl | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index f9d27e12..cdf8bf40 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -263,7 +263,7 @@ init(Dir, FileSizeLimit, ReadFileHandlesLimit, sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), - ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames, State), + ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames), %% There should be no more tmp files now, so go ahead and load the %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], @@ -577,27 +577,18 @@ count_msg_refs(Gen, Seed, State) -> count_msg_refs(Gen, Next, State) end. -verify_messages_referenced(State, MsgIds) -> - lists:foreach(fun (MsgId) -> - #msg_location {} = index_lookup(MsgId, State) - end, MsgIds). 
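
A footnote to the cache-limit removal a few hunks up, since the first claim there is easy to miss: ets:info(Tab, memory) reports the words owned by the table itself, and any binary larger than 64 bytes is a reference-counted binary living on the shared binary heap, with only a small handle stored in the table. A byte cap computed from that figure therefore never saw the message bodies it was meant to bound. A quick self-contained demonstration (plain OTP, nothing from this codebase):

    cache_memory_demo() ->
        T = ets:new(demo, [set, private]),
        Before = ets:info(T, memory),                    %% in words
        true = ets:insert(T, {k, <<0:(8*1024*1024)>>}),  %% a 1MB refc binary
        After = ets:info(T, memory),
        %% the table grows by a handful of words, nowhere near a megabyte
        true = (After - Before) * erlang:system_info(wordsize) < 1024,
        true = ets:delete(T).
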
- -recover_crashed_compactions(Dir, FileNames, TmpFileNames, State) -> +recover_crashed_compactions(Dir, FileNames, TmpFileNames) -> lists:foreach(fun (TmpFileName) -> ok = recover_crashed_compactions1( - Dir, FileNames, TmpFileName, State) - end, - TmpFileNames), + Dir, FileNames, TmpFileName) + end, TmpFileNames), ok. -recover_crashed_compactions1(Dir, FileNames, TmpFileName, State) -> +recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFileName, FileNames), {ok, UncorruptedMessagesTmp, MsgIdsTmp} = scan_file_for_valid_messages_msg_ids(Dir, TmpFileName), - %% all of these messages should be referenced - %% otherwise they wouldn't have been copied out - verify_messages_referenced(State, MsgIdsTmp), {ok, UncorruptedMessages, MsgIds} = scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the @@ -651,8 +642,6 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName, State) -> 2 + length(Dropped), length(Rest))} end, - %% Check that everything in the main file prefix is referenced - verify_messages_referenced(State, MsgIds1), %% The main file prefix should be contiguous {Top, MsgIds1} = find_contiguous_block_prefix( lists:reverse(UncorruptedMessages1)), -- cgit v1.2.1 From faf896c29e2010c32760b05cdf0a55412808330b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 23 Sep 2009 12:42:18 +0100 Subject: rename load_messages to build_index since that's what it does plus some minor cosmetic changes --- src/rabbit_msg_store.erl | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index cdf8bf40..9bafe659 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -268,7 +268,7 @@ init(Dir, FileSizeLimit, ReadFileHandlesLimit, %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], State1 = #msstate { current_file = CurFile, current_offset = Offset } = - load_messages(Files, State), + build_index(Files, State), %% read is only needed so that we can seek {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), @@ -717,23 +717,22 @@ find_contiguous_block_prefix([{MsgId, _Attrs, TotalSize, ExpectedOffset} find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. -load_messages([], State) -> +build_index([], State) -> CurFile = State #msstate.current_file, - load_messages(undefined, [CurFile], State); -load_messages(Files, State) -> - load_messages(undefined, Files, State). + build_index(undefined, [CurFile], State); +build_index(Files, State) -> + build_index(undefined, Files, State). 
-load_messages(Left, [], State) -> +build_index(Left, [], State) -> ok = index_delete_by_file(undefined, State), - Offset = - case lists:reverse(index_search_by_file(Left, State)) of - [] -> 0; - [#msg_location { offset = MaxOffset, - total_size = TotalSize } | _] -> - MaxOffset + TotalSize - end, + Offset = case lists:reverse(index_search_by_file(Left, State)) of + [] -> 0; + [#msg_location { offset = MaxOffset, + total_size = TotalSize } | _] -> + MaxOffset + TotalSize + end, State #msstate { current_file = Left, current_offset = Offset }; -load_messages(Left, [File|Files], +build_index(Left, [File|Files], State = #msstate { dir = Dir, file_summary = FileSummary }) -> {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( @@ -752,14 +751,14 @@ load_messages(Left, [File|Files], %% msgs eldest first, so, ValidMessages is the right way round {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), Right = case Files of - [] -> undefined; + [] -> undefined; [F|_] -> F end, true = ets:insert_new(FileSummary, #file_summary { file = File, valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, left = Left, right = Right }), - load_messages(File, Files, State). + build_index(File, Files, State). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation -- cgit v1.2.1 From 4a255a212190f52679e275eb763dd68c1bf87599 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 23 Sep 2009 13:26:31 +0100 Subject: Made sure the magic marker message cannot be produced by a client by forcing the guid to be the empty binary --- src/rabbit_mixed_queue.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index ddae4da0..74e1da2b 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -656,7 +656,7 @@ on_disk(mixed, _IsDurable, _IsPersistent) -> false. publish_magic_marker_message(Q) -> Msg = rabbit_basic:message( rabbit_misc:r(<<"/">>, exchange, <<>>), ?MAGIC_MARKER, - [], <<>>, rabbit_guid:guid(), true), + [], <<>>, <<>>, true), ok = rabbit_disk_queue:publish(Q, ensure_binary_properties(Msg), false). fetch_ack_magic_marker_message(Q) -> @@ -665,8 +665,8 @@ fetch_ack_magic_marker_message(Q) -> ok = rabbit_disk_queue:ack(Q, [AckTag]), {ok, Length}. -is_magic_marker_message( - #basic_message { routing_key = ?MAGIC_MARKER, is_persistent = true }) -> +is_magic_marker_message(#basic_message { routing_key = ?MAGIC_MARKER, + is_persistent = true, guid = <<>> }) -> true; is_magic_marker_message(_) -> false. 
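
The reasoning made concrete: every message arriving from a client carries a guid generated by the broker, and rabbit_guid:guid/0 never yields the empty binary, so matching on guid = <<>> as well as the routing key makes the marker unmintable from outside. A hypothetical check along those lines (assuming that guarantee holds):

    %% illustration only, not part of the patch
    marker_is_unforgeable() ->
        %% client-shaped message: marker routing key, persistent, but
        %% carrying a real, non-empty guid
        ClientMsg = rabbit_basic:message(
                      rabbit_misc:r(<<"/">>, exchange, <<>>), ?MAGIC_MARKER,
                      [], <<>>, rabbit_guid:guid(), true),
        false = is_magic_marker_message(ClientMsg).
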
-- cgit v1.2.1 From ddc6b1c6377fc42b4a659c2f57598ce3f6418bf4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 23 Sep 2009 15:11:48 +0100 Subject: overload open_file so it can cope with filenumbers and names, plus associated minor refactorings --- src/rabbit_msg_store.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 9bafe659..c29ecddb 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -271,8 +271,7 @@ init(Dir, FileSizeLimit, ReadFileHandlesLimit, build_index(Files, State), %% read is only needed so that we can seek - {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), - ?WRITE_MODE ++ [read]), + {ok, FileHdl} = open_file(Dir, CurFile, ?WRITE_MODE ++ [read]), {ok, Offset} = file:position(FileHdl, Offset), State1 #msstate { current_file_handle = FileHdl }. @@ -425,6 +424,8 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). +open_file(Dir, FileNum, Mode) when is_integer(FileNum) -> + open_file(Dir, filenum_to_name(FileNum), Mode); open_file(Dir, FileName, Mode) -> file:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode). @@ -774,7 +775,7 @@ maybe_roll_to_new_file(Offset, State1 = sync(State), ok = file:close(CurHdl), NextFile = CurFile + 1, - {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), + {ok, NextHdl} = open_file(Dir, NextFile, ?WRITE_MODE), true = ets:update_element(FileSummary, CurFile, {#file_summary.right, NextFile}), true = ets:insert_new( -- cgit v1.2.1 From 8068807e42b24cfba29884118f5e16a43cd2c1a5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 23 Sep 2009 15:23:16 +0100 Subject: inverting previous changeset... --- src/rabbit_msg_store.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c29ecddb..9bafe659 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -271,7 +271,8 @@ init(Dir, FileSizeLimit, ReadFileHandlesLimit, build_index(Files, State), %% read is only needed so that we can seek - {ok, FileHdl} = open_file(Dir, CurFile, ?WRITE_MODE ++ [read]), + {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), + ?WRITE_MODE ++ [read]), {ok, Offset} = file:position(FileHdl, Offset), State1 #msstate { current_file_handle = FileHdl }. @@ -424,8 +425,6 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). -open_file(Dir, FileNum, Mode) when is_integer(FileNum) -> - open_file(Dir, filenum_to_name(FileNum), Mode); open_file(Dir, FileName, Mode) -> file:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode). 
@@ -775,7 +774,7 @@ maybe_roll_to_new_file(Offset, State1 = sync(State), ok = file:close(CurHdl), NextFile = CurFile + 1, - {ok, NextHdl} = open_file(Dir, NextFile, ?WRITE_MODE), + {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), true = ets:update_element(FileSummary, CurFile, {#file_summary.right, NextFile}), true = ets:insert_new( -- cgit v1.2.1 From 2a2283235123404900abc81f20a23774297d2300 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 23 Sep 2009 15:33:05 +0100 Subject: english is better --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 9bafe659..3634975b 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -112,7 +112,7 @@ %% The components: %% -%% MsgLocation: this is a ets table which contains: +%% MsgLocation: this is an ets table which contains: %% {MsgId, RefCount, File, Offset, TotalSize, Attrs} %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} -- cgit v1.2.1 From bbfbed322ec5d31641e1688dceabfde80ad92593 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 23 Sep 2009 15:35:47 +0100 Subject: on startup, compact files from which we removed unreferenced messages --- src/rabbit_msg_store.erl | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 9bafe659..b5fae343 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -719,11 +719,11 @@ find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> build_index([], State) -> CurFile = State #msstate.current_file, - build_index(undefined, [CurFile], State); + build_index(undefined, [CurFile], [], State); build_index(Files, State) -> - build_index(undefined, Files, State). + build_index(undefined, Files, [], State). 
-build_index(Left, [], State) -> +build_index(Left, [], FilesToCompact, State) -> ok = index_delete_by_file(undefined, State), Offset = case lists:reverse(index_search_by_file(Left, State)) of [] -> 0; @@ -731,22 +731,25 @@ build_index(Left, [], State) -> total_size = TotalSize } | _] -> MaxOffset + TotalSize end, - State #msstate { current_file = Left, current_offset = Offset }; -build_index(Left, [File|Files], - State = #msstate { dir = Dir, file_summary = FileSummary }) -> + compact(FilesToCompact, %% this never includes the current file + State #msstate { current_file = Left, current_offset = Offset }); +build_index(Left, [File|Files], FilesToCompact, + State = #msstate { dir = Dir, file_summary = FileSummary }) -> {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), - {ValidMessages, ValidTotalSize} = lists:foldl( - fun (Obj = {MsgId, Attrs, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case index_lookup(MsgId, State) of - not_found -> {VMAcc, VTSAcc}; - StoreEntry -> - ok = index_update(StoreEntry #msg_location { - file = File, offset = Offset, - total_size = TotalSize, - attrs = Attrs }, State), - {[Obj | VMAcc], VTSAcc + TotalSize} - end - end, {[], 0}, Messages), + {ValidMessages, ValidTotalSize, AllValid} = + lists:foldl( + fun (Obj = {MsgId, Attrs, TotalSize, Offset}, + {VMAcc, VTSAcc, AVAcc}) -> + case index_lookup(MsgId, State) of + not_found -> {VMAcc, VTSAcc, false}; + StoreEntry -> + ok = index_update(StoreEntry #msg_location { + file = File, offset = Offset, + total_size = TotalSize, + attrs = Attrs }, State), + {[Obj | VMAcc], VTSAcc + TotalSize, AVAcc} + end + end, {[], 0, true}, Messages), %% foldl reverses lists, find_contiguous_block_prefix needs %% msgs eldest first, so, ValidMessages is the right way round {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), @@ -758,7 +761,11 @@ build_index(Left, [File|Files], file = File, valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, left = Left, right = Right }), - build_index(File, Files, State). + FilesToCompact1 = case AllValid orelse Right =:= undefined of + true -> FilesToCompact; + false -> [File | FilesToCompact] + end, + build_index(File, Files, FilesToCompact1, State). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation -- cgit v1.2.1 From 263d97249bac54688b27defc152d115d0d1cb86c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 23 Sep 2009 15:54:31 +0100 Subject: compact (well, delete, actually) files with no valid messages --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b5fae343..9505d7c7 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -749,7 +749,7 @@ build_index(Left, [File|Files], FilesToCompact, attrs = Attrs }, State), {[Obj | VMAcc], VTSAcc + TotalSize, AVAcc} end - end, {[], 0, true}, Messages), + end, {[], 0, Messages =/= []}, Messages), %% foldl reverses lists, find_contiguous_block_prefix needs %% msgs eldest first, so, ValidMessages is the right way round {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), -- cgit v1.2.1 From b0e7ad62a35cd6a42a1dd444f61c1ae2c2b5e720 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 23 Sep 2009 19:01:31 +0100 Subject: replace msg_store:attrs with contains This is a step on the path to getting rid of message attributes in msg_store. 
msg_store:attrs was only being used in disk_queue:prune, to detect when the store contained a non-persistent message and remove that message from the store and the rabbit_disk_queue table. Now rabbit_disk_queue records contain an IsPersistent flag. By making the msg count delta generator pay attention to that flag we trim non-persistent messages from the store during its initialisation, disk_queue:prune no longer needs to remove messages from the store, it just needs to remove all messages from the rabbit_disk_queue table which are no longer referenced by the store - hence the new msg_store:contains function. Keeping the IsPersistent flag in the rabbit_disk_queue table is sub-optimal since it means we store it once per message reference rather than just once per message. That's a small price to pay though for the cleaner interaction between the disk_queue and msg_store, and the opportunity to remove the notion of message attributes from msg_store altogether. Populating the new field in rabbit_disk_queue is straightforward in most places except disk_queue:tx_commit. That used to just be given {MsgId, IsDelivered} tuples, so I had to change the API to {MsgId, IsDelivered, IsPersistent} tuples. --- include/rabbit.hrl | 2 +- src/rabbit_disk_queue.erl | 93 ++++++++++++++++++++++------------------------ src/rabbit_mixed_queue.erl | 11 +++--- src/rabbit_msg_store.erl | 10 ++--- src/rabbit_tests.erl | 17 +++++---- 5 files changed, 65 insertions(+), 68 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 095044e7..bebaee98 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -65,7 +65,7 @@ -record(basic_message, {exchange_name, routing_key, content, guid, is_persistent}). --record(dq_msg_loc, {queue_and_seq_id, is_delivered, msg_id}). +-record(dq_msg_loc, {queue_and_seq_id, is_delivered, is_persistent, msg_id}). -record(ssl_socket, {tcp, ssl}). -record(delivery, {mandatory, immediate, txn, sender, message}). diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index af5d808a..0f69f83b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -62,7 +62,8 @@ -define(SHUTDOWN_MESSAGE, #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY, msg_id = infinity_and_beyond, - is_delivered = never + is_delivered = never, + is_persistent = true }). -define(SYNC_INTERVAL, 5). %% milliseconds @@ -98,7 +99,8 @@ -spec(prefetch/1 :: (queue_name()) -> 'ok'). -spec(ack/2 :: (queue_name(), [ack_tag()]) -> 'ok'). -spec(tx_publish/1 :: (message()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean()}], [ack_tag()]) -> +-spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean(), boolean()}], + [ack_tag()]) -> 'ok'). -spec(tx_rollback/1 :: ([msg_id()]) -> 'ok'). -spec(requeue/2 :: (queue_name(), [{ack_tag(), boolean()}]) -> 'ok'). 
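
A sketch of the commit flow under the revised spec, with hypothetical messages: the triples are {MsgId, IsDelivered, IsPersistent} in publication order, and the final argument is the usual list of acks folded into the same commit:

    %% hypothetical helper, shapes taken from the specs above
    publish_and_commit(Q, Msgs) ->
        lists:foreach(fun (Msg) ->
                              ok = rabbit_disk_queue:tx_publish(Msg)
                      end, Msgs),
        PubMsgIds = [{MsgId, false, IsPersistent} ||
                        #basic_message { guid = MsgId,
                                         is_persistent = IsPersistent }
                            <- Msgs],
        ok = rabbit_disk_queue:tx_commit(Q, PubMsgIds, []).
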
@@ -198,9 +200,10 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = detect_shutdown_state_and_adjust_delivered_flags(), - Store = prune(rabbit_msg_store:init(base_directory(), - FileSizeLimit, ReadFileHandlesLimit, - fun msg_ref_gen/1, msg_ref_gen_init())), + Store = rabbit_msg_store:init(base_directory(), + FileSizeLimit, ReadFileHandlesLimit, + fun msg_ref_gen/1, msg_ref_gen_init()), + ok = prune(Store), Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), ok = extract_sequence_numbers(Sequences), @@ -449,7 +452,8 @@ internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State = #dqstate { store = Store, on_sync_txns = Txns }) -> TxnDetails = {Q, PubMsgIds, AckSeqIds, From}, case rabbit_msg_store:needs_sync( - [MsgId || {MsgId, _IsDelivered} <- PubMsgIds], Store) of + [MsgId || {MsgId, _IsDelivered, _IsPersistent} <- PubMsgIds], + Store) of true -> Txns1 = [TxnDetails | Txns], State #dqstate { on_sync_txns = Txns1 }; false -> internal_do_tx_commit(TxnDetails, State) @@ -464,12 +468,13 @@ internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, ok = mnesia:write_lock_table(rabbit_disk_queue), {ok, WriteSeqId1} = lists:foldl( - fun ({MsgId, IsDelivered}, {ok, SeqId}) -> + fun ({MsgId, IsDelivered, IsPersistent}, {ok, SeqId}) -> {mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, msg_id = MsgId, - is_delivered = IsDelivered + is_delivered = IsDelivered, + is_persistent = IsPersistent }, write), SeqId + 1} end, {ok, InitWriteSeqId}, PubMsgIds), @@ -483,7 +488,8 @@ internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, gen_server2:reply(From, ok), State1. -internal_publish(Q, Message = #basic_message { guid = MsgId }, +internal_publish(Q, Message = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, IsDelivered, State) -> {ok, State1 = #dqstate { sequences = Sequences }} = internal_tx_publish(Message, State), @@ -491,7 +497,8 @@ internal_publish(Q, Message = #basic_message { guid = MsgId }, ok = mnesia:dirty_write(rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId}, msg_id = MsgId, - is_delivered = IsDelivered}), + is_delivered = IsDelivered, + is_persistent = IsPersistent }), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}), {ok, {MsgId, WriteSeqId}, State1}. @@ -694,54 +701,42 @@ msg_ref_gen_init() -> mnesia:dirty_first(rabbit_disk_queue). msg_ref_gen('$end_of_table') -> finished; msg_ref_gen(Key) -> - [Obj] = mnesia:dirty_read(rabbit_disk_queue, Key), - {Obj #dq_msg_loc.msg_id, 1, mnesia:dirty_next(rabbit_disk_queue, Key)}. + [#dq_msg_loc { msg_id = MsgId, is_persistent = IsPersistent }] = + mnesia:dirty_read(rabbit_disk_queue, Key), + NextKey = mnesia:dirty_next(rabbit_disk_queue, Key), + {MsgId, case IsPersistent of true -> 1; false -> 0 end, NextKey}. -prune_flush_batch(DeleteAcc, RemoveAcc, Store) -> +prune_flush_batch(DeleteAcc) -> lists:foldl(fun (Key, ok) -> mnesia:dirty_delete(rabbit_disk_queue, Key) - end, ok, DeleteAcc), - rabbit_msg_store:remove(RemoveAcc, Store). + end, ok, DeleteAcc). prune(Store) -> - prune(Store, mnesia:dirty_first(rabbit_disk_queue), [], [], 0). + prune(Store, mnesia:dirty_first(rabbit_disk_queue), [], 0). 
-prune(Store, '$end_of_table', _DeleteAcc, _RemoveAcc, 0) -> - Store; -prune(Store, '$end_of_table', DeleteAcc, RemoveAcc, _Len) -> - prune_flush_batch(DeleteAcc, RemoveAcc, Store); -prune(Store, Key, DeleteAcc, RemoveAcc, Len) -> +prune(_Store, '$end_of_table', DeleteAcc, _Len) -> + prune_flush_batch(DeleteAcc); +prune(Store, Key, DeleteAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), - {DeleteAcc1, RemoveAcc1, Len1} = - case rabbit_msg_store:attrs(MsgId, Store) of - not_found -> - %% msg hasn't been found on disk, delete it - {[{Q, SeqId} | DeleteAcc], RemoveAcc, Len + 1}; - true -> - %% msg is persistent, keep it - {DeleteAcc, RemoveAcc, Len}; - false -> - %% msg is not persistent, delete it - {[{Q, SeqId} | DeleteAcc], [MsgId | RemoveAcc], Len + 1} - end, - {Store1, Key1, DeleteAcc2, RemoveAcc2, Len2} = - if - Len1 >= ?BATCH_SIZE -> - %% We have no way of knowing how flushing the batch - %% will affect ordering of records within the table, - %% so have no choice but to start again. Although this - %% will make recovery slower for large queues, we - %% guarantee we can start up in constant memory - Store2 = prune_flush_batch(DeleteAcc1, RemoveAcc1, - Store), - Key2 = mnesia:dirty_first(rabbit_disk_queue), - {Store2, Key2, [], [], 0}; - true -> - Key2 = mnesia:dirty_next(rabbit_disk_queue, Key), - {Store, Key2, DeleteAcc1, RemoveAcc1, Len1} + {DeleteAcc1, Len1} = + case rabbit_msg_store:contains(MsgId, Store) of + true -> {DeleteAcc, Len}; + false -> {[{Q, SeqId} | DeleteAcc], Len + 1} end, - prune(Store1, Key1, DeleteAcc2, RemoveAcc2, Len2). + if Len1 >= ?BATCH_SIZE -> + %% We have no way of knowing how flushing the batch will + %% affect ordering of records within the table, so have no + %% choice but to start again. Although this will make + %% recovery slower for large queues, we guarantee we can + %% start up in constant memory + ok = prune_flush_batch(DeleteAcc1), + NextKey = mnesia:dirty_first(rabbit_disk_queue), + prune(Store, NextKey, [], 0); + true -> + NextKey = mnesia:dirty_next(rabbit_disk_queue, Key), + prune(Store, NextKey, DeleteAcc1, Len1) + end. extract_sequence_numbers(Sequences) -> true = diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl index 74e1da2b..c278bac8 100644 --- a/src/rabbit_mixed_queue.erl +++ b/src/rabbit_mixed_queue.erl @@ -235,7 +235,7 @@ tx_commit(Publishes, MsgsWithAcks, State = #mqstate { mode = Mode, queue = Q, msg_buf = MsgBuf, is_durable = IsDurable, length = Length }) -> PersistentPubs = - [{MsgId, false} || + [{MsgId, false, IsPersistent} || #basic_message { guid = MsgId, is_persistent = IsPersistent } <- Publishes, on_disk(Mode, IsDurable, IsPersistent)], @@ -534,12 +534,13 @@ send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, Commit, Ack, inc_queue_length(MsgBuf, Count)) end. 
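
Stepping back to the rewritten prune loop for a moment: restarting from mnesia:dirty_first/1 after each flushed batch is the price of iterating with dirty operations over a table that is being deleted from, since a flush can reshuffle traversal order. Deleted keys can never reappear, though, so each restart makes progress and the loop terminates, with peak memory bounded by the batch size. The idiom in isolation, for a hypothetical table and keep-predicate:

    prune_demo(Tab, Keep) ->
        prune_demo(Tab, Keep, mnesia:dirty_first(Tab), [], 0).

    prune_demo(Tab, _Keep, '$end_of_table', DeleteAcc, _Len) ->
        flush(Tab, DeleteAcc);
    prune_demo(Tab, Keep, Key, DeleteAcc, Len) ->
        {DeleteAcc1, Len1} = case Keep(Key) of
                                 true  -> {DeleteAcc, Len};
                                 false -> {[Key | DeleteAcc], Len + 1}
                             end,
        if Len1 >= 1000 -> %% stands in for ?BATCH_SIZE
                ok = flush(Tab, DeleteAcc1),
                %% a flush may reorder the traversal, so start over
                prune_demo(Tab, Keep, mnesia:dirty_first(Tab), [], 0);
           true ->
                prune_demo(Tab, Keep, mnesia:dirty_next(Tab, Key),
                           DeleteAcc1, Len1)
        end.

    flush(Tab, Keys) ->
        lists:foreach(fun (Key) -> ok = mnesia:dirty_delete(Tab, Key) end,
                      Keys),
        ok.
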
-republish_message_to_disk_queue(IsDurable, Q, Queue, PublishCount, RequeueCount, - Commit, Ack, MsgBuf, Msg = - #basic_message { guid = MsgId }, IsDelivered) -> +republish_message_to_disk_queue( + IsDurable, Q, Queue, PublishCount, RequeueCount, Commit, Ack, MsgBuf, + Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, + IsDelivered) -> {Commit1, Ack1} = flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack), ok = rabbit_disk_queue:tx_publish(Msg), - Commit2 = [{MsgId, IsDelivered} | Commit1], + Commit2 = [{MsgId, IsDelivered, IsPersistent} | Commit1], {PublishCount1, Commit3, Ack2} = case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of true -> ok = flush_messages_to_disk_queue(Q, Commit2, Ack1), diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index aee501c3..b752b9f6 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_store). --export([init/5, write/4, read/2, attrs/2, remove/2, release/2, +-export([init/5, write/4, read/2, contains/2, remove/2, release/2, needs_sync/2, sync/1, cleanup/1]). %%---------------------------------------------------------------------------- @@ -99,7 +99,7 @@ A) -> msstate()). -spec(write/4 :: (msg_id(), msg(), msg_attrs(), msstate()) -> msstate()). -spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found'). --spec(attrs/2 :: (msg_id(), msstate()) -> msg_attrs() | 'not_found'). +-spec(contains/2 :: (msg_id(), msstate()) -> boolean()). -spec(remove/2 :: ([msg_id()], msstate()) -> msstate()). -spec(release/2 :: ([msg_id()], msstate()) -> msstate()). -spec(needs_sync/2 :: ([msg_id()], msstate()) -> boolean()). @@ -354,10 +354,10 @@ read(MsgId, State) -> end end. -attrs(MsgId, State) -> +contains(MsgId, State) -> case index_lookup(MsgId, State) of - not_found -> not_found; - #msg_location { attrs = Attrs } -> Attrs + not_found -> false; + #msg_location {} -> true end. remove(MsgIds, State = #msstate { current_file = CurFile }) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1e50696a..9d9e60ba 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -862,9 +862,10 @@ rdq_match_messages(#basic_message { guid = MsgId, content = #content { payload_f #basic_message { guid = MsgId, content = #content { payload_fragments_rev = MsgBody }}) -> ok. -commit_list(List, MsgCount) -> - lists:zip([term_to_binary(MsgId) || MsgId <- List], - lists:duplicate(MsgCount, false)). +commit_list(List, MsgCount, IsPersistent) -> + lists:zip3([term_to_binary(MsgId) || MsgId <- List], + lists:duplicate(MsgCount, false), + lists:duplicate(MsgCount, IsPersistent)). 
rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> Startup = rdq_virgin(), @@ -872,7 +873,7 @@ rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> QCount = length(Qs), Msg = <<0:(8*MsgSizeBytes)>>, List = lists:seq(1, MsgCount), - CommitList = commit_list(List, MsgCount), + CommitList = commit_list(List, MsgCount, false), {Publish, ok} = timer:tc(?MODULE, rdq_time_commands, [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) @@ -911,7 +912,7 @@ rdq_stress_gc(MsgCount) -> MsgSizeBytes = 256*1024, Msg = <<0:(8*MsgSizeBytes)>>, % 256KB List = lists:seq(1, MsgCount), - CommitList = commit_list(List, MsgCount), + CommitList = commit_list(List, MsgCount, false), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List], rabbit_disk_queue:tx_commit(q, CommitList, []), StartChunk = round(MsgCount / 20), % 5% @@ -954,7 +955,7 @@ rdq_test_startup_with_queue_gaps() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - CommitAll = commit_list(All, Total), + CommitAll = commit_list(All, Total, true), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, true)) || N <- All], rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), @@ -1011,7 +1012,7 @@ rdq_test_redeliver() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - CommitAll = commit_list(All, Total), + CommitAll = commit_list(All, Total, false), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), @@ -1064,7 +1065,7 @@ rdq_test_purge() -> Total = 1000, Half = round(Total/2), All = lists:seq(1,Total), - CommitAll = commit_list(All, Total), + CommitAll = commit_list(All, Total, false), [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], rabbit_disk_queue:tx_commit(q, CommitAll, []), io:format("Publish done~n", []), -- cgit v1.2.1 From 7e3ac47ea13a57b62f49ddf65c6f7efcd2560188 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 24 Sep 2009 00:29:29 +0100 Subject: get rid of message attriburs in msg_store and msg_file --- src/rabbit_disk_queue.erl | 5 ++--- src/rabbit_msg_file.erl | 51 +++++++++++++++++------------------------------ src/rabbit_msg_store.erl | 43 +++++++++++++++++++-------------------- 3 files changed, 40 insertions(+), 59 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 0f69f83b..ee5fead7 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -439,13 +439,12 @@ remove_messages(Q, MsgSeqIds, State = #dqstate { store = Store } ) -> Store1 = rabbit_msg_store:remove(MsgIds, Store), {ok, State #dqstate { store = Store1 }}. -internal_tx_publish(Message = #basic_message { is_persistent = IsPersistent, - guid = MsgId, +internal_tx_publish(Message = #basic_message { guid = MsgId, content = Content }, State = #dqstate { store = Store }) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), Message1 = Message #basic_message { content = ClearedContent }, - Store1 = rabbit_msg_store:write(MsgId, Message1, IsPersistent, Store), + Store1 = rabbit_msg_store:write(MsgId, Message1, Store), {ok, State #dqstate { store = Store1 }}. internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 46128612..94525d84 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_file). --export([append/4, read/2, scan/1]). 
+-export([append/3, read/2, scan/1]). %%---------------------------------------------------------------------------- @@ -39,7 +39,7 @@ -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(WRITE_OK_SIZE_BITS, 8). -define(WRITE_OK_MARKER, 255). --define(FILE_PACKING_ADJUSTMENT, (1 + (3 * (?INTEGER_SIZE_BYTES)))). +-define(FILE_PACKING_ADJUSTMENT, (1 + (2 * (?INTEGER_SIZE_BYTES)))). %%---------------------------------------------------------------------------- @@ -48,32 +48,27 @@ -type(io_device() :: any()). -type(msg_id() :: binary()). -type(msg() :: any()). --type(msg_attrs() :: any()). -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). --spec(append/4 :: (io_device(), msg_id(), msg(), msg_attrs()) -> +-spec(append/3 :: (io_device(), msg_id(), msg()) -> ({'ok', msg_size()} | {'error', any()})). -spec(read/2 :: (io_device(), msg_size()) -> - ({'ok', {msg_id(), msg(), msg_attrs()}} | {'error', any()})). + ({'ok', {msg_id(), msg()}} | {'error', any()})). -spec(scan/1 :: (io_device()) -> - {'ok', [{msg_id(), msg_attrs(), msg_size(), position()}]}). + {'ok', [{msg_id(), msg_size(), position()}]}). -endif. %%---------------------------------------------------------------------------- -append(FileHdl, MsgId, MsgBody, MsgAttrs) when is_binary(MsgId) -> +append(FileHdl, MsgId, MsgBody) when is_binary(MsgId) -> MsgBodyBin = term_to_binary(MsgBody), - MsgAttrsBin = term_to_binary(MsgAttrs), - [MsgIdSize, MsgBodyBinSize, MsgAttrsBinSize] = Sizes = - [size(B) || B <- [MsgId, MsgBodyBin, MsgAttrsBin]], + [MsgIdSize, MsgBodyBinSize] = Sizes = [size(B) || B <- [MsgId, MsgBodyBin]], Size = lists:sum(Sizes), case file:write(FileHdl, <>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; @@ -86,15 +81,12 @@ read(FileHdl, TotalSize) -> case file:read(FileHdl, TotalSize) of {ok, <>} -> - BodyBinSize = Size - MsgIdSize - MsgAttrsBinSize, + BodyBinSize = Size - MsgIdSize, <> = Rest, - {ok, {MsgId, - binary_to_term(MsgBodyBin), binary_to_term(MsgAttrsBin)}}; + {ok, {MsgId, binary_to_term(MsgBodyBin)}}; KO -> KO end. @@ -105,23 +97,19 @@ scan(FileHdl, Offset, Acc) -> eof -> {ok, Acc}; {corrupted, NextOffset} -> scan(FileHdl, NextOffset, Acc); - {ok, {MsgId, MsgAttrs, TotalSize, NextOffset}} -> - scan(FileHdl, NextOffset, - [{MsgId, MsgAttrs, TotalSize, Offset} | Acc]); + {ok, {MsgId, TotalSize, NextOffset}} -> + scan(FileHdl, NextOffset, [{MsgId, TotalSize, Offset} | Acc]); _KO -> %% bad message, but we may still have recovered some valid messages {ok, Acc} end. 
read_next(FileHdl, Offset) -> - ThreeIntegers = 3 * ?INTEGER_SIZE_BYTES, - case file:read(FileHdl, ThreeIntegers) of - {ok, - <>} -> + TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, + case file:read(FileHdl, TwoIntegers) of + {ok, <>} -> if Size == 0 -> eof; %% Nothing we can do other than stop - MsgIdSize == 0 orelse MsgAttrsBinSize == 0 -> + MsgIdSize == 0 -> %% current message corrupted, try skipping past it ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT, case file:position(FileHdl, {cur, Size + 1}) of @@ -130,21 +118,18 @@ read_next(FileHdl, Offset) -> KO -> KO end; true -> %% all good, let's continue - HeaderSize = MsgIdSize + MsgAttrsBinSize, - case file:read(FileHdl, HeaderSize) of - {ok, <>} -> + case file:read(FileHdl, MsgIdSize) of + {ok, <>} -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Offset + TotalSize - 1, case file:position( - FileHdl, {cur, Size - HeaderSize}) of + FileHdl, {cur, Size - MsgIdSize}) of {ok, ExpectedAbsPos} -> NextOffset = ExpectedAbsPos + 1, case file:read(FileHdl, 1) of {ok, <>} -> {ok, {MsgId, - binary_to_term(MsgAttrsBin), TotalSize, NextOffset}}; {ok, _SomeOtherData} -> {corrupted, NextOffset}; diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b752b9f6..66bfb719 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_store). --export([init/5, write/4, read/2, contains/2, remove/2, release/2, +-export([init/5, write/3, read/2, contains/2, remove/2, release/2, needs_sync/2, sync/1, cleanup/1]). %%---------------------------------------------------------------------------- @@ -52,7 +52,7 @@ }). -record(msg_location, - {msg_id, ref_count, file, offset, total_size, attrs}). + {msg_id, ref_count, file, offset, total_size}). -record(file_summary, {file, valid_total_size, contiguous_top, left, right}). @@ -75,7 +75,6 @@ -type(ets_table() :: any()). -type(msg_id() :: binary()). -type(msg() :: any()). --type(msg_attrs() :: any()). -type(file_path() :: any()). -type(io_device() :: any()). @@ -97,7 +96,7 @@ non_neg_integer(), non_neg_integer(), (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> msstate()). --spec(write/4 :: (msg_id(), msg(), msg_attrs(), msstate()) -> msstate()). +-spec(write/3 :: (msg_id(), msg(), msstate()) -> msstate()). -spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found'). -spec(contains/2 :: (msg_id(), msstate()) -> boolean()). -spec(remove/2 :: ([msg_id()], msstate()) -> msstate()). @@ -113,7 +112,7 @@ %% The components: %% %% MsgLocation: this is an ets table which contains: -%% {MsgId, RefCount, File, Offset, TotalSize, Attrs} +%% {MsgId, RefCount, File, Offset, TotalSize} %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} %% @@ -277,19 +276,18 @@ init(Dir, FileSizeLimit, ReadFileHandlesLimit, State1 #msstate { current_file_handle = FileHdl }. 
-write(MsgId, Msg, Attrs, - State = #msstate { current_file_handle = CurHdl, - current_file = CurFile, - current_offset = CurOffset, - file_summary = FileSummary }) -> +write(MsgId, Msg, State = #msstate { current_file_handle = CurHdl, + current_file = CurFile, + current_offset = CurOffset, + file_summary = FileSummary }) -> case index_lookup(MsgId, State) of not_found -> %% New message, lots to do - {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg, Attrs), + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { msg_id = MsgId, ref_count = 1, file = CurFile, - offset = CurOffset, total_size = TotalSize, - attrs = Attrs }, State), + offset = CurOffset, total_size = TotalSize }, + State), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, right = undefined }] = @@ -323,13 +321,13 @@ read(MsgId, State) -> total_size = TotalSize } -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgId, Msg, _Attrs}}, State1} = + {{ok, {MsgId, Msg}}, State1} = with_read_handle_at( File, Offset, fun(Hdl) -> Res = case rabbit_msg_file:read( Hdl, TotalSize) of - {ok, {MsgId, _, _}} = Obj -> Obj; + {ok, {MsgId, _}} = Obj -> Obj; {ok, Rest} -> throw({error, {misread, @@ -654,7 +652,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% Wipe out any rubbish at the end of the file. Remember %% the head of the list will be the highest entry in the %% file. - [{_, _, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, + [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, TmpSize = TmpTopOffset + TmpTopTotalSize, %% Extend the main file as big as necessary in a single %% move. If we run out of disk space, this truncate could @@ -685,8 +683,7 @@ is_disjoint(SmallerL, BiggerL) -> scan_file_for_valid_messages_msg_ids(Dir, FileName) -> {ok, Messages} = scan_file_for_valid_messages(Dir, FileName), - {ok, Messages, - [MsgId || {MsgId, _Attrs, _TotalSize, _FileOffset} <- Messages]}. + {ok, Messages, [MsgId || {MsgId, _TotalSize, _FileOffset} <- Messages]}. 
scan_file_for_valid_messages(Dir, FileName) -> case open_file(Dir, FileName, ?READ_MODE) of @@ -710,8 +707,8 @@ find_contiguous_block_prefix(List) -> find_contiguous_block_prefix([], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}; -find_contiguous_block_prefix([{MsgId, _Attrs, TotalSize, ExpectedOffset} - | Tail], ExpectedOffset, MsgIds) -> +find_contiguous_block_prefix([{MsgId, TotalSize, ExpectedOffset} | Tail], + ExpectedOffset, MsgIds) -> ExpectedOffset1 = ExpectedOffset + TotalSize, find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]); find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> @@ -738,15 +735,15 @@ build_index(Left, [File|Files], FilesToCompact, {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize, AllValid} = lists:foldl( - fun (Obj = {MsgId, Attrs, TotalSize, Offset}, + fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc, AVAcc}) -> case index_lookup(MsgId, State) of not_found -> {VMAcc, VTSAcc, false}; StoreEntry -> ok = index_update(StoreEntry #msg_location { file = File, offset = Offset, - total_size = TotalSize, - attrs = Attrs }, State), + total_size = TotalSize }, + State), {[Obj | VMAcc], VTSAcc + TotalSize, AVAcc} end end, {[], 0, Messages =/= []}, Messages), -- cgit v1.2.1 From 67b24e5a7212beb97e7718b89adb96fbc9851dde Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 25 Sep 2009 11:26:08 +0100 Subject: move constants to where they belong --- src/rabbit_disk_queue.erl | 9 ++------- src/rabbit_msg_store.erl | 15 ++++++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index ee5fead7..6fe2a4b3 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -52,9 +52,6 @@ -include("rabbit.hrl"). --define(MAX_READ_FILE_HANDLES, 256). --define(FILE_SIZE_LIMIT, (256*1024*1024)). - -define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). -define(BATCH_SIZE, 10000). @@ -122,8 +119,7 @@ %%---------------------------------------------------------------------------- start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, - [?FILE_SIZE_LIMIT, ?MAX_READ_FILE_HANDLES], []). + gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). publish(Q, Message = #basic_message {}, IsDelivered) -> gen_server2:cast(?SERVER, {publish, Q, Message, IsDelivered}). @@ -185,7 +181,7 @@ filesync() -> %% gen_server behaviour %%---------------------------------------------------------------------------- -init([FileSizeLimit, ReadFileHandlesLimit]) -> +init([]) -> %% If the gen_server is part of a supervision tree and is ordered %% by its supervisor to terminate, terminate will be called with %% Reason=shutdown if the following conditions apply: @@ -201,7 +197,6 @@ init([FileSizeLimit, ReadFileHandlesLimit]) -> ok = detect_shutdown_state_and_adjust_delivered_flags(), Store = rabbit_msg_store:init(base_directory(), - FileSizeLimit, ReadFileHandlesLimit, fun msg_ref_gen/1, msg_ref_gen_init()), ok = prune(Store), diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 66bfb719..06c61f35 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,11 +31,14 @@ -module(rabbit_msg_store). --export([init/5, write/3, read/2, contains/2, remove/2, release/2, +-export([init/3, write/3, read/2, contains/2, remove/2, release/2, needs_sync/2, sync/1, cleanup/1]). 
%%---------------------------------------------------------------------------- +-define(MAX_READ_FILE_HANDLES, 256). +-define(FILE_SIZE_LIMIT, (256*1024*1024)). + -record(msstate, {dir, %% store directory msg_locations, %% where are messages? @@ -92,8 +95,7 @@ message_cache :: ets_table() }). --spec(init/5 :: (file_path(), - non_neg_integer(), non_neg_integer(), +-spec(init/3 :: (file_path(), (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> msstate()). -spec(write/3 :: (msg_id(), msg(), msstate()) -> msstate()). @@ -231,14 +233,13 @@ %% public API %%---------------------------------------------------------------------------- -init(Dir, FileSizeLimit, ReadFileHandlesLimit, - MsgRefDeltaGen, MsgRefDeltaGenInit) -> +init(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> MsgLocations = ets:new(?MSG_LOC_NAME, [set, private, {keypos, #msg_location.msg_id}]), InitFile = 0, - HandleCache = rabbit_file_handle_cache:init(ReadFileHandlesLimit, + HandleCache = rabbit_file_handle_cache:init(?MAX_READ_FILE_HANDLES, ?BINARY_MODE ++ [read]), FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private, {keypos, #file_summary.file}]), @@ -251,7 +252,7 @@ init(Dir, FileSizeLimit, ReadFileHandlesLimit, current_file_handle = undefined, current_offset = 0, current_dirty = false, - file_size_limit = FileSizeLimit, + file_size_limit = ?FILE_SIZE_LIMIT, read_file_handle_cache = HandleCache, last_sync_offset = 0, message_cache = MessageCache -- cgit v1.2.1 From fa65a1266274ad41b1addf15c83112ee33d6f69d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 25 Sep 2009 14:49:54 +0100 Subject: turn the msg_store into a separate process --- src/rabbit_disk_queue.erl | 96 +++++++--------- src/rabbit_msg_store.erl | 285 +++++++++++++++++++++++++--------------------- 2 files changed, 200 insertions(+), 181 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 6fe2a4b3..02a8ed8c --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -70,8 +70,7 @@ -define(SERVER, ?MODULE). -record(dqstate, - {store, %% message store - sequences, %% next read and write for each q + {sequences, %% next read and write for each q on_sync_txns, %% list of committers to run on sync (reversed) commit_timer_ref %% TRef for our interval timer }). @@ -196,15 +195,15 @@ init([]) -> ok = detect_shutdown_state_and_adjust_delivered_flags(), - Store = rabbit_msg_store:init(base_directory(), - fun msg_ref_gen/1, msg_ref_gen_init()), - ok = prune(Store), + {ok, _Pid} = rabbit_msg_store:start_link(base_directory(), + fun msg_ref_gen/1, + msg_ref_gen_init()), + ok = prune(), Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), ok = extract_sequence_numbers(Sequences), - State = #dqstate { store = Store, - sequences = Sequences, + State = #dqstate { sequences = Sequences, on_sync_txns = [], commit_timer_ref = undefined }, {ok, State, hibernate, @@ -291,11 +290,11 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { sequences = undefined }) -> State; -shutdown(State = #dqstate { sequences = Sequences, store = Store }) -> +shutdown(State = #dqstate { sequences = Sequences }) -> State1 = stop_commit_timer(State), - Store1 = rabbit_msg_store:cleanup(Store), + ok = rabbit_msg_store:stop(), ets:delete(Sequences), - State1 #dqstate { sequences = undefined, store = Store1 }. + State1 #dqstate { sequences = undefined }. code_change(_OldVsn, State, _Extra) -> {ok, State}.
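With this commit the disk queue drives the store purely through module-level calls; no store state is threaded through dqstate any more. A minimal standalone sketch of the resulting client API (illustrative only: the directory, message id and payload are made up, and the reference generator reports no pre-existing messages):

%% Sketch: exercising the msg_store process API introduced in this commit.
%% Assumes the scratch directory already exists; the store opens its
%% files inside it.
demo_msg_store() ->
    DemoDir = "/tmp/msg_store_demo",          %% hypothetical directory
    NoRefs = fun (_Seed) -> finished end,     %% nothing referenced yet
    {ok, _Pid} = rabbit_msg_store:start_link(DemoDir, NoRefs, ignored),
    MsgId = erlang:md5(<<"example">>),        %% msg_id() is a binary
    ok = rabbit_msg_store:write(MsgId, {example_msg, <<"payload">>}),
    ok = rabbit_msg_store:sync(),             %% fsync the current file
    {ok, {example_msg, <<"payload">>}} = rabbit_msg_store:read(MsgId),
    true = rabbit_msg_store:contains(MsgId),
    ok = rabbit_msg_store:remove([MsgId]),    %% drops the ref count to 0
    ok = rabbit_msg_store:stop().

Note that write/2 and remove/1 are casts while read/1 and contains/1 are calls, so the gen_server2 mailbox ordering is what makes the sequence above deterministic.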
@@ -350,12 +349,12 @@ stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #dqstate { commit_timer_ref = undefined }. -sync(State = #dqstate { store = Store, on_sync_txns = Txns }) -> - State1 = State #dqstate { store = rabbit_msg_store:sync(Store) }, +sync(State = #dqstate { on_sync_txns = Txns }) -> + ok = rabbit_msg_store:sync(), case Txns of - [] -> State1; + [] -> State; _ -> lists:foldl(fun internal_do_tx_commit/2, - State1 #dqstate { on_sync_txns = [] }, + State #dqstate { on_sync_txns = [] }, lists:reverse(Txns)) end. @@ -363,13 +362,12 @@ sync(State = #dqstate { store = Store, on_sync_txns = Txns }) -> %% internal functions %%---------------------------------------------------------------------------- -internal_fetch_body(Q, Advance, State = #dqstate { store = Store }) -> +internal_fetch_body(Q, Advance, State) -> case next(Q, record_delivery, Advance, State) of empty -> {empty, State}; {MsgId, IsDelivered, AckTag, Remaining} -> - {Message, Store1} = rabbit_msg_store:read(MsgId, Store), - State1 = State #dqstate { store = Store1 }, - {{Message, IsDelivered, AckTag, Remaining}, State1} + {ok, Message} = rabbit_msg_store:read(MsgId), + {{Message, IsDelivered, AckTag, Remaining}, State} end. internal_fetch_attributes(Q, MarkDelivered, State) -> @@ -413,41 +411,37 @@ internal_foldl(Q, Fun, Init, State) -> internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> {ok, Acc, State}; -internal_foldl(Q, WriteSeqId, Fun, State = #dqstate { store = Store }, - Acc, ReadSeqId) -> +internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> [#dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), - {Message, Store1} = rabbit_msg_store:read(MsgId, Store), + {ok, Message} = rabbit_msg_store:read(MsgId), Acc1 = Fun(Message, {MsgId, ReadSeqId}, IsDelivered, Acc), - internal_foldl(Q, WriteSeqId, Fun, State #dqstate { store = Store1 }, - Acc1, ReadSeqId + 1). + internal_foldl(Q, WriteSeqId, Fun, State, Acc1, ReadSeqId + 1). internal_ack(Q, MsgSeqIds, State) -> remove_messages(Q, MsgSeqIds, State). -remove_messages(Q, MsgSeqIds, State = #dqstate { store = Store } ) -> +remove_messages(Q, MsgSeqIds, State) -> MsgIds = lists:foldl( fun ({MsgId, SeqId}, MsgIdAcc) -> ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}), [MsgId | MsgIdAcc] end, [], MsgSeqIds), - Store1 = rabbit_msg_store:remove(MsgIds, Store), - {ok, State #dqstate { store = Store1 }}. + ok = rabbit_msg_store:remove(MsgIds), + {ok, State}. internal_tx_publish(Message = #basic_message { guid = MsgId, - content = Content }, - State = #dqstate { store = Store }) -> + content = Content }, State) -> ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), - Message1 = Message #basic_message { content = ClearedContent }, - Store1 = rabbit_msg_store:write(MsgId, Message1, Store), - {ok, State #dqstate { store = Store1 }}. + ok = rabbit_msg_store:write( + MsgId, Message #basic_message { content = ClearedContent }), + {ok, State}. 
internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, - State = #dqstate { store = Store, on_sync_txns = Txns }) -> + State = #dqstate { on_sync_txns = Txns }) -> TxnDetails = {Q, PubMsgIds, AckSeqIds, From}, case rabbit_msg_store:needs_sync( - [MsgId || {MsgId, _IsDelivered, _IsPersistent} <- PubMsgIds], - Store) of + [MsgId || {MsgId, _IsDelivered, _IsPersistent} <- PubMsgIds]) of true -> Txns1 = [TxnDetails | Txns], State #dqstate { on_sync_txns = Txns1 }; false -> internal_do_tx_commit(TxnDetails, State) @@ -496,14 +490,13 @@ internal_publish(Q, Message = #basic_message { guid = MsgId, true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}), {ok, {MsgId, WriteSeqId}, State1}. -internal_tx_rollback(MsgIds, State = #dqstate { store = Store }) -> - Store1 = rabbit_msg_store:remove(MsgIds, Store), - {ok, State #dqstate { store = Store1 }}. +internal_tx_rollback(MsgIds, State) -> + ok = rabbit_msg_store:remove(MsgIds), + {ok, State}. internal_requeue(_Q, [], State) -> {ok, State}; -internal_requeue(Q, MsgSeqIds, State = #dqstate { store = Store, - sequences = Sequences }) -> +internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> %% We know that every seq_id in here is less than the ReadSeqId %% you'll get if you look up this queue in Sequences (i.e. they've %% already been delivered). We also know that the rows for these @@ -536,8 +529,8 @@ internal_requeue(Q, MsgSeqIds, State = #dqstate { store = Store, MsgSeqIds) end), true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId1}), - Store1 = rabbit_msg_store:release(MsgIds, Store), - {ok, State #dqstate { store = Store1 }}. + ok = rabbit_msg_store:release(MsgIds), + {ok, State}. requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, Acc}) -> [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = @@ -551,8 +544,7 @@ requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, Acc}) -> {WriteSeqId + 1, Q, [MsgId | Acc]}. %% move the next N messages from the front of the queue to the back. -internal_requeue_next_n(Q, N, State = #dqstate { store = Store, - sequences = Sequences }) -> +internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), if N >= (WriteSeqId - ReadSeqId) -> {ok, State}; true -> @@ -564,8 +556,8 @@ internal_requeue_next_n(Q, N, State = #dqstate { store = Store, end ), true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN}), - Store1 = rabbit_msg_store:release(MsgIds, Store), - {ok, State #dqstate { store = Store1 }} + ok = rabbit_msg_store:release(MsgIds), + {ok, State} end. requeue_next_messages(_Q, 0, ReadSeq, WriteSeq, Acc) -> @@ -705,16 +697,16 @@ prune_flush_batch(DeleteAcc) -> mnesia:dirty_delete(rabbit_disk_queue, Key) end, ok, DeleteAcc). -prune(Store) -> - prune(Store, mnesia:dirty_first(rabbit_disk_queue), [], 0). +prune() -> + prune(mnesia:dirty_first(rabbit_disk_queue), [], 0). 
-prune(_Store, '$end_of_table', DeleteAcc, _Len) -> +prune('$end_of_table', DeleteAcc, _Len) -> prune_flush_batch(DeleteAcc); -prune(Store, Key, DeleteAcc, Len) -> +prune(Key, DeleteAcc, Len) -> [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = mnesia:dirty_read(rabbit_disk_queue, Key), {DeleteAcc1, Len1} = - case rabbit_msg_store:contains(MsgId, Store) of + case rabbit_msg_store:contains(MsgId) of true -> {DeleteAcc, Len}; false -> {[{Q, SeqId} | DeleteAcc], Len + 1} end, @@ -726,10 +718,10 @@ prune(Store, Key, DeleteAcc, Len) -> %% start up in constant memory ok = prune_flush_batch(DeleteAcc1), NextKey = mnesia:dirty_first(rabbit_disk_queue), - prune(Store, NextKey, [], 0); + prune(NextKey, [], 0); true -> NextKey = mnesia:dirty_next(rabbit_disk_queue, Key), - prune(Store, NextKey, DeleteAcc1, Len1) + prune(NextKey, DeleteAcc1, Len1) end. extract_sequence_numbers(Sequences) -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 06c61f35..ef973d8a 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -31,14 +31,44 @@ -module(rabbit_msg_store). --export([init/3, write/3, read/2, contains/2, remove/2, release/2, - needs_sync/2, sync/1, cleanup/1]). +-behaviour(gen_server2). -%%---------------------------------------------------------------------------- +-export([start_link/3, write/2, read/1, contains/1, remove/1, release/1, + needs_sync/1, sync/0, stop/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(msg_id() :: binary()). +-type(msg() :: any()). +-type(file_path() :: any()). + +-spec(start_link/3 :: + (file_path(), + (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> + {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(write/2 :: (msg_id(), msg()) -> 'ok'). +-spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). +-spec(contains/1 :: (msg_id()) -> boolean()). +-spec(remove/1 :: ([msg_id()]) -> 'ok'). +-spec(release/1 :: ([msg_id()]) -> 'ok'). +-spec(needs_sync/1 :: ([msg_id()]) -> boolean()). +-spec(sync/0 :: () -> 'ok'). +-spec(stop/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + -record(msstate, {dir, %% store directory msg_locations, %% where are messages? @@ -64,53 +94,12 @@ -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_EXTENSION_DETS, ".dets"). -define(CACHE_ETS_NAME, rabbit_disk_queue_cache). -define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read, read_ahead]). -define(WRITE_MODE, [write, delayed_write]). -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(ets_table() :: any()). --type(msg_id() :: binary()). --type(msg() :: any()). --type(file_path() :: any()). --type(io_device() :: any()). - --type(msstate() :: #msstate { - dir :: file_path(), - msg_locations :: ets_table(), - file_summary :: ets_table(), - current_file :: non_neg_integer(), - current_file_handle :: io_device(), - current_offset :: non_neg_integer(), - current_dirty :: boolean(), - file_size_limit :: non_neg_integer(), - read_file_handle_cache :: any(), - last_sync_offset :: non_neg_integer(), - message_cache :: ets_table() - }). 
- --spec(init/3 :: (file_path(), - (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), - A) -> msstate()). --spec(write/3 :: (msg_id(), msg(), msstate()) -> msstate()). --spec(read/2 :: (msg_id(), msstate()) -> {msg(), msstate()} | 'not_found'). --spec(contains/2 :: (msg_id(), msstate()) -> boolean()). --spec(remove/2 :: ([msg_id()], msstate()) -> msstate()). --spec(release/2 :: ([msg_id()], msstate()) -> msstate()). --spec(needs_sync/2 :: ([msg_id()], msstate()) -> boolean()). --spec(sync/1 :: (msstate()) -> msstate()). --spec(cleanup/1 :: (msstate()) -> msstate()). - --endif. - -%%---------------------------------------------------------------------------- - %% The components: %% %% MsgLocation: this is an ets table which contains: @@ -233,7 +222,25 @@ %% public API %%---------------------------------------------------------------------------- -init(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> +start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, + [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], + [{timeout, infinity}]). + +write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). +read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). +contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). +remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). +release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). +needs_sync(MsgIds) -> gen_server2:call(?SERVER, {needs_sync, MsgIds}, infinity). +sync() -> gen_server2:call(?SERVER, sync, infinity). +stop() -> gen_server2:call(?SERVER, stop, infinity). + +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- + +init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> MsgLocations = ets:new(?MSG_LOC_NAME, [set, private, {keypos, #msg_location.msg_id}]), @@ -275,47 +282,11 @@ init(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> ?WRITE_MODE ++ [read]), {ok, Offset} = file:position(FileHdl, Offset), - State1 #msstate { current_file_handle = FileHdl }. - -write(MsgId, Msg, State = #msstate { current_file_handle = CurHdl, - current_file = CurFile, - current_offset = CurOffset, - file_summary = FileSummary }) -> - case index_lookup(MsgId, State) of - not_found -> - %% New message, lots to do - {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), - ok = index_insert(#msg_location { - msg_id = MsgId, ref_count = 1, file = CurFile, - offset = CurOffset, total_size = TotalSize }, - State), - [FSEntry = #file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - right = undefined }] = - ets:lookup(FileSummary, CurFile), - ValidTotalSize1 = ValidTotalSize + TotalSize, - ContiguousTop1 = if CurOffset =:= ContiguousTop -> - %% can't be any holes in this file - ValidTotalSize1; - true -> ContiguousTop - end, - true = ets:insert(FileSummary, FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), - NextOffset = CurOffset + TotalSize, - maybe_roll_to_new_file( - NextOffset, State #msstate {current_offset = NextOffset, - current_dirty = true}); - StoreEntry = #msg_location { ref_count = RefCount } -> - %% We already know about it, just update counter - ok = index_update(StoreEntry #msg_location { - ref_count = RefCount + 1 }, State), - State - end. + {ok, State1 #msstate { current_file_handle = FileHdl }}. 
-read(MsgId, State) -> +handle_call({read, MsgId}, _From, State) -> case index_lookup(MsgId, State) of - not_found -> not_found; + not_found -> reply(not_found, State); #msg_location { ref_count = RefCount, file = File, offset = Offset, @@ -347,57 +318,100 @@ read(MsgId, State) -> %% message. So don't bother %% putting it in the cache. end, - {Msg, State1}; + reply({ok, Msg}, State1); {Msg, _RefCount} -> - {Msg, State} + reply({ok, Msg}, State) end - end. - -contains(MsgId, State) -> + end; + +handle_call({contains, MsgId}, _From, State) -> + reply(case index_lookup(MsgId, State) of + not_found -> false; + #msg_location {} -> true + end, State); + +handle_call({needs_sync, _MsgIds}, _From, + State = #msstate { current_dirty = false }) -> + reply(false, State); +handle_call({needs_sync, MsgIds}, _From, + State = #msstate { current_file = CurFile, + last_sync_offset = SyncOffset }) -> + reply(lists:any(fun (MsgId) -> + #msg_location { file = File, offset = Offset } = + index_lookup(MsgId, State), + File =:= CurFile andalso Offset >= SyncOffset + end, MsgIds), State); + +handle_call(sync, _From, State) -> + reply(ok, sync(State)); + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast({write, MsgId, Msg}, + State = #msstate { current_file_handle = CurHdl, + current_file = CurFile, + current_offset = CurOffset, + file_summary = FileSummary }) -> case index_lookup(MsgId, State) of - not_found -> false; - #msg_location {} -> true - end. - -remove(MsgIds, State = #msstate { current_file = CurFile }) -> - compact(sets:to_list( - lists:foldl( - fun (MsgId, Files1) -> - case remove_message(MsgId, State) of - {compact, File} -> - if CurFile =:= File -> Files1; - true -> sets:add_element(File, Files1) - end; - no_compact -> Files1 + not_found -> + %% New message, lots to do + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), + ok = index_insert(#msg_location { + msg_id = MsgId, ref_count = 1, file = CurFile, + offset = CurOffset, total_size = TotalSize }, + State), + [FSEntry = #file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined }] = + ets:lookup(FileSummary, CurFile), + ValidTotalSize1 = ValidTotalSize + TotalSize, + ContiguousTop1 = if CurOffset =:= ContiguousTop -> + %% can't be any holes in this file + ValidTotalSize1; + true -> ContiguousTop + end, + true = ets:insert(FileSummary, FSEntry #file_summary { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), + NextOffset = CurOffset + TotalSize, + noreply( + maybe_roll_to_new_file( + NextOffset, State #msstate {current_offset = NextOffset, + current_dirty = true})); + StoreEntry = #msg_location { ref_count = RefCount } -> + %% We already know about it, just update counter + ok = index_update(StoreEntry #msg_location { + ref_count = RefCount + 1 }, State), + noreply(State) + end; + +handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> + noreply( + compact(sets:to_list( + lists:foldl( + fun (MsgId, Files1) -> + case remove_message(MsgId, State) of + {compact, File} -> + if CurFile =:= File -> Files1; + true -> sets:add_element(File, Files1) + end; + no_compact -> Files1 end - end, sets:new(), MsgIds)), - State). + end, sets:new(), MsgIds)), + State)); -release(MsgIds, State) -> +handle_cast({release, MsgIds}, State) -> lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), - State. + noreply(State). 
-needs_sync(_MsgIds, #msstate { current_dirty = false }) -> - false; -needs_sync(MsgIds, State = #msstate { current_file = CurFile, - last_sync_offset = SyncOffset }) -> - lists:any(fun (MsgId) -> - #msg_location { file = File, offset = Offset } = - index_lookup(MsgId, State), - File =:= CurFile andalso Offset >= SyncOffset - end, MsgIds). +handle_info(_Info, State) -> + noreply(State). -sync(State = #msstate { current_dirty = false }) -> - State; -sync(State = #msstate { current_file_handle = CurHdl, - current_offset = CurOffset }) -> - ok = file:sync(CurHdl), - State #msstate { current_dirty = false, last_sync_offset = CurOffset }. - -cleanup(State = #msstate { msg_locations = MsgLocations, - file_summary = FileSummary, - current_file_handle = FileHdl, - read_file_handle_cache = HC }) -> +terminate(_Reason, State = #msstate { msg_locations = MsgLocations, + file_summary = FileSummary, + current_file_handle = FileHdl, + read_file_handle_cache = HC }) -> State1 = case FileHdl of undefined -> State; _ -> State2 = sync(State), @@ -411,13 +425,19 @@ cleanup(State = #msstate { msg_locations = MsgLocations, file_summary = undefined, current_file_handle = undefined, current_dirty = false, - read_file_handle_cache = HC1 - }. + read_file_handle_cache = HC1 }. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- +noreply(State) -> {noreply, State}. + +reply(Reply, State) -> {reply, Reply, State}. + form_filename(Dir, Name) -> filename:join(Dir, Name). filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. @@ -442,6 +462,13 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> ok = file:truncate(FileHdl), ok = preallocate(FileHdl, Highpoint, Lowpoint). +sync(State = #msstate { current_dirty = false }) -> + State; +sync(State = #msstate { current_file_handle = CurHdl, + current_offset = CurOffset }) -> + ok = file:sync(CurHdl), + State #msstate { current_dirty = false, last_sync_offset = CurOffset }. 
+ with_read_handle_at(File, Offset, Fun, State = #msstate { dir = Dir, read_file_handle_cache = HC, -- cgit v1.2.1 From 35a9fa05f32539e876a75535049e5a71acb7fc5d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 30 Sep 2009 17:59:28 +0100 Subject: cosmetic refactoring --- src/rabbit_msg_store.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index ef973d8a..aa779e61 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -586,20 +586,20 @@ count_msg_refs(Gen, Seed, State) -> finished -> ok; {_MsgId, 0, Next} -> count_msg_refs(Gen, Next, State); {MsgId, Delta, Next} -> - case index_lookup(MsgId, State) of - not_found -> - ok = index_insert(#msg_location { msg_id = MsgId, + ok = case index_lookup(MsgId, State) of + not_found -> + index_insert(#msg_location { msg_id = MsgId, ref_count = Delta }, State); - StoreEntry = #msg_location { ref_count = RefCount } -> - NewRefCount = RefCount + Delta, - case NewRefCount of - 0 -> ok = index_delete(MsgId, State); - _ -> ok = index_update(StoreEntry #msg_location { + StoreEntry = #msg_location { ref_count = RefCount } -> + NewRefCount = RefCount + Delta, + case NewRefCount of + 0 -> index_delete(MsgId, State); + _ -> index_update(StoreEntry #msg_location { ref_count = NewRefCount }, State) - end - end, + end + end, count_msg_refs(Gen, Next, State) end. -- cgit v1.2.1 From dc27bccf94073ae73e9f9b273485763169f198b4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Oct 2009 13:21:31 +0100 Subject: first cut at queue index. Untested, and uglier than necessary due to absence of file handle cache --- src/rabbit_queue_index.erl | 370 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 370 insertions(+) create mode 100644 src/rabbit_queue_index.erl diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl new file mode 100644 index 00000000..36637323 --- /dev/null +++ b/src/rabbit_queue_index.erl @@ -0,0 +1,370 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_index). + +-export([init/1, write_published/5, write_delivered/2, write_acks/2, + flush_journal/1, read_segment_entries/2]). + +-define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). 
+-define(ACK_JOURNAL_FILENAME, "ack_journal.jif"). +-define(SEQ_BYTES, 8). +-define(SEQ_BITS, (?SEQ_BYTES * 8)). +-define(SEGMENT_EXTENSION, ".idx"). + +-define(REL_SEQ_BITS, 13). +-define(SEGMENT_ENTRIES_COUNT, 8192). %% trunc(math:pow(2,?REL_SEQ_BITS))). + +%% seq only is binary 000 followed by 13 bits of rel seq id +%% (range: 0 - 8191) +-define(REL_SEQ_ONLY_PREFIX, 000). +-define(REL_SEQ_ONLY_PREFIX_BITS, 3). +-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). + +%% publish record is binary 1 followed by bits for +%% is_delivered and is_persistent, then 13 bits of rel seq id, +%% and 128 bits of md5sum msg id +-define(PUBLISH_PREFIX, 1). +-define(PUBLISH_PREFIX_BITS, 1). + +-define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). +%% 16 bytes for md5sum + 2 for seq, bits and prefix +-define(PUBLISH_RECORD_LENGTH_BYTES, ?MSG_ID_BYTES + 2). + +%% 1 publish, 1 deliver, 1 ack per msg +-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRIES_COUNT * + (?PUBLISH_RECORD_LENGTH_BYTES + + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). + +%%---------------------------------------------------------------------------- + +-record(qistate, + { dir, + cur_seg_num, + cur_seg_hdl, + journal_ack_count, + journal_ack_dict, + journal_handle, + seg_ack_counts + }). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(io_device() :: any()). +-type(msg_id() :: binary()). +-type(seq_id() :: integer()). +-type(file_path() :: any()). +-type(int_or_undef() :: integer() | 'undefined'). +-type(io_dev_or_undef() :: io_device() | 'undefined'). +-type(qistate() :: #qistate { dir :: file_path(), + cur_seg_num :: int_or_undef(), + cur_seg_hdl :: io_dev_or_undef(), + journal_ack_count :: integer(), + journal_ack_dict :: dict(), + journal_handle :: io_device(), + seg_ack_counts :: dict() + }). + +-spec(init/1 :: (string()) -> qistate()). +-spec(write_published/5 :: (msg_id(), seq_id(), boolean(), boolean(), qistate()) + -> qistate()). +-spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). +-spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(flush_journal/1 :: (qistate()) -> {boolean(), qistate()}). + +-endif. + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +init(Name) -> + Dir = filename:join(rabbit_mnesia:dir(), Name), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + AckCounts = scatter_journal(Dir, find_ack_counts(Dir)), + {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), + [raw, binary, delayed_write, write, read]), + #qistate { dir = Dir, + cur_seg_num = undefined, + cur_seg_hdl = undefined, + journal_ack_count = 0, + journal_ack_dict = dict:new(), + journal_handle = JournalHdl, + seg_ack_counts = AckCounts + }. + +write_published(MsgId, SeqId, IsDelivered, IsPersistent, State) -> + {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + {Hdl, State1} = get_file_handle_for_seg(SegNum, State), + IsDeliveredNum = bool_to_int(IsDelivered), + IsPersistentNum = bool_to_int(IsPersistent), + ok = file:write(Hdl, + <>), + State1. + +write_delivered(SeqId, State) -> + {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + {Hdl, State1} = get_file_handle_for_seg(SegNum, State), + ok = file:write(Hdl, + <>), + State1. 
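The binaries written by write_published and write_delivered above define the segment file format: a publish record is ?PUBLISH_PREFIX_BITS + 1 + 1 + ?REL_SEQ_BITS bits of header (two bytes) followed by the 16-byte md5sum msg id, and a rel-seq-only record is just a two-byte header. A decoding sketch (illustrative only, not part of the patch; it mirrors the 3-bit rel-seq-only prefix and 13-bit relative sequence ids defined at this point in the series):

%% Sketch: decode one entry from the front of a segment binary.
decode_entry(<<0:3, RelSeq:13, Rest/binary>>) ->
    %% rel-seq-only record: a deliver or ack marker for RelSeq
    {rel_seq_only, RelSeq, Rest};
decode_entry(<<1:1, IsDelivered:1, IsPersistent:1, RelSeq:13,
               MsgId:16/binary, Rest/binary>>) ->
    %% publish record: 2-byte header plus 128-bit msg id
    {publish, RelSeq, IsDelivered =:= 1, IsPersistent =:= 1, MsgId, Rest}.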
+ +write_acks(SeqIds, State = #qistate { journal_handle = JournalHdl, + journal_ack_dict = JAckDict, + journal_ack_count = JAckCount }) -> + {JAckDict1, JAckCount1} = + lists:foldl( + fun (SeqId, {JAckDict2, JAckCount2}) -> + ok = file:write(JournalHdl, <>), + {add_ack_to_ack_dict(SeqId, JAckDict2), JAckCount2 + 1} + end, {JAckDict, JAckCount}, SeqIds), + State1 = State #qistate { journal_ack_dict = JAckDict1, + journal_ack_count = JAckCount1 }, + case JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT of + true -> flush_journal(State1); + false -> State1 + end. + +flush_journal(State = #qistate { journal_ack_count = 0 }) -> + {false, State}; +flush_journal(State = #qistate { journal_handle = JournalHdl, + journal_ack_dict = JAckDict, + journal_ack_count = JAckCount, + seg_ack_counts = AckCounts, + dir = Dir }) -> + [SegNum|_] = dict:fetch_keys(JAckDict), + Acks = dict:fetch(SegNum, JAckDict), + SegPath = seg_num_to_path(Dir, SegNum), + State1 = close_file_handle_for_seg(SegNum, State), + AckCounts1 = append_acks_to_segment(SegPath, SegNum, AckCounts, Acks), + JAckCount1 = JAckCount - length(Acks), + State2 = State1 #qistate { journal_ack_dict = dict:erase(SegNum, JAckDict), + journal_ack_count = JAckCount1, + seg_ack_counts = AckCounts1 }, + if + JAckCount1 == 0 -> + {ok, 0} = file:position(JournalHdl, 0), + file:truncate(JournalHdl), + {false, State2}; + JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> + flush_journal(State2); + true -> + {true, State2} + end. + +read_segment_entries(InitSeqId, State = #qistate { dir = Dir }) -> + {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), + SegPath = seg_num_to_path(Dir, SegNum), + {SDict, _AckCount} = load_segment(SegNum, SegPath), + %% deliberately sort the list desc, because foldl will reverse it + RelSeqs = lists:sort(fun (A, B) -> B < A end, dict:fetch_keys(SDict)), + {lists:foldl(fun (RelSeq, Acc) -> + {MsgId, IsDelivered, IsPersistent} = + dict:fetch(RelSeq, SDict), + [{index_entry, reconstruct_seq_id(SegNum, RelSeq), + MsgId, IsDelivered, IsPersistent, on_disk} | Acc] + end, [], RelSeqs), + State}. + +%%---------------------------------------------------------------------------- +%% Minor Helpers +%%---------------------------------------------------------------------------- + +close_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, + cur_seg_hdl = Hdl }) -> + ok = file:sync(Hdl), + ok = file:close(Hdl), + State #qistate { cur_seg_num = undefined, cur_seg_hdl = undefined }; +close_file_handle_for_seg(_SegNum, State) -> + State. + +get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, + cur_seg_hdl = Hdl }) -> + {Hdl, State}; +get_file_handle_for_seg(SegNum, State) -> + State1 = #qistate { dir = Dir } = close_file_handle_for_seg(SegNum, State), + {ok, Hdl} = file:open(seg_num_to_path(Dir, SegNum), + [binary, raw, append, delayed_write]), + {Hdl, State1 #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl}}. + +bool_to_int(true ) -> 1; +bool_to_int(false) -> 0. + +seq_id_to_seg_and_rel_seq_id(SeqId) -> + { SeqId div ?SEGMENT_ENTRIES_COUNT, SeqId rem ?SEGMENT_ENTRIES_COUNT }. + +reconstruct_seq_id(SegNum, RelSeq) -> + (SegNum * ?SEGMENT_ENTRIES_COUNT) + RelSeq. + +seg_num_to_path(Dir, SegNum) -> + SegName = integer_to_list(SegNum), + filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). 
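To make the segment arithmetic concrete, a worked example (not part of the patch), using the 8192-entry segments defined above:

%% seq_id_to_seg_and_rel_seq_id(16385) =:= {16385 div 8192, 16385 rem 8192}
%%                                     =:= {2, 1}
%% i.e. the entry is recorded in segment file "2.idx" at relative
%% sequence 1, and reconstruct_seq_id(2, 1) =:= (2 * 8192) + 1 =:= 16385.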
+ + +%%---------------------------------------------------------------------------- +%% Startup Functions +%%---------------------------------------------------------------------------- + +find_ack_counts(Dir) -> + SegNumsPaths = + lists:map( + fun (SegName) -> + {list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + SegName)), filename:join(Dir, SegName)} + end, filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)), + lists:foldl( + fun ({SegNum, SegPath}, Acc) -> + case load_segment(SegNum, SegPath) of + {_SDict, 0} -> Acc; + {_SDict, AckCount} -> dict:store(SegNum, AckCount, Acc) + end + end, dict:new(), SegNumsPaths). + +scatter_journal(Dir, AckCounts) -> + JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), + case file:open(JournalPath, [read, read_ahead, raw, binary]) of + {error, enoent} -> AckCounts; + {ok, Hdl} -> + ADict = load_journal(Hdl, dict:new()), + ok = file:close(Hdl), + {AckCounts1, _Dir} = dict:fold(fun replay_journal_acks_to_segment/3, + {AckCounts, Dir}, ADict), + ok = file:delete(JournalPath), + AckCounts1 + end. + +load_journal(Hdl, ADict) -> + case file:read(Hdl, ?SEQ_BYTES) of + {ok, <>} -> + load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict)); + _ErrOrEoF -> ADict + end. + +add_ack_to_ack_dict(SeqId, ADict) -> + {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). + +replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, Dir}) -> + SegPath = seg_num_to_path(Dir, SegNum), + {SDict, _AckCount} = load_segment(SegNum, SegPath), + ValidRelSeqIds = dict:fetch_keys(SDict), + ValidAcks = sets:intersection(sets:from_list(ValidRelSeqIds), + sets:from_list(Acks)), + {append_acks_to_segment(SegPath, SegNum, AckCounts, + sets:to_list(ValidAcks)), + Dir}. + + +%%---------------------------------------------------------------------------- +%% Loading Segments +%%---------------------------------------------------------------------------- + +load_segment(SegNum, SegPath) -> + case file:open(SegPath, [raw, binary, read_ahead, read]) of + {error, enoent} -> dict:new(); + {ok, Hdl} -> + Result = load_segment_entries(SegNum, Hdl, {dict:new(), 0}), + ok = file:close(Hdl), + Result + end. + +load_segment_entries(SegNum, Hdl, {SDict, AckCount}) -> + case file:read(Hdl, 1) of + {ok, <>} -> + {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), + <> = <>, + load_segment_entries(SegNum, Hdl, + deliver_or_ack_msg(SDict, AckCount, RelSeq)); + {ok, <>} -> + {ok, <>} = + file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), + <> = <>, + load_segment_entries( + SegNum, Hdl, {dict:store(RelSeq, {MsgId, 1 == IsDeliveredNum, + 1 == IsPersistentNum}, SDict), + AckCount}); + _ErrOrEoF -> {SDict, AckCount} + end. + +deliver_or_ack_msg(SDict, AckCount, RelSeq) -> + case dict:find(RelSeq, SDict) of + {ok, {MsgId, false, IsPersistent}} -> + {dict:store(RelSeq, {MsgId, true, IsPersistent}, SDict), AckCount}; + {ok, {_MsgId, true, _IsPersistent}} -> + {dict:erase(RelSeq, SDict), AckCount + 1} + end. 
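Taken together, load_segment_entries and deliver_or_ack_msg walk each relative sequence through at most three states: published, delivered, then acked, at which point it is erased and counted. A trace of that life cycle (a sketch only; deliver_or_ack_msg/3 is not exported, so this would have to live inside the module, and the msg id is a dummy):

%% Sketch: state transitions for relative sequence 5 during a segment load.
trace_entry_lifecycle() ->
    MsgId = <<0:128>>,                                        %% dummy md5sum
    SDict0 = dict:store(5, {MsgId, false, true}, dict:new()), %% publish seen
    {SDict1, 0} = deliver_or_ack_msg(SDict0, 0, 5),           %% delivered
    {ok, {MsgId, true, true}} = dict:find(5, SDict1),
    {SDict2, 1} = deliver_or_ack_msg(SDict1, 0, 5),           %% acked, erased
    error = dict:find(5, SDict2).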
+ + +%%---------------------------------------------------------------------------- +%% Appending Acks to Segments +%%---------------------------------------------------------------------------- + +append_acks_to_segment(SegPath, SegNum, AckCounts, Acks) -> + AckCount = case dict:find(SegNum, AckCounts) of + {ok, AckCount1} -> AckCount1; + error -> 0 + end, + case append_acks_to_segment(SegPath, AckCount, Acks) of + 0 -> AckCounts; + ?SEGMENT_ENTRIES_COUNT -> dict:erase(SegNum, AckCounts); + AckCount -> dict:store(SegNum, AckCount, AckCounts) + end. + +append_acks_to_segment(SegPath, AckCount, Acks) + when length(Acks) + AckCount == ?SEGMENT_ENTRIES_COUNT -> + ok = case file:delete(SegPath) of + ok -> ok; + {error, enoent} -> ok + end, + ?SEGMENT_ENTRIES_COUNT; +append_acks_to_segment(SegPath, AckCount, Acks) + when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT -> + {ok, Hdl} = file:open(SegPath, [raw, binary, delayed_write, append]), + AckCount1 = + lists:foldl( + fun (RelSeq, AckCount2) -> + ok = file:write(Hdl, + <>), + AckCount2 + 1 + end, AckCount, Acks), + ok = file:sync(Hdl), + ok = file:close(Hdl), + AckCount1. -- cgit v1.2.1 From 65aeab4ff5b71149e463119b365e7a37780c3b15 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Oct 2009 14:33:53 +0100 Subject: ...and now some of it actually works --- src/rabbit_queue_index.erl | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 36637323..0ec0cd3d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,7 +31,7 @@ -module(rabbit_queue_index). --export([init/1, write_published/5, write_delivered/2, write_acks/2, +-export([init/1, write_published/4, write_delivered/2, write_acks/2, flush_journal/1, read_segment_entries/2]). -define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). @@ -40,18 +40,17 @@ -define(SEQ_BITS, (?SEQ_BYTES * 8)). -define(SEGMENT_EXTENSION, ".idx"). --define(REL_SEQ_BITS, 13). --define(SEGMENT_ENTRIES_COUNT, 8192). %% trunc(math:pow(2,?REL_SEQ_BITS))). +-define(REL_SEQ_BITS, 14). +-define(SEGMENT_ENTRIES_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). -%% seq only is binary 000 followed by 13 bits of rel seq id -%% (range: 0 - 8191) --define(REL_SEQ_ONLY_PREFIX, 000). --define(REL_SEQ_ONLY_PREFIX_BITS, 3). +%% seq only is binary 00 followed by 14 bits of rel seq id +%% (range: 0 - 16383) +-define(REL_SEQ_ONLY_PREFIX, 00). +-define(REL_SEQ_ONLY_PREFIX_BITS, 2). -define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). -%% publish record is binary 1 followed by bits for -%% is_delivered and is_persistent, then 13 bits of rel seq id, -%% and 128 bits of md5sum msg id +%% publish record is binary 1 followed by a bit for is_persistent, +%% then 14 bits of rel seq id, and 128 bits of md5sum msg id -define(PUBLISH_PREFIX, 1). -define(PUBLISH_PREFIX_BITS, 1). @@ -97,7 +96,7 @@ }). -spec(init/1 :: (string()) -> qistate()). --spec(write_published/5 :: (msg_id(), seq_id(), boolean(), boolean(), qistate()) +-spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). @@ -124,15 +123,15 @@ init(Name) -> seg_ack_counts = AckCounts }. 
-write_published(MsgId, SeqId, IsDelivered, IsPersistent, State) -> +write_published(MsgId, SeqId, IsPersistent, State) + when is_binary(MsgId) -> + ?MSG_ID_BYTES = size(MsgId), {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), {Hdl, State1} = get_file_handle_for_seg(SegNum, State), - IsDeliveredNum = bool_to_int(IsDelivered), - IsPersistentNum = bool_to_int(IsPersistent), ok = file:write(Hdl, <>), + (bool_to_int(IsPersistent)):1, + RelSeq:?REL_SEQ_BITS, MsgId/binary>>), State1. write_delivered(SeqId, State) -> @@ -204,6 +203,8 @@ read_segment_entries(InitSeqId, State = #qistate { dir = Dir }) -> %% Minor Helpers %%---------------------------------------------------------------------------- +close_file_handle_for_seg(undefined, State) -> + State; close_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl }) -> ok = file:sync(Hdl), @@ -215,8 +216,9 @@ close_file_handle_for_seg(_SegNum, State) -> get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl }) -> {Hdl, State}; -get_file_handle_for_seg(SegNum, State) -> - State1 = #qistate { dir = Dir } = close_file_handle_for_seg(SegNum, State), +get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = CurSegNum }) -> + State1 = #qistate { dir = Dir } = + close_file_handle_for_seg(CurSegNum, State), {ok, Hdl} = file:open(seg_num_to_path(Dir, SegNum), [binary, raw, append, delayed_write]), {Hdl, State1 #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl}}. @@ -241,12 +243,10 @@ seg_num_to_path(Dir, SegNum) -> find_ack_counts(Dir) -> SegNumsPaths = - lists:map( - fun (SegName) -> - {list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, - SegName)), filename:join(Dir, SegName)} - end, filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)), + [{list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + SegName)), filename:join(Dir, SegName)} + || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)], lists:foldl( fun ({SegNum, SegPath}, Acc) -> case load_segment(SegNum, SegPath) of @@ -312,14 +312,16 @@ load_segment_entries(SegNum, Hdl, {SDict, AckCount}) -> load_segment_entries(SegNum, Hdl, deliver_or_ack_msg(SDict, AckCount, RelSeq)); {ok, <>} -> - {ok, <>} = + IsPersistentNum:1, MSB/bitstring>>} -> + %% because we specify /binary, and binaries are complete + %% bytes, the size spec is in bytes, not bits. + {ok, <>} = file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, load_segment_entries( - SegNum, Hdl, {dict:store(RelSeq, {MsgId, 1 == IsDeliveredNum, - 1 == IsPersistentNum}, SDict), - AckCount}); + SegNum, Hdl, {dict:store(RelSeq, {MsgId, false, + 1 == IsPersistentNum}, + SDict), AckCount}); _ErrOrEoF -> {SDict, AckCount} end. @@ -344,7 +346,7 @@ append_acks_to_segment(SegPath, SegNum, AckCounts, Acks) -> case append_acks_to_segment(SegPath, AckCount, Acks) of 0 -> AckCounts; ?SEGMENT_ENTRIES_COUNT -> dict:erase(SegNum, AckCounts); - AckCount -> dict:store(SegNum, AckCount, AckCounts) + AckCount2 -> dict:store(SegNum, AckCount2, AckCounts) end. 
append_acks_to_segment(SegPath, AckCount, Acks) -- cgit v1.2.1 From 69a4ca36ca4b3dfcb8da75dc58d7b9df75f046f9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Oct 2009 15:26:37 +0100 Subject: fixed some more bugs --- src/rabbit_queue_index.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 0ec0cd3d..6b075a17 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -154,7 +154,8 @@ write_acks(SeqIds, State = #qistate { journal_handle = JournalHdl, State1 = State #qistate { journal_ack_dict = JAckDict1, journal_ack_count = JAckCount1 }, case JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT of - true -> flush_journal(State1); + true -> {_Cont, State2} = flush_journal(State1), + State2; false -> State1 end. @@ -177,7 +178,7 @@ flush_journal(State = #qistate { journal_handle = JournalHdl, if JAckCount1 == 0 -> {ok, 0} = file:position(JournalHdl, 0), - file:truncate(JournalHdl), + ok = file:truncate(JournalHdl), {false, State2}; JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> flush_journal(State2); @@ -203,7 +204,8 @@ read_segment_entries(InitSeqId, State = #qistate { dir = Dir }) -> %% Minor Helpers %%---------------------------------------------------------------------------- -close_file_handle_for_seg(undefined, State) -> +close_file_handle_for_seg(_SegNum, + State = #qistate { cur_seg_num = undefined }) -> State; close_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl }) -> -- cgit v1.2.1 From 4c0ac4a07c3bc6a8e2a5518f395ef91895eb183a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Oct 2009 17:09:52 +0100 Subject: when loading a segment, we should of course take into account the acks which have not yet made it into the segment file. Also fixed one further bug. --- src/rabbit_queue_index.erl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6b075a17..d887a770 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -186,10 +186,11 @@ flush_journal(State = #qistate { journal_handle = JournalHdl, {true, State2} end. 
-read_segment_entries(InitSeqId, State = #qistate { dir = Dir }) -> +read_segment_entries(InitSeqId, State = + #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), SegPath = seg_num_to_path(Dir, SegNum), - {SDict, _AckCount} = load_segment(SegNum, SegPath), + {SDict, _AckCount} = load_segment(SegNum, SegPath, JAckDict), %% deliberately sort the list desc, because foldl will reverse it RelSeqs = lists:sort(fun (A, B) -> B < A end, dict:fetch_keys(SDict)), {lists:foldl(fun (RelSeq, Acc) -> @@ -251,7 +252,7 @@ find_ack_counts(Dir) -> || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)], lists:foldl( fun ({SegNum, SegPath}, Acc) -> - case load_segment(SegNum, SegPath) of + case load_segment(SegNum, SegPath, dict:new()) of {_SDict, 0} -> Acc; {_SDict, AckCount} -> dict:store(SegNum, AckCount, Acc) end @@ -283,7 +284,7 @@ add_ack_to_ack_dict(SeqId, ADict) -> replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, Dir}) -> SegPath = seg_num_to_path(Dir, SegNum), - {SDict, _AckCount} = load_segment(SegNum, SegPath), + {SDict, _AckCount} = load_segment(SegNum, SegPath, dict:new()), ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks)), @@ -296,13 +297,20 @@ replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, Dir}) -> %% Loading Segments %%---------------------------------------------------------------------------- -load_segment(SegNum, SegPath) -> +load_segment(SegNum, SegPath, JAckDict) -> case file:open(SegPath, [raw, binary, read_ahead, read]) of - {error, enoent} -> dict:new(); + {error, enoent} -> {dict:new(), 0}; {ok, Hdl} -> - Result = load_segment_entries(SegNum, Hdl, {dict:new(), 0}), + {SDict, AckCount} = + load_segment_entries(SegNum, Hdl, {dict:new(), 0}), ok = file:close(Hdl), - Result + RelSeqs = case dict:find(SegNum, JAckDict) of + {ok, RelSeqs1} -> RelSeqs1; + error -> [] + end, + lists:foldl(fun (RelSeq, {SDict1, AckCount1}) -> + {dict:erase(RelSeq, SDict1), AckCount1+1} + end, {SDict, AckCount}, RelSeqs) end. load_segment_entries(SegNum, Hdl, {SDict, AckCount}) -> -- cgit v1.2.1 From ebfc4d3de237d9a34402c4dd85f75540f6f56bbc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Oct 2009 17:53:05 +0100 Subject: Added documentation. Also discovered that opening a file in append mode seems to be slower than opening it read/write and seeking to the end. However, further tuning delayed until after the file handle cache appears --- src/rabbit_queue_index.erl | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d887a770..3158a1b3 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -34,6 +34,43 @@ -export([init/1, write_published/4, write_delivered/2, write_acks/2, flush_journal/1, read_segment_entries/2]). +%%---------------------------------------------------------------------------- +%% The queue disk index +%% +%% The queue disk index operates over an ack journal, and a number of +%% segment files. Each segment is the same size, both in max number of +%% entries, and max file size, owing to fixed sized records. +%% +%% Publishes and delivery notes are written directly to the segment +%% files. The segment is found by dividing the sequence id by the the +%% max number of entries per segment. 
Only the relative sequeuence +%% within the segment is recorded as the sequence id within a segment +%% file (i.e. sequeuence id modulo max number of entries per +%% segment). This is keeps entries as small as possible. Publishes and +%% deliveries are only ever going to be received in contiguous +%% ascending order, with publishes following the tail of the queue and +%% deliveries following the head of the queue. +%% +%% Acks are written to a bounded journal and are also held in memory, +%% in a dict with the segment file as the key. When the journal gets +%% too big, or flush_journal is called, the journal is (possibly +%% incrementally) flushed out to the segment files. As acks can be +%% received from any delivered message in any order, this journal +%% reduces seeking, and batches writes to the segment files, keeping +%% performance high. The flush_journal/1 function returns a boolean +%% indicating whether there is more flushing work that can be +%% done. This means that the process can call this whenever it has an +%% empty mailbox, only a small amount of work is done, allowing the +%% process to respond quickly to new messages if they arrive, or to +%% call flush_journal/1 several times until the result indicates there +%% is no more flushing to be done. +%% +%% On startup, the ack journal is read along with all the segment +%% files, and the ack journal is fully flushed out to the segment +%% files. Care is taken to ensure that no message can be ack'd twice. +%% +%%---------------------------------------------------------------------------- + -define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). -define(ACK_JOURNAL_FILENAME, "ack_journal.jif"). -define(SEQ_BYTES, 8). @@ -101,6 +138,9 @@ -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush_journal/1 :: (qistate()) -> {boolean(), qistate()}). +-spec(read_segment_entries/2 :: (seq_id(), qistate()) -> + {[{'index_entry', seq_id(), msg_id(), boolean(), boolean(), + 'on_disk'}], qistate()}). -endif. @@ -223,7 +263,8 @@ get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = CurSegNum }) -> State1 = #qistate { dir = Dir } = close_file_handle_for_seg(CurSegNum, State), {ok, Hdl} = file:open(seg_num_to_path(Dir, SegNum), - [binary, raw, append, delayed_write]), + [binary, raw, write, delayed_write, read]), + {ok, _} = file:position(Hdl, {eof, 0}), {Hdl, State1 #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl}}. 
bool_to_int(true ) -> 1; @@ -368,7 +409,8 @@ append_acks_to_segment(SegPath, AckCount, Acks) ?SEGMENT_ENTRIES_COUNT; append_acks_to_segment(SegPath, AckCount, Acks) when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT -> - {ok, Hdl} = file:open(SegPath, [raw, binary, delayed_write, append]), + {ok, Hdl} = file:open(SegPath, [raw, binary, delayed_write, write, read]), + {ok, _} = file:position(Hdl, {eof, 0}), AckCount1 = lists:foldl( fun (RelSeq, AckCount2) -> -- cgit v1.2.1 From 6b7e8f134c73763fd8db25d995bc2f8f87651912 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Oct 2009 10:31:42 +0100 Subject: minor corrections to documentation that I failed to commit on Friday --- src/rabbit_queue_index.erl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3158a1b3..34bb9920 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -43,27 +43,27 @@ %% %% Publishes and delivery notes are written directly to the segment %% files. The segment is found by dividing the sequence id by the the -%% max number of entries per segment. Only the relative sequeuence +%% max number of entries per segment. Only the relative sequence %% within the segment is recorded as the sequence id within a segment -%% file (i.e. sequeuence id modulo max number of entries per -%% segment). This is keeps entries as small as possible. Publishes and +%% file (i.e. sequence id modulo max number of entries per segment). +%% This is keeps entries as small as possible. Publishes and %% deliveries are only ever going to be received in contiguous -%% ascending order, with publishes following the tail of the queue and -%% deliveries following the head of the queue. +%% ascending order. %% %% Acks are written to a bounded journal and are also held in memory, -%% in a dict with the segment file as the key. When the journal gets -%% too big, or flush_journal is called, the journal is (possibly -%% incrementally) flushed out to the segment files. As acks can be -%% received from any delivered message in any order, this journal -%% reduces seeking, and batches writes to the segment files, keeping -%% performance high. The flush_journal/1 function returns a boolean -%% indicating whether there is more flushing work that can be -%% done. This means that the process can call this whenever it has an -%% empty mailbox, only a small amount of work is done, allowing the -%% process to respond quickly to new messages if they arrive, or to -%% call flush_journal/1 several times until the result indicates there -%% is no more flushing to be done. +%% in a dict with the segment file as the key. Again, the records are +%% fixed size: the entire sequence id is written and is limited to a +%% 64-bit unsigned integer. When the journal gets too big, or +%% flush_journal is called, the journal is (possibly incrementally) +%% flushed out to the segment files. As acks can be received from any +%% delivered message in any order, this journal reduces seeking, and +%% batches writes to the segment files, keeping performance high. The +%% flush_journal/1 function returns a boolean indicating whether there +%% is more flushing work that can be done. 
This means that the process +%% can call this whenever it has an empty mailbox, only a small amount +%% of work is done, allowing the process to respond quickly to new +%% messages if they arrive, or to call flush_journal/1 several times +%% until the result indicates there is no more flushing to be done. %% %% On startup, the ack journal is read along with all the segment %% files, and the ack journal is fully flushed out to the segment -- cgit v1.2.1 From 2fabc816a5b4fa4eb4540d77d3bbe0147bb47c9c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Oct 2009 16:04:05 +0100 Subject: initial work on new file handle cache. Public API not done yet. --- src/rabbit_file_handle_cache2.erl | 220 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 src/rabbit_file_handle_cache2.erl diff --git a/src/rabbit_file_handle_cache2.erl b/src/rabbit_file_handle_cache2.erl new file mode 100644 index 00000000..6fe7e476 --- /dev/null +++ b/src/rabbit_file_handle_cache2.erl @@ -0,0 +1,220 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_file_handle_cache2). + +-behaviour(gen_server2). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). +-define(ETS_HANDLES_NAME, rabbit_file_handle_cache_handles). +-define(ETS_AGE_NAME, rabbit_file_handle_cache_ages). +-define(MAX_FILE_HANDLES, 900). %% unlimit -a on debian default gives 1024 +-define(ISSUE_PERIOD, 10000). %% 10 seconds +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +-record(server_state, + { request_queue, + handles, + ages, + max_handles + }). + +-record(hdl, + { key, + handle, + offset, + timer_ref, + released_at + }). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [?MAX_FILE_HANDLES], []). 
+ +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- + +init([MaxFileHandles]) -> + Handles = ets:new(?ETS_HANDLES_NAME, [ordered_set, private, {keypos, #hdl.key}]), + Ages = ets:new(?ETS_AGE_NAME, [ordered_set, private]), + {ok, #server_state { request_queue = queue:new(), + handles = Handles, + ages = Ages, + max_handles = MaxFileHandles }, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}}, From, + State = #server_state { handles = Handles, + ages = Ages, + request_queue = Reqs }) -> + Key = {From, Path, Mode}, + State1 = + case ets:lookup(Handles, Key) of + [Obj = #hdl { handle = Hdl, offset = Offset, + timer_ref = TRef, released_at = ReleasedAt }] -> + gen_server2:reply(From, {Hdl, Offset}), + ok = stop_timer(TRef), + {ok, TRef1} = start_timer(Callback, Key), + true = ets:insert(Handles, Obj #hdl { offset = unknown, + timer_ref = TRef1, + released_at = not_released }), + true = ets:delete(Ages, ReleasedAt), + State; + [] -> + process_request_queue( + State #server_state { request_queue = queue:in({Key, Callback}, Reqs) }) + end, + {noreply, State1, hibernate}. + +handle_cast({release_handle, Key = {_From, _Path, _Mode}, Offset}, + State = #server_state { handles = Handles, + ages = Ages }) -> + [Obj = #hdl { timer_ref = TRef, released_at = ReleasedAtOld }] = ets:lookup(Handles, Key), + ok = stop_timer(TRef), + ok = case ReleasedAtOld of + not_released -> ReleasedAt = now(), + true = ets:insert_new(Ages, {ReleasedAt, Key}), + true = ets:insert(Handles, Obj #hdl { released_at = ReleasedAt, + offset = Offset, + timer_ref = no_timer }), + ok; + _ -> ok + end, + State1 = process_request_queue(State), + {noreply, State1, hibernate}; +handle_cast({close_handle, Key = {_From, _Path, _Mode}}, + State = #server_state { handles = Handles, + ages = Ages }) -> + [Obj] = ets:lookup(Handles, Key), + ok = close_handle(Obj, Handles, Ages), + State1 = process_request_queue(State), + {noreply, State1, hibernate}. + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #server_state { handles = Handles, ages = Ages, request_queue = Reqs }) -> + Reqs1 = queue:filter(fun ({{From, _Path, _Mode}, _Callback}) -> + From /= Pid + end, Reqs), + lists:foreach(fun (Obj) -> + ok = close_handle(Obj, Handles, Ages) + end, ets:match_object(Handles, #hdl { key = {Pid, '_', '_'}, _ = '_' })), + {noreply, State #server_state { request_queue = Reqs1 }}. + +terminate(_Reason, State = #server_state { ages = Ages, request_queue = Reqs }) -> + Size = ets:info(Ages, size), + Size = free_upto(Size, State), + lists:foreach(fun ({{From, _Path, _Mode}, _Callback}) -> + gen_server2:reply(From, exiting) + end, queue:to_list(Reqs)), + State #server_state { request_queue = queue:new() }. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------------- +%% Helpers +%%---------------------------------------------------------------------------- + +start_timer({M,F,A}, Key) -> + timer:apply_after(?ISSUE_PERIOD, M, F, A ++ [{release_handle, Key}]). + +stop_timer(no_timer) -> + ok; +stop_timer(TRef) -> + timer:cancel(TRef), + ok. 
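Note that start_timer/2 above appends {release_handle, Key} to the callback's argument list before handing it to timer:apply_after/4, so the MFA supplied with a get_handle request needs to route that message back to the owning process. A hypothetical client-side callback (module, function and registration all invented for illustration):

    -module(my_fhc_user).
    -export([forward_release/2]).

    %% Runs in the timer server's context, hence the explicit pid.
    forward_release(ClientPid, Msg = {release_handle, _Key}) ->
        gen_server2:cast(ClientPid, Msg).

    %% Supplied to the cache as {my_fhc_user, forward_release, [self()]}.
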
+ +close_handle(#hdl { key = Key, timer_ref = TRef, released_at = ReleasedAt, handle = Hdl }, + Handles, Ages) -> + ok = timer:stop(TRef), + ok = file:sync(Hdl), + ok = file:close(Hdl), + true = ets:delete(Handles, Key), + true = ets:delete(Ages, ReleasedAt), + ok. + +process_request_queue(State = #server_state { max_handles = MaxHandles, + handles = Handles, + request_queue = Reqs }) -> + Tokens = MaxHandles - ets:info(Handles, size), + Requests = queue:len(Reqs), + OpenCount = case Tokens >= Requests of + true -> Requests; + false -> Tokens + free_upto(Requests - Tokens, State) + end, + open_requested(OpenCount, State). + +open_requested(0, State) -> + State; +open_requested(N, State = #server_state { handles = Handles, request_queue = Reqs }) -> + case queue:out(Reqs) of + {empty, _Reqs} -> State; + {{value, {Key = {From, Path, Mode}, Callback}}, Reqs1} -> + {ok, Hdl} = file:open(Path, Mode), + gen_server2:reply(From, {Hdl, 0}), + {ok, TRef} = start_timer(Callback, Key), + true = ets:insert_new(Handles, #hdl { key = Key, + handle = Hdl, + offset = unknown, + timer_ref = TRef, + released_at = not_released }), + open_requested(N - 1, State #server_state { request_queue = Reqs1 }) + end. + +free_upto(N, State) -> + free_upto(N, 0, State). + +free_upto(0, Count, _State) -> + Count; +free_upto(N, Count, State = #server_state { handles = Handles, + ages = Ages }) -> + case ets:first(Ages) of + '$end_of_table' -> + Count; + {ReleasedAt, Key} -> + [#hdl { handle = Hdl, timer_ref = no_timer, released_at = ReleasedAt }] + = ets:lookup(Handles, Key), + ok = file:sync(Hdl), + ok = file:close(Hdl), + true = ets:delete(Ages, ReleasedAt), + true = ets:delete(Handles, Key), + free_upto(N - 1, Count + 1, State) + end. -- cgit v1.2.1 From 7f915ea87af798f34cf3ddf104c62d599fb1623f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Oct 2009 18:02:00 +0100 Subject: more work on file handle cache. However, untested, and liable to extreme change --- src/rabbit_file_handle_cache2.erl | 138 ++++++++++++++++++++++++++++++++------ 1 file changed, 116 insertions(+), 22 deletions(-) diff --git a/src/rabbit_file_handle_cache2.erl b/src/rabbit_file_handle_cache2.erl index 6fe7e476..6594e62f 100644 --- a/src/rabbit_file_handle_cache2.erl +++ b/src/rabbit_file_handle_cache2.erl @@ -33,7 +33,8 @@ -behaviour(gen_server2). --export([start_link/0]). +-export([start_link/0, new_client/1, get_file_handle/3, release_file_handle/2, + close_file_handle/3, with_file_handle_at/5]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -53,6 +54,11 @@ max_handles }). +-record(client_state, + { callback, + handles + }). + -record(hdl, { key, handle, @@ -68,12 +74,87 @@ start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [?MAX_FILE_HANDLES], []). +new_client(Callback = {_M, _F, _A}) -> + ok = gen_server2:call(?SERVER, new_client, infinity), + #client_state { callback = Callback, + handles = dict:new() }. + +get_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> + case obtain_file_handle(Path, Mode, CState) of + not_available -> {not_available, CState}; + {Mode1, Hdl, _Offset} -> + Handles1 = dict:store({Path, Mode1}, {Hdl, unknown}, Handles), + {Hdl, CState #client_state { handles = Handles1 }} + end. 
+ +release_file_handle({release_handle, Key = {_From, Path, Mode}}, + CState = #client_state { handles = Handles }) -> + Mode1 = lists:usort(Mode), + case dict:find({Path, Mode1}, Handles) of + error -> %% oh well, it must have already gone + CState; + {value, {_Hdl, Offset}} -> + Handles1 = dict:erase({Path, Mode1}, Handles), + gen_server2:cast(?SERVER, {release_handle, Key, Offset}), + CState #client_state { handles = Handles1 } + end. + +close_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> + Mode1 = lists:usort(Mode), + case dict:find({Path, Mode1}, Handles) of + error -> %% oh well, it must have already gone + CState; + {value, _} -> + gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode1}}) + end. + +with_file_handle_at(Path, Mode, Offset, Fun, CState = + #client_state { handles = Handles }) -> + case obtain_file_handle(Path, Mode, CState) of + not_available -> {not_available, CState}; + {Mode1, Hdl, OldOffset} -> + SeekRes = case Offset == OldOffset of + true -> ok; + false -> case file:position(Hdl, Offset) of + {ok, _} -> ok; + KO -> KO + end + end, + case SeekRes of + ok -> {NewOffset, Result} = Fun(Hdl), + {Result, CState #client_state { + handles = dict:store({Path, Mode1}, + {Hdl, NewOffset}, + Handles) }}; + KO1 -> {KO1, CState} + end + end. + +%%---------------------------------------------------------------------------- +%% Client-side helpers +%%---------------------------------------------------------------------------- + +obtain_file_handle(Path, Mode, #client_state { handles = Handles, + callback = Callback }) -> + Mode1 = lists:usort(Mode), + case dict:find(Mode1, Handles) of + error -> + case gen_server2:call(?SERVER, + {get_handle, Path, Mode1, Callback}) of + {Hdl, Offset} -> {Mode1, Hdl, Offset}; + exiting -> not_available + end; + {value, {Hdl, Offset}} -> + {Mode1, Hdl, Offset} + end. + %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- init([MaxFileHandles]) -> - Handles = ets:new(?ETS_HANDLES_NAME, [ordered_set, private, {keypos, #hdl.key}]), + Handles = ets:new(?ETS_HANDLES_NAME, + [ordered_set, private, {keypos, #hdl.key}]), Ages = ets:new(?ETS_AGE_NAME, [ordered_set, private]), {ok, #server_state { request_queue = queue:new(), handles = Handles, @@ -81,6 +162,9 @@ init([MaxFileHandles]) -> max_handles = MaxFileHandles }, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. +handle_call(new_client, From, State) -> + _MRef = erlang:monitor(process, From), + {reply, ok, State}; handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}}, From, State = #server_state { handles = Handles, ages = Ages, @@ -93,30 +177,35 @@ handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}}, From, gen_server2:reply(From, {Hdl, Offset}), ok = stop_timer(TRef), {ok, TRef1} = start_timer(Callback, Key), - true = ets:insert(Handles, Obj #hdl { offset = unknown, - timer_ref = TRef1, - released_at = not_released }), + true = ets:insert(Handles, + Obj #hdl { offset = unknown, + timer_ref = TRef1, + released_at = not_released }), true = ets:delete(Ages, ReleasedAt), State; [] -> process_request_queue( - State #server_state { request_queue = queue:in({Key, Callback}, Reqs) }) + State #server_state { request_queue = + queue:in({Key, Callback}, Reqs) }) end, {noreply, State1, hibernate}. 
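Putting the client API above together, a sketch of a read at a known offset; the path, read size and callback are invented, and the fun passed to with_file_handle_at/5 must return {NewOffset, Result} so the cache can track the handle's position:

    CState0 = rabbit_file_handle_cache2:new_client(
                {my_fhc_user, forward_release, [self()]}),
    {Data, CState1} =
        rabbit_file_handle_cache2:with_file_handle_at(
          "example.dat", [raw, binary, read], 0,
          fun (Hdl) ->
                  {ok, Bin} = file:read(Hdl, 1024),
                  {size(Bin), Bin}   %% new offset, then the result proper
          end, CState0).
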
handle_cast({release_handle, Key = {_From, _Path, _Mode}, Offset}, State = #server_state { handles = Handles, ages = Ages }) -> - [Obj = #hdl { timer_ref = TRef, released_at = ReleasedAtOld }] = ets:lookup(Handles, Key), + [Obj = #hdl { timer_ref = TRef, released_at = ReleasedAtOld }] = + ets:lookup(Handles, Key), ok = stop_timer(TRef), ok = case ReleasedAtOld of - not_released -> ReleasedAt = now(), - true = ets:insert_new(Ages, {ReleasedAt, Key}), - true = ets:insert(Handles, Obj #hdl { released_at = ReleasedAt, - offset = Offset, - timer_ref = no_timer }), - ok; - _ -> ok + not_released -> + ReleasedAt = now(), + true = ets:insert_new(Ages, {ReleasedAt, Key}), + true = ets:insert(Handles, Obj #hdl { released_at = ReleasedAt, + offset = Offset, + timer_ref = no_timer }), + ok; + _ -> + ok end, State1 = process_request_queue(State), {noreply, State1, hibernate}; @@ -129,16 +218,19 @@ handle_cast({close_handle, Key = {_From, _Path, _Mode}}, {noreply, State1, hibernate}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #server_state { handles = Handles, ages = Ages, request_queue = Reqs }) -> + State = #server_state { handles = Handles, ages = Ages, + request_queue = Reqs }) -> Reqs1 = queue:filter(fun ({{From, _Path, _Mode}, _Callback}) -> From /= Pid end, Reqs), lists:foreach(fun (Obj) -> ok = close_handle(Obj, Handles, Ages) - end, ets:match_object(Handles, #hdl { key = {Pid, '_', '_'}, _ = '_' })), + end, ets:match_object(Handles, #hdl { key = {Pid, '_', '_'}, + _ = '_' })), {noreply, State #server_state { request_queue = Reqs1 }}. -terminate(_Reason, State = #server_state { ages = Ages, request_queue = Reqs }) -> +terminate(_Reason, State = #server_state { ages = Ages, + request_queue = Reqs }) -> Size = ets:info(Ages, size), Size = free_upto(Size, State), lists:foreach(fun ({{From, _Path, _Mode}, _Callback}) -> @@ -150,7 +242,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %%---------------------------------------------------------------------------- -%% Helpers +%% Server-side Helpers %%---------------------------------------------------------------------------- start_timer({M,F,A}, Key) -> @@ -162,7 +254,8 @@ stop_timer(TRef) -> timer:cancel(TRef), ok. 
-close_handle(#hdl { key = Key, timer_ref = TRef, released_at = ReleasedAt, handle = Hdl }, +close_handle(#hdl { key = Key, timer_ref = TRef, released_at = ReleasedAt, + handle = Hdl }, Handles, Ages) -> ok = timer:stop(TRef), ok = file:sync(Hdl), @@ -184,7 +277,8 @@ process_request_queue(State = #server_state { max_handles = MaxHandles, open_requested(0, State) -> State; -open_requested(N, State = #server_state { handles = Handles, request_queue = Reqs }) -> +open_requested(N, State = #server_state { handles = Handles, + request_queue = Reqs }) -> case queue:out(Reqs) of {empty, _Reqs} -> State; {{value, {Key = {From, Path, Mode}, Callback}}, Reqs1} -> @@ -210,8 +304,8 @@ free_upto(N, Count, State = #server_state { handles = Handles, '$end_of_table' -> Count; {ReleasedAt, Key} -> - [#hdl { handle = Hdl, timer_ref = no_timer, released_at = ReleasedAt }] - = ets:lookup(Handles, Key), + [#hdl { handle = Hdl, timer_ref = no_timer, + released_at = ReleasedAt }] = ets:lookup(Handles, Key), ok = file:sync(Hdl), ok = file:close(Hdl), true = ets:delete(Ages, ReleasedAt), -- cgit v1.2.1 From 1233514815fead301d804f0b3f789b797dc0a9cf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Oct 2009 18:11:02 +0100 Subject: mainly cosmetics --- src/rabbit_file_handle_cache2.erl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/rabbit_file_handle_cache2.erl b/src/rabbit_file_handle_cache2.erl index 6594e62f..c15a3087 100644 --- a/src/rabbit_file_handle_cache2.erl +++ b/src/rabbit_file_handle_cache2.erl @@ -89,12 +89,11 @@ get_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> release_file_handle({release_handle, Key = {_From, Path, Mode}}, CState = #client_state { handles = Handles }) -> - Mode1 = lists:usort(Mode), - case dict:find({Path, Mode1}, Handles) of + case dict:find({Path, Mode}, Handles) of error -> %% oh well, it must have already gone CState; {value, {_Hdl, Offset}} -> - Handles1 = dict:erase({Path, Mode1}, Handles), + Handles1 = dict:erase({Path, Mode}, Handles), gen_server2:cast(?SERVER, {release_handle, Key, Offset}), CState #client_state { handles = Handles1 } end. @@ -113,13 +112,14 @@ with_file_handle_at(Path, Mode, Offset, Fun, CState = case obtain_file_handle(Path, Mode, CState) of not_available -> {not_available, CState}; {Mode1, Hdl, OldOffset} -> - SeekRes = case Offset == OldOffset of - true -> ok; - false -> case file:position(Hdl, Offset) of - {ok, _} -> ok; - KO -> KO - end - end, + SeekRes = + case Offset == OldOffset orelse not is_integer(Offset) of + true -> ok; + false -> case file:position(Hdl, Offset) of + {ok, _} -> ok; + KO -> KO + end + end, case SeekRes of ok -> {NewOffset, Result} = Fun(Hdl), {Result, CState #client_state { @@ -164,7 +164,7 @@ init([MaxFileHandles]) -> handle_call(new_client, From, State) -> _MRef = erlang:monitor(process, From), - {reply, ok, State}; + {reply, ok, State, hibernate}; handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}}, From, State = #server_state { handles = Handles, ages = Ages, -- cgit v1.2.1 From 3e7bf336241154f71d44541910badf90e321731b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 6 Oct 2009 13:18:14 +0100 Subject: Well fixed a few bugs in the fhc2, but it's fundamentally flawed by the fact that raw files can only be manipulated by the process that opens it. Thus the whole design is wrong because the server process can't be responsible for holding on to released but unclosed fhs. 
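The flaw described above is standard behaviour of the file module rather than anything introduced by this patch: a handle opened with raw can only be used from the process that opened it. A quick shell illustration (filename invented; the exact error term depends on the OTP release, with recent ones returning not_on_controlling_process):

    {ok, Hdl} = file:open("raw_demo.tmp", [raw, write]),
    Parent = self(),
    spawn(fun () -> Parent ! file:write(Hdl, <<"x">>) end),
    receive Res -> Res end.
    %% => {error, ...} from the other process, while the same
    %%    write performed by the opening process succeeds.
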
--- src/rabbit_file_handle_cache2.erl | 92 ++++++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/src/rabbit_file_handle_cache2.erl b/src/rabbit_file_handle_cache2.erl index c15a3087..9f459ebf 100644 --- a/src/rabbit_file_handle_cache2.erl +++ b/src/rabbit_file_handle_cache2.erl @@ -34,7 +34,7 @@ -behaviour(gen_server2). -export([start_link/0, new_client/1, get_file_handle/3, release_file_handle/2, - close_file_handle/3, with_file_handle_at/5]). + close_file_handle/3, close_all_file_handles/1, with_file_handle_at/5]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -75,7 +75,7 @@ start_link() -> gen_server2:start_link({local, ?SERVER}, ?MODULE, [?MAX_FILE_HANDLES], []). new_client(Callback = {_M, _F, _A}) -> - ok = gen_server2:call(?SERVER, new_client, infinity), + gen_server2:cast(?SERVER, {new_client, self()}), #client_state { callback = Callback, handles = dict:new() }. @@ -104,9 +104,17 @@ close_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> error -> %% oh well, it must have already gone CState; {value, _} -> - gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode1}}) + gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode1}}), + CState #client_state { handles = dict:erase({Path, Mode}, Handles) } end. +close_all_file_handles(CState = #client_state { handles = Handles }) -> + lists:foreach( + fun({Path, Mode}) -> + gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode}}) + end, dict:fetch_keys(Handles)), + CState #client_state { handles = dict:new() }. + with_file_handle_at(Path, Mode, Offset, Fun, CState = #client_state { handles = Handles }) -> case obtain_file_handle(Path, Mode, CState) of @@ -140,7 +148,10 @@ obtain_file_handle(Path, Mode, #client_state { handles = Handles, case dict:find(Mode1, Handles) of error -> case gen_server2:call(?SERVER, - {get_handle, Path, Mode1, Callback}) of + {get_handle, Path, Mode1, Callback, self()}, + infinity) of + {open_fun, Fun} -> {Hdl, Offset} = Fun(), + {Mode1, Hdl, Offset}; {Hdl, Offset} -> {Mode1, Hdl, Offset}; exiting -> not_available end; @@ -160,16 +171,14 @@ init([MaxFileHandles]) -> handles = Handles, ages = Ages, max_handles = MaxFileHandles }, + hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -handle_call(new_client, From, State) -> - _MRef = erlang:monitor(process, From), - {reply, ok, State, hibernate}; -handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}}, From, +handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}, Pid}, From, State = #server_state { handles = Handles, ages = Ages, request_queue = Reqs }) -> - Key = {From, Path, Mode}, + Key = {Pid, Path, Mode}, State1 = case ets:lookup(Handles, Key) of [Obj = #hdl { handle = Hdl, offset = Offset, @@ -186,11 +195,15 @@ handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}}, From, [] -> process_request_queue( State #server_state { request_queue = - queue:in({Key, Callback}, Reqs) }) + queue:in({Key, From, Callback}, Reqs) }) end, {noreply, State1, hibernate}. 
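The workaround attempted above: when the requested mode contains raw, the server replies with a fun instead of a handle; the client evaluates the fun, so file:open runs in, and the handle is owned by, the client process, which then casts store_handle back for the server's bookkeeping. Condensed from obtain_file_handle above:

    case gen_server2:call(?SERVER,
                          {get_handle, Path, Mode, Callback, self()},
                          infinity) of
        {open_fun, Fun} -> Fun();          %% raw: opened client-side
        {Hdl, Offset}   -> {Hdl, Offset};  %% non-raw: opened by the server
        exiting         -> not_available
    end
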
-handle_cast({release_handle, Key = {_From, _Path, _Mode}, Offset}, +handle_cast({store_handle, Obj = #hdl {}}, + State = #server_state { handles = Handles }) -> + ets:insert_new(Handles, Obj), + {noreply, State, hibernate}; +handle_cast({release_handle, Key = {_Pid, _Path, _Mode}, Offset}, State = #server_state { handles = Handles, ages = Ages }) -> [Obj = #hdl { timer_ref = TRef, released_at = ReleasedAtOld }] = @@ -209,19 +222,26 @@ handle_cast({release_handle, Key = {_From, _Path, _Mode}, Offset}, end, State1 = process_request_queue(State), {noreply, State1, hibernate}; -handle_cast({close_handle, Key = {_From, _Path, _Mode}}, +handle_cast({close_handle, Key = {_Pid, _Path, _Mode}}, State = #server_state { handles = Handles, ages = Ages }) -> [Obj] = ets:lookup(Handles, Key), ok = close_handle(Obj, Handles, Ages), State1 = process_request_queue(State), - {noreply, State1, hibernate}. + {noreply, State1, hibernate}; +handle_cast({new_client, Pid}, State) -> + _MRef = erlang:monitor(process, Pid), + {noreply, State, hibernate}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #server_state { handles = Handles, ages = Ages, request_queue = Reqs }) -> - Reqs1 = queue:filter(fun ({{From, _Path, _Mode}, _Callback}) -> - From /= Pid + Reqs1 = queue:filter(fun ({{OPid, _Path, _Mode}, From, _Callback}) -> + if OPid == Pid -> + gen_server2:reply(From, exiting), + false; + true -> true + end end, Reqs), lists:foreach(fun (Obj) -> ok = close_handle(Obj, Handles, Ages) @@ -233,7 +253,7 @@ terminate(_Reason, State = #server_state { ages = Ages, request_queue = Reqs }) -> Size = ets:info(Ages, size), Size = free_upto(Size, State), - lists:foreach(fun ({{From, _Path, _Mode}, _Callback}) -> + lists:foreach(fun ({{_Pid, _Path, _Mode}, From, _Callback}) -> gen_server2:reply(From, exiting) end, queue:to_list(Reqs)), State #server_state { request_queue = queue:new() }. @@ -281,15 +301,37 @@ open_requested(N, State = #server_state { handles = Handles, request_queue = Reqs }) -> case queue:out(Reqs) of {empty, _Reqs} -> State; - {{value, {Key = {From, Path, Mode}, Callback}}, Reqs1} -> - {ok, Hdl} = file:open(Path, Mode), - gen_server2:reply(From, {Hdl, 0}), - {ok, TRef} = start_timer(Callback, Key), - true = ets:insert_new(Handles, #hdl { key = Key, - handle = Hdl, - offset = unknown, - timer_ref = TRef, - released_at = not_released }), + {{value, {Key = {_Pid, Path, Mode}, From, Callback}}, Reqs1} -> + Msg = + case lists:member(raw, Mode) of + true -> + Fun = + fun() -> + {ok, Hdl} = file:open(Path, Mode), + {ok, TRef} = start_timer(Callback, Key), + gen_server2:cast( + ?SERVER, {store_handle, + #hdl {key = Key, + handle = Hdl, + offset = unknown, + timer_ref = TRef, + released_at = not_released + }}), + {Hdl, 0} + end, + {open_fun, Fun}; + false -> + {ok, Hdl} = file:open(Path, Mode), + {ok, TRef} = start_timer(Callback, Key), + true = ets:insert_new(Handles, #hdl { key = Key, + handle = Hdl, + offset = unknown, + timer_ref = TRef, + released_at = + not_released }), + {Hdl, 0} + end, + gen_server2:reply(From, Msg), open_requested(N - 1, State #server_state { request_queue = Reqs1 }) end. -- cgit v1.2.1 From 665eda30bf499749fb187fc9a4cc97ad30fcdb23 Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Tue, 6 Oct 2009 18:21:58 +0100 Subject: Memory monitor code. 
--- src/rabbit.erl | 3 +- src/rabbit_memory_monitor.erl | 207 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 src/rabbit_memory_monitor.erl diff --git a/src/rabbit.erl b/src/rabbit.erl index 18fd1b17..01e06db3 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -146,7 +146,8 @@ start(normal, []) -> ok = rabbit_amqqueue:start(), ok = start_child(rabbit_router), - ok = start_child(rabbit_node_monitor) + ok = start_child(rabbit_node_monitor), + ok = start_child(rabbit_memory_monitor) end}, {"recovery", fun () -> diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl new file mode 100644 index 00000000..8bdd394b --- /dev/null +++ b/src/rabbit_memory_monitor.erl @@ -0,0 +1,207 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + + +%% This module handles the node-wide memory statistics. +%% It receives statistics from all queues, counts the desired +%% queue length (in seconds), and sends this information back to +%% queues. +%% +%% Normally, messages are exchanged like that: +%% +%% (1) (2) (3) +%% Timer | | +%% v v +%% Queue -----+--------+-----<***hibernated***>-------------> +%% | ^ | ^ ^ +%% v | v | | +%% Monitor X--*-+--X---*-+--X------X----X-----X+-----------> +%% +%% Or to put it in words. Queue periodically sends (casts) 'push_drain_ratio' +%% message to the Monitor (cases 1 and 2 on the asciiart above). Monitor +%% _always_ replies with a 'set_bufsec_limit' cast. This way, +%% we're pretty sure that the Queue is not hibernated. +%% Monitor periodically recounts numbers ('X' on asciiart). If, during this +%% update we notice that a queue was using too much memory, we send a message +%% back. This will happen even if the queue is hibernated, as we really do want +%% it to reduce its memory footprint. + + +-module(rabbit_memory_monitor). + +-behaviour(gen_server2). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([update/0]). + +-export([register/1]). 
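From a queue's point of view the exchange sketched in the header comment comes down to a registration followed by periodic casts; the queue-side code lands in a later commit of this series, so the figure here is invented:

    rabbit_memory_monitor:register(self()),
    DrainRatio = 50.0,   %% seconds until this queue would drain empty
    gen_server2:cast(rabbit_memory_monitor,
                     {push_drain_ratio, self(), DrainRatio}).
    %% The monitor always answers with a cast of the form
    %% {set_bufsec_limit, PerQueueSeconds}.
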
+ +-record(state, {timer, %% 'internal_update' timer + drain_dict, %% dict, queue_pid:seconds_till_queue_is_empty + drain_avg, %% global, the desired queue depth (in seconds) + memory_limit %% how much memory we intend to use + }). + +-define(SERVER, ?MODULE). +-define(DEFAULT_UPDATE_INTERVAL_MS, 2500). + +%% Enable debug reports in stdout: +-define(debug, true). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). + +update() -> + gen_server2:cast(?SERVER, update). + +%%---------------------------------------------------------------------------- + +register(Pid) -> + gen_server2:cast(?SERVER, {register, Pid}). + +%%---------------------------------------------------------------------------- + +init([]) -> + %% TODO: References to os_mon and rabbit_memsup_linux + %% should go away as bug 21457 removes it. + %% BTW: memsup:get_system_memory_data() doesn't work. + {state, TotalMemory, _Allocated} = rabbit_memsup_linux:update({state, 0,0}), + + {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS, + ?SERVER, update, []), + MemoryHighWatermark = os_mon:get_env(memsup, system_memory_high_watermark), + MemoryLimit = erlang:trunc(TotalMemory * MemoryHighWatermark), + {ok, #state{timer = TRef, + drain_dict = dict:new(), + drain_avg = infinity, + memory_limit = MemoryLimit}}. + +handle_call(_Request, _From, State) -> + {noreply, State}. + + +handle_cast(update, State) -> + {noreply, internal_update(State)}; + +handle_cast({register, Pid}, State) -> + _MRef = erlang:monitor(process, Pid), + {noreply, State}; + +handle_cast({push_drain_ratio, Pid, DrainRatio}, State) -> + gen_server2:cast(Pid, {set_bufsec_limit, State#state.drain_avg}), + {noreply, State#state{drain_dict = + dict:store(Pid, DrainRatio, State#state.drain_dict)}}; + +handle_cast(_Request, State) -> + {noreply, State}. + + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> + {noreply, State#state{drain_dict = dict:erase(Pid, State#state.drain_dict)}}; + +handle_info(_Info, State) -> + {noreply, State}. + + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +-ifdef(debug). +ftoa(Float) -> + Str = case is_float(Float) of + true -> io_lib:format("~11.3f",[Float]); + false -> io_lib:format("~p", [Float]) + end, + lists:flatten(Str). + +print_debug_info(UsedSeconds, AvailableSeconds, UsedMemory, TotalMemory, + PerQueueSeconds, QueueSec) -> + io:format("Update ~s/~s ~s/~s PerQueueSeconds:~s ~s~n", + [ftoa(UsedSeconds), ftoa(AvailableSeconds), + ftoa(UsedMemory/1024.0/1024.0), ftoa(TotalMemory/1024.0/1024.0), + ftoa(PerQueueSeconds), + [" "] ++ lists:flatten([ftoa(Q)++" " || Q <- QueueSec]) + ]). +-else. +print_debug_info(_UsedSeconds, _AvailableSeconds, _UsedMemory, _TotalMemory, + _PerQueueSeconds, _QueueSec) -> + ok. + +-endif. 
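The internal_update function that follows turns the node's memory limit into a per-queue allowance in the same seconds-until-empty unit. With invented figures:

    MemoryLimit = 400 * 1048576,   %% bytes the node intends to use
    UsedMemory  = 100 * 1048576,   %% erlang:memory(total)
    UsedSeconds = 20,              %% sum of all reported drain ratios
    AvailableSeconds = MemoryLimit / (UsedMemory / UsedSeconds), %% 80.0
    PerQueueSeconds  = AvailableSeconds / 4.  %% with 4 queues: 20.0 each
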
+ +internal_update(State) -> + UsedMemory = erlang:memory(total), + TotalMemory = State#state.memory_limit, + QueueSec = [V || {_K, V} <- dict:to_list(State#state.drain_dict) ], + UsedSeconds = lists:sum( lists:filter(fun (A) -> + is_number(A) or is_float(A) + end, + QueueSec) ), + AvailableSeconds = case UsedSeconds of + 0 -> infinity; + 0.0 -> infinity; + _ -> TotalMemory / (UsedMemory / UsedSeconds) + end, + QueuesNumber = dict:size(State#state.drain_dict), + PerQueueSeconds = case (QueuesNumber > 0) and (AvailableSeconds /= infinity) of + true -> AvailableSeconds / QueuesNumber; + false -> infinity + end, + print_debug_info(UsedSeconds, AvailableSeconds, UsedMemory, TotalMemory, + PerQueueSeconds, QueueSec), + %% Inform the queue to reduce it's memory usage when needed. + %% This can sometimes wake the queue from hibernation. Well, we don't care. + ReduceMemory = fun ({Pid, QueueS}) -> + case QueueS > PerQueueSeconds of + true -> + gen_server2:cast(Pid, {set_bufsec_limit, PerQueueSeconds}); + _ -> ok + end + end, + lists:map(ReduceMemory, dict:to_list(State#state.drain_dict)), + State#state{drain_avg = PerQueueSeconds}. + + -- cgit v1.2.1 From cd9371e17e8e64072dd04f6b10de3e05e1d52fe9 Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Tue, 6 Oct 2009 18:22:36 +0100 Subject: Changes to amqqueue_process required to proove that the code works. --- src/rabbit_amqqueue_process.erl | 65 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index fe2e8509..fa3d17a8 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -42,6 +42,7 @@ -export([start_link/1]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). +-export([send_memory_monitor_update/1]). -import(queue). -import(erlang). @@ -55,12 +56,18 @@ next_msg_id, message_buffer, active_consumers, - blocked_consumers}). + blocked_consumers, + drain_ratio}). -record(consumer, {tag, ack_required}). -record(tx, {ch_pid, is_persistent, pending_messages, pending_acks}). +-record(ratio, {ratio, %% float. messages/microsecond_us + t0, %% previous timestamp (us) + next_msg_id %% previous next_msg_id + }). + %% These are held in our process dictionary -record(cr, {consumer_count, ch_pid, @@ -92,9 +99,15 @@ start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). %%---------------------------------------------------------------------------- +now_us() -> + {Megaseconds,Seconds,Microseconds} = erlang:now(), + Megaseconds * 1000000 * 1000000 + Seconds * 1000000 + Microseconds. init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), + rabbit_memory_monitor:register(self()), + %% Beware. This breaks hibernation! + timer:apply_interval(2500, ?MODULE, send_memory_monitor_update, [self()]), {ok, #q{q = Q, owner = none, exclusive_consumer = none, @@ -102,7 +115,11 @@ init(Q) -> next_msg_id = 1, message_buffer = queue:new(), active_consumers = queue:new(), - blocked_consumers = queue:new()}, hibernate, + blocked_consumers = queue:new(), + drain_ratio = #ratio{ratio = 0.0, + t0 = now_us(), + next_msg_id = 1} + }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(_Reason, State) -> @@ -797,7 +814,49 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end, NewLimited = Limited andalso LimiterPid =/= undefined, C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} - end)). 
+ end)); + +handle_cast(send_memory_monitor_update, State) -> + DrainRatio1 = update_ratio(State#q.drain_ratio, State#q.next_msg_id), + MsgSec = DrainRatio1#ratio.ratio * 1000000, % msg/sec + BufSec = case MsgSec < 0.016 of %% less than 1 msg/1 minute + true -> infinity; + false -> queue:len(State#q.message_buffer) / MsgSec + end, + gen_server2:cast(rabbit_memory_monitor, {push_drain_ratio, self(), BufSec}), + noreply(State#q{drain_ratio = DrainRatio1}); + +handle_cast({set_bufsec_limit, BufSec}, State) -> + DrainRatio = State#q.drain_ratio, + DesiredQueueLength = case BufSec of + infinity -> infinity; + _ -> BufSec * DrainRatio#ratio.ratio * 1000000 + end, + %% Just to proove that something is happening. + io:format("Queue size is ~8p, should be ~p~n", + [queue:len(State#q.message_buffer), DesiredQueueLength]), + noreply(State). + + +%% Based on kernel load average, as descibed: +%% http://www.teamquest.com/resources/gunther/display/5/ +calc_load(Load, Exp, N) -> + Load*Exp + N*(1.0-Exp). + +update_ratio(_RatioRec = #ratio{ratio=Ratio, t0 = T0, next_msg_id = MsgCount0}, MsgCount1) -> + T1 = now_us(), + Td = T1 - T0, + MsgCount = MsgCount1 - MsgCount0, + MsgUSec = MsgCount / Td, % msg/usec + %% Td is in usec. We're interested in "load average" from last 30 seconds. + Ratio1 = calc_load(Ratio, 1.0/ (math:exp(Td/(30*1000000))), MsgUSec), + + #ratio{ratio = Ratio1, t0=T1, next_msg_id = MsgCount1}. + + +send_memory_monitor_update(Pid) -> + gen_server2:cast(Pid, send_memory_monitor_update). + handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> -- cgit v1.2.1 From 46d6ad493fae3e3354c9478110231d1a07677d44 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Oct 2009 13:50:44 +0100 Subject: just some fixes as a result of dialyzer, but the module is actually about to be deleted --- src/rabbit_file_handle_cache2.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_file_handle_cache2.erl b/src/rabbit_file_handle_cache2.erl index 9f459ebf..82c75fc6 100644 --- a/src/rabbit_file_handle_cache2.erl +++ b/src/rabbit_file_handle_cache2.erl @@ -92,7 +92,7 @@ release_file_handle({release_handle, Key = {_From, Path, Mode}}, case dict:find({Path, Mode}, Handles) of error -> %% oh well, it must have already gone CState; - {value, {_Hdl, Offset}} -> + {ok, {_Hdl, Offset}} -> Handles1 = dict:erase({Path, Mode}, Handles), gen_server2:cast(?SERVER, {release_handle, Key, Offset}), CState #client_state { handles = Handles1 } @@ -103,7 +103,7 @@ close_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> case dict:find({Path, Mode1}, Handles) of error -> %% oh well, it must have already gone CState; - {value, _} -> + {ok, _} -> gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode1}}), CState #client_state { handles = dict:erase({Path, Mode}, Handles) } end. @@ -155,7 +155,7 @@ obtain_file_handle(Path, Mode, #client_state { handles = Handles, {Hdl, Offset} -> {Mode1, Hdl, Offset}; exiting -> not_available end; - {value, {Hdl, Offset}} -> + {ok, {Hdl, Offset}} -> {Mode1, Hdl, Offset} end. 
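The smoothing in calc_load/update_ratio above is an exponentially weighted average with a 30 second time constant, so a rate sample taken shortly after the previous one barely moves the estimate. With invented numbers:

    OldRatio = 0.0010,    %% msg/us, previous smoothed rate
    Sample   = 0.0020,    %% msg/us, rate measured over the last interval
    Td       = 2500000,   %% us since the previous sample (2.5s)
    Exp      = 1.0 / math:exp(Td / (30 * 1000000)),    %% ~0.920
    NewRatio = OldRatio * Exp + Sample * (1.0 - Exp).  %% ~0.00108
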
@@ -277,7 +277,7 @@ stop_timer(TRef) -> close_handle(#hdl { key = Key, timer_ref = TRef, released_at = ReleasedAt, handle = Hdl }, Handles, Ages) -> - ok = timer:stop(TRef), + ok = stop_timer(TRef), ok = file:sync(Hdl), ok = file:close(Hdl), true = ets:delete(Handles, Key), -- cgit v1.2.1 From d2fa09d7370fb04eee96aac90083fc9e0f1918bf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Oct 2009 13:51:14 +0100 Subject: removing bogus and flawed fhc2 --- src/rabbit_file_handle_cache2.erl | 356 -------------------------------------- 1 file changed, 356 deletions(-) delete mode 100644 src/rabbit_file_handle_cache2.erl diff --git a/src/rabbit_file_handle_cache2.erl b/src/rabbit_file_handle_cache2.erl deleted file mode 100644 index 82c75fc6..00000000 --- a/src/rabbit_file_handle_cache2.erl +++ /dev/null @@ -1,356 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_file_handle_cache2). - --behaviour(gen_server2). - --export([start_link/0, new_client/1, get_file_handle/3, release_file_handle/2, - close_file_handle/3, close_all_file_handles/1, with_file_handle_at/5]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --define(SERVER, ?MODULE). --define(ETS_HANDLES_NAME, rabbit_file_handle_cache_handles). --define(ETS_AGE_NAME, rabbit_file_handle_cache_ages). --define(MAX_FILE_HANDLES, 900). %% unlimit -a on debian default gives 1024 --define(ISSUE_PERIOD, 10000). %% 10 seconds --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). - --record(server_state, - { request_queue, - handles, - ages, - max_handles - }). - --record(client_state, - { callback, - handles - }). - --record(hdl, - { key, - handle, - offset, - timer_ref, - released_at - }). - -%%---------------------------------------------------------------------------- -%% Public API -%%---------------------------------------------------------------------------- - -start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [?MAX_FILE_HANDLES], []). - -new_client(Callback = {_M, _F, _A}) -> - gen_server2:cast(?SERVER, {new_client, self()}), - #client_state { callback = Callback, - handles = dict:new() }. 
- -get_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> - case obtain_file_handle(Path, Mode, CState) of - not_available -> {not_available, CState}; - {Mode1, Hdl, _Offset} -> - Handles1 = dict:store({Path, Mode1}, {Hdl, unknown}, Handles), - {Hdl, CState #client_state { handles = Handles1 }} - end. - -release_file_handle({release_handle, Key = {_From, Path, Mode}}, - CState = #client_state { handles = Handles }) -> - case dict:find({Path, Mode}, Handles) of - error -> %% oh well, it must have already gone - CState; - {ok, {_Hdl, Offset}} -> - Handles1 = dict:erase({Path, Mode}, Handles), - gen_server2:cast(?SERVER, {release_handle, Key, Offset}), - CState #client_state { handles = Handles1 } - end. - -close_file_handle(Path, Mode, CState = #client_state { handles = Handles }) -> - Mode1 = lists:usort(Mode), - case dict:find({Path, Mode1}, Handles) of - error -> %% oh well, it must have already gone - CState; - {ok, _} -> - gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode1}}), - CState #client_state { handles = dict:erase({Path, Mode}, Handles) } - end. - -close_all_file_handles(CState = #client_state { handles = Handles }) -> - lists:foreach( - fun({Path, Mode}) -> - gen_server2:cast(?SERVER, {close_handle, {self(), Path, Mode}}) - end, dict:fetch_keys(Handles)), - CState #client_state { handles = dict:new() }. - -with_file_handle_at(Path, Mode, Offset, Fun, CState = - #client_state { handles = Handles }) -> - case obtain_file_handle(Path, Mode, CState) of - not_available -> {not_available, CState}; - {Mode1, Hdl, OldOffset} -> - SeekRes = - case Offset == OldOffset orelse not is_integer(Offset) of - true -> ok; - false -> case file:position(Hdl, Offset) of - {ok, _} -> ok; - KO -> KO - end - end, - case SeekRes of - ok -> {NewOffset, Result} = Fun(Hdl), - {Result, CState #client_state { - handles = dict:store({Path, Mode1}, - {Hdl, NewOffset}, - Handles) }}; - KO1 -> {KO1, CState} - end - end. - -%%---------------------------------------------------------------------------- -%% Client-side helpers -%%---------------------------------------------------------------------------- - -obtain_file_handle(Path, Mode, #client_state { handles = Handles, - callback = Callback }) -> - Mode1 = lists:usort(Mode), - case dict:find(Mode1, Handles) of - error -> - case gen_server2:call(?SERVER, - {get_handle, Path, Mode1, Callback, self()}, - infinity) of - {open_fun, Fun} -> {Hdl, Offset} = Fun(), - {Mode1, Hdl, Offset}; - {Hdl, Offset} -> {Mode1, Hdl, Offset}; - exiting -> not_available - end; - {ok, {Hdl, Offset}} -> - {Mode1, Hdl, Offset} - end. - -%%---------------------------------------------------------------------------- -%% gen_server callbacks -%%---------------------------------------------------------------------------- - -init([MaxFileHandles]) -> - Handles = ets:new(?ETS_HANDLES_NAME, - [ordered_set, private, {keypos, #hdl.key}]), - Ages = ets:new(?ETS_AGE_NAME, [ordered_set, private]), - {ok, #server_state { request_queue = queue:new(), - handles = Handles, - ages = Ages, - max_handles = MaxFileHandles }, - hibernate, - {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
- -handle_call({get_handle, Path, Mode, Callback = {_M, _F, _A}, Pid}, From, - State = #server_state { handles = Handles, - ages = Ages, - request_queue = Reqs }) -> - Key = {Pid, Path, Mode}, - State1 = - case ets:lookup(Handles, Key) of - [Obj = #hdl { handle = Hdl, offset = Offset, - timer_ref = TRef, released_at = ReleasedAt }] -> - gen_server2:reply(From, {Hdl, Offset}), - ok = stop_timer(TRef), - {ok, TRef1} = start_timer(Callback, Key), - true = ets:insert(Handles, - Obj #hdl { offset = unknown, - timer_ref = TRef1, - released_at = not_released }), - true = ets:delete(Ages, ReleasedAt), - State; - [] -> - process_request_queue( - State #server_state { request_queue = - queue:in({Key, From, Callback}, Reqs) }) - end, - {noreply, State1, hibernate}. - -handle_cast({store_handle, Obj = #hdl {}}, - State = #server_state { handles = Handles }) -> - ets:insert_new(Handles, Obj), - {noreply, State, hibernate}; -handle_cast({release_handle, Key = {_Pid, _Path, _Mode}, Offset}, - State = #server_state { handles = Handles, - ages = Ages }) -> - [Obj = #hdl { timer_ref = TRef, released_at = ReleasedAtOld }] = - ets:lookup(Handles, Key), - ok = stop_timer(TRef), - ok = case ReleasedAtOld of - not_released -> - ReleasedAt = now(), - true = ets:insert_new(Ages, {ReleasedAt, Key}), - true = ets:insert(Handles, Obj #hdl { released_at = ReleasedAt, - offset = Offset, - timer_ref = no_timer }), - ok; - _ -> - ok - end, - State1 = process_request_queue(State), - {noreply, State1, hibernate}; -handle_cast({close_handle, Key = {_Pid, _Path, _Mode}}, - State = #server_state { handles = Handles, - ages = Ages }) -> - [Obj] = ets:lookup(Handles, Key), - ok = close_handle(Obj, Handles, Ages), - State1 = process_request_queue(State), - {noreply, State1, hibernate}; -handle_cast({new_client, Pid}, State) -> - _MRef = erlang:monitor(process, Pid), - {noreply, State, hibernate}. - -handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #server_state { handles = Handles, ages = Ages, - request_queue = Reqs }) -> - Reqs1 = queue:filter(fun ({{OPid, _Path, _Mode}, From, _Callback}) -> - if OPid == Pid -> - gen_server2:reply(From, exiting), - false; - true -> true - end - end, Reqs), - lists:foreach(fun (Obj) -> - ok = close_handle(Obj, Handles, Ages) - end, ets:match_object(Handles, #hdl { key = {Pid, '_', '_'}, - _ = '_' })), - {noreply, State #server_state { request_queue = Reqs1 }}. - -terminate(_Reason, State = #server_state { ages = Ages, - request_queue = Reqs }) -> - Size = ets:info(Ages, size), - Size = free_upto(Size, State), - lists:foreach(fun ({{_Pid, _Path, _Mode}, From, _Callback}) -> - gen_server2:reply(From, exiting) - end, queue:to_list(Reqs)), - State #server_state { request_queue = queue:new() }. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%---------------------------------------------------------------------------- -%% Server-side Helpers -%%---------------------------------------------------------------------------- - -start_timer({M,F,A}, Key) -> - timer:apply_after(?ISSUE_PERIOD, M, F, A ++ [{release_handle, Key}]). - -stop_timer(no_timer) -> - ok; -stop_timer(TRef) -> - timer:cancel(TRef), - ok. - -close_handle(#hdl { key = Key, timer_ref = TRef, released_at = ReleasedAt, - handle = Hdl }, - Handles, Ages) -> - ok = stop_timer(TRef), - ok = file:sync(Hdl), - ok = file:close(Hdl), - true = ets:delete(Handles, Key), - true = ets:delete(Ages, ReleasedAt), - ok. 
- -process_request_queue(State = #server_state { max_handles = MaxHandles, - handles = Handles, - request_queue = Reqs }) -> - Tokens = MaxHandles - ets:info(Handles, size), - Requests = queue:len(Reqs), - OpenCount = case Tokens >= Requests of - true -> Requests; - false -> Tokens + free_upto(Requests - Tokens, State) - end, - open_requested(OpenCount, State). - -open_requested(0, State) -> - State; -open_requested(N, State = #server_state { handles = Handles, - request_queue = Reqs }) -> - case queue:out(Reqs) of - {empty, _Reqs} -> State; - {{value, {Key = {_Pid, Path, Mode}, From, Callback}}, Reqs1} -> - Msg = - case lists:member(raw, Mode) of - true -> - Fun = - fun() -> - {ok, Hdl} = file:open(Path, Mode), - {ok, TRef} = start_timer(Callback, Key), - gen_server2:cast( - ?SERVER, {store_handle, - #hdl {key = Key, - handle = Hdl, - offset = unknown, - timer_ref = TRef, - released_at = not_released - }}), - {Hdl, 0} - end, - {open_fun, Fun}; - false -> - {ok, Hdl} = file:open(Path, Mode), - {ok, TRef} = start_timer(Callback, Key), - true = ets:insert_new(Handles, #hdl { key = Key, - handle = Hdl, - offset = unknown, - timer_ref = TRef, - released_at = - not_released }), - {Hdl, 0} - end, - gen_server2:reply(From, Msg), - open_requested(N - 1, State #server_state { request_queue = Reqs1 }) - end. - -free_upto(N, State) -> - free_upto(N, 0, State). - -free_upto(0, Count, _State) -> - Count; -free_upto(N, Count, State = #server_state { handles = Handles, - ages = Ages }) -> - case ets:first(Ages) of - '$end_of_table' -> - Count; - {ReleasedAt, Key} -> - [#hdl { handle = Hdl, timer_ref = no_timer, - released_at = ReleasedAt }] = ets:lookup(Handles, Key), - ok = file:sync(Hdl), - ok = file:close(Hdl), - true = ets:delete(Ages, ReleasedAt), - true = ets:delete(Handles, Key), - free_upto(N - 1, Count + 1, State) - end. -- cgit v1.2.1 From 4e416ab55ab388f05cd2b08a1592e589aebe2fcd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Oct 2009 13:52:48 +0100 Subject: Make init include in its result the next sequence id to use --- src/rabbit_queue_index.erl | 48 +++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 34bb9920..e4111f82 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -132,7 +132,7 @@ seg_ack_counts :: dict() }). --spec(init/1 :: (string()) -> qistate()). +-spec(init/1 :: (string()) -> {non_neg_integer(), qistate()}). -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). @@ -154,14 +154,15 @@ init(Name) -> AckCounts = scatter_journal(Dir, find_ack_counts(Dir)), {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), [raw, binary, delayed_write, write, read]), - #qistate { dir = Dir, - cur_seg_num = undefined, - cur_seg_hdl = undefined, - journal_ack_count = 0, - journal_ack_dict = dict:new(), - journal_handle = JournalHdl, - seg_ack_counts = AckCounts - }. + {find_next_seq_id(Dir), + #qistate { dir = Dir, + cur_seg_num = undefined, + cur_seg_hdl = undefined, + journal_ack_count = 0, + journal_ack_dict = dict:new(), + journal_handle = JournalHdl, + seg_ack_counts = AckCounts + }}. 
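After this change a caller of the index gets back the resumption point along with the state; the variable queue introduced below consumes it exactly this way:

    {NextSeqId, IndexState} = rabbit_queue_index:init(QueueName)
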
write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> @@ -232,7 +233,7 @@ read_segment_entries(InitSeqId, State = SegPath = seg_num_to_path(Dir, SegNum), {SDict, _AckCount} = load_segment(SegNum, SegPath, JAckDict), %% deliberately sort the list desc, because foldl will reverse it - RelSeqs = lists:sort(fun (A, B) -> B < A end, dict:fetch_keys(SDict)), + RelSeqs = rev_sort(dict:fetch_keys(SDict)), {lists:foldl(fun (RelSeq, Acc) -> {MsgId, IsDelivered, IsPersistent} = dict:fetch(RelSeq, SDict), @@ -245,6 +246,9 @@ read_segment_entries(InitSeqId, State = %% Minor Helpers %%---------------------------------------------------------------------------- +rev_sort(List) -> + lists:sort(fun (A, B) -> B < A end, List). + close_file_handle_for_seg(_SegNum, State = #qistate { cur_seg_num = undefined }) -> State; @@ -285,12 +289,26 @@ seg_num_to_path(Dir, SegNum) -> %% Startup Functions %%---------------------------------------------------------------------------- +all_segment_nums_paths(Dir) -> + [{list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + SegName)), filename:join(Dir, SegName)} + || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. + +find_next_seq_id(Dir) -> + SegNumsPaths = all_segment_nums_paths(Dir), + case rev_sort(SegNumsPaths) of + [] -> 0; + [{SegNum, SegPath}|_] -> + {SDict, _AckCount} = load_segment(SegNum, SegPath, dict:new()), + case rev_sort(dict:fetch_keys(SDict)) of + [] -> 0; + [RelSeq|_] -> 1 + reconstruct_seq_id(SegNum, RelSeq) + end + end. + find_ack_counts(Dir) -> - SegNumsPaths = - [{list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, - SegName)), filename:join(Dir, SegName)} - || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)], + SegNumsPaths = all_segment_nums_paths(Dir), lists:foldl( fun ({SegNum, SegPath}, Acc) -> case load_segment(SegNum, SegPath, dict:new()) of -- cgit v1.2.1 From b1fac9a717a56b61a7117d98e7edb3bf0886c400 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Oct 2009 13:53:03 +0100 Subject: initial hacking on the variable queue --- src/rabbit_variable_queue.erl | 182 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 src/rabbit_variable_queue.erl diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl new file mode 100644 index 00000000..784cda39 --- /dev/null +++ b/src/rabbit_variable_queue.erl @@ -0,0 +1,182 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_variable_queue). + +-export([init/1, in/3]). + +-record(vqstate, + { q1, + q2, + gamma, + q3, + q4, + egress_rate, + target_ram_msg_count, + ram_msg_count, + queue, + index_state, + next_seq_id + }). + +-include("rabbit.hrl"). + +init(QueueName) -> + {NextSeqId, IndexState} = rabbit_queue_index:init(QueueName), + #vqstate { q1 = queue:new(), q2 = queue:new(), + gamma = 0, + q3 = queue:new(), q4 = queue:new(), + egress_rate = 0, + target_ram_msg_count = undefined, + ram_msg_count = 0, + queue = QueueName, + index_state = IndexState, + next_seq_id = NextSeqId + }. + +maybe_write_msg_to_disk(Bool, Msg = #basic_message { + guid = MsgId, is_persistent = IsPersistent }) + when Bool orelse IsPersistent -> + ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), + true; +maybe_write_msg_to_disk(_Bool, _Msg) -> + false. + +maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, + IndexState) when Bool orelse IsPersistent -> + IndexState1 = rabbit_queue_index:write_published( + MsgId, SeqId, IsPersistent, IndexState), + {true, case IsDelivered of + true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); + false -> IndexState1 + end}; +maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, + IndexState) -> + {false, IndexState}. + +in(Msg = #basic_message {}, IsDelivered, State) -> + in(test_keep_msg_in_ram(State), Msg, IsDelivered, State). + +in(msg_and_index, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered, State = #vqstate { index_state = IndexState, + next_seq_id = SeqId, + ram_msg_count = RamMsgCount + }) -> + MsgOnDisk = maybe_write_msg_to_disk(false, Msg), + {IndexOnDisk, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + Entry = + {msg_and_index, Msg, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}, + State1 = State #vqstate { next_seq_id = SeqId + 1, + ram_msg_count = RamMsgCount + 1, + index_state = IndexState1 }, + store_alpha_entry(Entry, State1); + +in(just_index, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered, State = #vqstate { index_state = IndexState, + next_seq_id = SeqId, q1 = Q1 }) -> + true = maybe_write_msg_to_disk(true, Msg), + {IndexOnDisk, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + Entry = {index, MsgId, SeqId, IsDelivered, true, IndexOnDisk}, + State1 = State #vqstate { next_seq_id = SeqId + 1, + index_state = IndexState1 }, + true = queue:is_empty(Q1), %% ASSERTION + store_beta_entry(Entry, State1); + +in(neither, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + IsDelivered, State = #vqstate { index_state = IndexState, + next_seq_id = SeqId, + q1 = Q1, q2 = Q2, gamma = Gamma }) -> + true = maybe_write_msg_to_disk(true, Msg), + {true, IndexState1} = + maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION + State #vqstate { next_seq_id = SeqId + 1, + index_state = IndexState1, + gamma = Gamma + 1 }. 
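The in/4 clauses above cover the three forms a freshly published message can take (msg_and_index, just_index, neither), with test_keep_msg_in_ram/1 choosing between them. A minimal caller-side sketch, not part of the patch (demo_publish/2 and the message list are hypothetical):

demo_publish(QueueName, Msgs) ->
    %% at this point in the series init/1 returns the #vqstate directly
    State0 = rabbit_variable_queue:init(QueueName),
    lists:foldl(fun (Msg, State) ->
                        rabbit_variable_queue:in(Msg, false, State)
                end, State0, Msgs).
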
+ +test_keep_msg_in_ram(#vqstate { target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + q1 = Q1 }) -> + case TargetRamMsgCount of + undefined -> msg_and_index; + 0 -> neither; + _ when TargetRamMsgCount > RamMsgCount -> + msg_and_index; + _ -> case queue:is_empty(Q1) of + true -> just_index; + false -> msg_and_index %% can push out elders to disk + end + end. + +ensure_binary_properties(Msg = #basic_message { content = Content }) -> + Msg #basic_message { + content = rabbit_binary_parser:clear_decoded_content( + rabbit_binary_generator:ensure_content_encoded(Content)) }. + +store_alpha_entry(Entry, State = #vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, + q3 = Q3, q4 = Q4 }) -> + case queue:is_empty(Q1) andalso queue:is_empty(Q2) andalso + Gamma == 0 andalso queue:is_empty(Q3) of + true -> + State #vqstate { q4 = queue:in(Entry, Q4) }; + false -> + maybe_push_q1_out(State #vqstate { q1 = queue:in(Entry, Q1) }) + end. + +store_beta_entry(Entry, State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3 }) -> + case queue:is_empty(Q2) andalso Gamma == 0 of + true -> State #vqstate { q3 = queue:in(Entry, Q3) }; + false -> State #vqstate { q2 = queue:in(Entry, Q2) } + end. + +maybe_push_q1_out(State = #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount + }) when TargetRamMsgCount > RamMsgCount -> + State; +maybe_push_q1_out(State = #vqstate { ram_msg_count = RamMsgCount, q1 = Q1 }) -> + {{value, {msg_and_index, Msg = #basic_message { guid = MsgId }, SeqId, + IsDelivered, MsgOnDisk, IndexOnDisk}}, Q1a} = queue:out(Q1), + true = case MsgOnDisk of + true -> true; + false -> maybe_write_msg_to_disk(true, Msg) + end, + maybe_push_q1_out( + store_beta_entry({index, MsgId, SeqId, IsDelivered, true, IndexOnDisk}, + State #vqstate { ram_msg_count = RamMsgCount - 1, + q1 = Q1a })). -- cgit v1.2.1 From ef826c850cfc778c8b804491d5a66a4649ed5168 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Oct 2009 16:27:23 +0100 Subject: More work on variable queue --- src/rabbit_queue_index.erl | 6 +- src/rabbit_variable_queue.erl | 211 +++++++++++++++++++++++++++++++++--------- 2 files changed, 174 insertions(+), 43 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e4111f82..c59b12dd 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index). -export([init/1, write_published/4, write_delivered/2, write_acks/2, - flush_journal/1, read_segment_entries/2]). + flush_journal/1, read_segment_entries/2, next_segment_boundary/1]). %%---------------------------------------------------------------------------- %% The queue disk index @@ -242,6 +242,10 @@ read_segment_entries(InitSeqId, State = end, [], RelSeqs), State}. +next_segment_boundary(SeqId) -> + {SegNum, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(SegNum + 1, 0). + %%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 784cda39..73c3c339 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_variable_queue). --export([init/1, in/3]). +-export([init/1, in/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1]). 
-record(vqstate, { q1, @@ -39,12 +39,16 @@ gamma, q3, q4, - egress_rate, target_ram_msg_count, ram_msg_count, queue, index_state, - next_seq_id + next_seq_id, + out_counter, + egress_rate, + old_egress_rate, + avg_egress_rate, + egress_rate_timestamp }). -include("rabbit.hrl"). @@ -54,35 +58,19 @@ init(QueueName) -> #vqstate { q1 = queue:new(), q2 = queue:new(), gamma = 0, q3 = queue:new(), q4 = queue:new(), - egress_rate = 0, target_ram_msg_count = undefined, ram_msg_count = 0, queue = QueueName, index_state = IndexState, - next_seq_id = NextSeqId + next_seq_id = NextSeqId, + out_counter = 0, + egress_rate = 0, + old_egress_rate = 0, + avg_egress_rate = 0, + egress_rate_timestamp = now() }. -maybe_write_msg_to_disk(Bool, Msg = #basic_message { - guid = MsgId, is_persistent = IsPersistent }) - when Bool orelse IsPersistent -> - ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), - true; -maybe_write_msg_to_disk(_Bool, _Msg) -> - false. - -maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, - IndexState) when Bool orelse IsPersistent -> - IndexState1 = rabbit_queue_index:write_published( - MsgId, SeqId, IsPersistent, IndexState), - {true, case IsDelivered of - true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); - false -> IndexState1 - end}; -maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, - IndexState) -> - {false, IndexState}. - -in(Msg = #basic_message {}, IsDelivered, State) -> +in(Msg, IsDelivered, State) -> in(test_keep_msg_in_ram(State), Msg, IsDelivered, State). in(msg_and_index, Msg = #basic_message { guid = MsgId, @@ -110,7 +98,7 @@ in(just_index, Msg = #basic_message { guid = MsgId, {IndexOnDisk, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), - Entry = {index, MsgId, SeqId, IsDelivered, true, IndexOnDisk}, + Entry = {index, MsgId, SeqId, IsPersistent, IsDelivered, true, IndexOnDisk}, State1 = State #vqstate { next_seq_id = SeqId + 1, index_state = IndexState1 }, true = queue:is_empty(Q1), %% ASSERTION @@ -130,6 +118,73 @@ in(neither, Msg = #basic_message { guid = MsgId, index_state = IndexState1, gamma = Gamma + 1 }. +set_queue_ram_duration_target( + DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, + target_ram_msg_count = TargetRamMsgCount + }) -> + TargetRamMsgCount1 = trunc(DurationTarget * EgressRate), %% msgs = sec * msgs/sec + State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1 }, + if TargetRamMsgCount == TargetRamMsgCount1 -> + State1; + TargetRamMsgCount < TargetRamMsgCount1 -> + maybe_start_prefetcher(State1); + true -> + reduce_memory_use(State1) + end. + +remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, + egress_rate_timestamp = Timestamp, + out_counter = OutCount }) -> + Now = now(), + EgressRate = OutCount / timer:now_diff(Now, Timestamp), + AvgEgressRate = (EgressRate + OldEgressRate) / 2, + State #vqstate { old_egress_rate = OldEgressRate, + egress_rate = EgressRate, + avg_egress_rate = AvgEgressRate, + egress_rate_timestamp = Now, + out_counter = 0 }. + +maybe_start_prefetcher(State) -> + %% TODO + State. 
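set_queue_ram_duration_target/2 above converts a duration into a message count with msgs = sec * msgs/sec. A worked sketch with illustrative numbers (target_ram_count/2 is hypothetical); note in passing that timer:now_diff/2 returns microseconds, so the rate computed in remeasure_egress_rate/1 is messages per microsecond unless it is scaled, and the figures below assume a rate already normalised to messages per second:

%% at 250 msgs/sec average egress, a 2 second RAM duration target
%% allows trunc(2 * 250) = 500 messages to be kept in RAM.
target_ram_count(DurationTargetSecs, AvgEgressRatePerSec) ->
    trunc(DurationTargetSecs * AvgEgressRatePerSec).
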
+ +reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when TargetRamMsgCount >= RamMsgCount -> + State; +reduce_memory_use(State = + #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> + State1 = #vqstate { ram_msg_count = RamMsgCount } = + maybe_push_q1_to_betas(State), + State2 = case TargetRamMsgCount >= RamMsgCount of + true -> State1; + false -> maybe_push_q4_to_betas(State) + end, + case TargetRamMsgCount of + 0 -> push_betas_to_gammas(State); + _ -> State2 + end. + +maybe_write_msg_to_disk(Bool, Msg = #basic_message { + guid = MsgId, is_persistent = IsPersistent }) + when Bool orelse IsPersistent -> + ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), + true; +maybe_write_msg_to_disk(_Bool, _Msg) -> + false. + +maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, + IndexState) when Bool orelse IsPersistent -> + IndexState1 = rabbit_queue_index:write_published( + MsgId, SeqId, IsPersistent, IndexState), + {true, case IsDelivered of + true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); + false -> IndexState1 + end}; +maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, + IndexState) -> + {false, IndexState}. + test_keep_msg_in_ram(#vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, q1 = Q1 }) -> @@ -156,7 +211,7 @@ store_alpha_entry(Entry, State = #vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, true -> State #vqstate { q4 = queue:in(Entry, Q4) }; false -> - maybe_push_q1_out(State #vqstate { q1 = queue:in(Entry, Q1) }) + maybe_push_q1_to_betas(State #vqstate { q1 = queue:in(Entry, Q1) }) end. store_beta_entry(Entry, State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3 }) -> @@ -165,18 +220,90 @@ store_beta_entry(Entry, State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3 }) -> false -> State #vqstate { q2 = queue:in(Entry, Q2) } end. -maybe_push_q1_out(State = #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount - }) when TargetRamMsgCount > RamMsgCount -> +maybe_push_q1_to_betas(State = + #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount + }) when TargetRamMsgCount >= RamMsgCount -> + State; +maybe_push_q1_to_betas(State = #vqstate { ram_msg_count = RamMsgCount, + q1 = Q1 }) -> + case queue:out(Q1) of + {empty, _Q1} -> State; + {{value, {msg_and_index, Msg = #basic_message { + guid = MsgId, is_persistent = IsPersistent }, + SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}}, Q1a} -> + true = case MsgOnDisk of + true -> true; + false -> maybe_write_msg_to_disk(true, Msg) + end, + maybe_push_q1_to_betas( + store_beta_entry({index, MsgId, SeqId, IsPersistent, IsDelivered, + true, IndexOnDisk}, + State #vqstate { ram_msg_count = RamMsgCount - 1, + q1 = Q1a })) + end. + +maybe_push_q4_to_betas(State = + #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount + }) when TargetRamMsgCount >= RamMsgCount -> State; -maybe_push_q1_out(State = #vqstate { ram_msg_count = RamMsgCount, q1 = Q1 }) -> - {{value, {msg_and_index, Msg = #basic_message { guid = MsgId }, SeqId, - IsDelivered, MsgOnDisk, IndexOnDisk}}, Q1a} = queue:out(Q1), - true = case MsgOnDisk of - true -> true; - false -> maybe_write_msg_to_disk(true, Msg) - end, - maybe_push_q1_out( - store_beta_entry({index, MsgId, SeqId, IsDelivered, true, IndexOnDisk}, - State #vqstate { ram_msg_count = RamMsgCount - 1, - q1 = Q1a })). 
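Both maybe_write_* helpers above gate on Bool orelse IsPersistent: a write happens either because the caller forces it (to shed RAM) or because the message is persistent and must reach disk regardless. The rule spelled out as a sketch (write_needed/2 is not part of the patch):

%% force | persistent | written to disk?
%% true  | any        | yes
%% false | true       | yes
%% false | false      | no
write_needed(Force, IsPersistent) -> Force orelse IsPersistent.
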
+maybe_push_q4_to_betas(State = #vqstate { ram_msg_count = RamMsgCount,
+                                          q4 = Q4, q3 = Q3 }) ->
+    case queue:out_r(Q4) of
+        {empty, _Q4} -> State;
+        {{value, {msg_and_index, Msg = #basic_message {
+                                   guid = MsgId, is_persistent = IsPersistent },
+                  SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}}, Q4a} ->
+            true = case MsgOnDisk of
+                       true -> true;
+                       false -> maybe_write_msg_to_disk(true, Msg)
+                   end,
+            Q3a = queue:in_r({index, MsgId, SeqId, IsPersistent, IsDelivered,
+                              true, IndexOnDisk}, Q3),
+            maybe_push_q4_to_betas(
+              State #vqstate { ram_msg_count = RamMsgCount - 1,
+                               q3 = Q3a, q4 = Q4a })
+    end.
+
+push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3,
+                                        index_state = IndexState }) ->
+    {Len1, Q2a, IndexState1} =
+        push_betas_to_gammas(fun queue:out/1, undefined, Q2, IndexState),
+    State1 = State #vqstate { q2 = Q2a, gamma = Gamma + Len1,
+                              index_state = IndexState1 },
+    case queue:out(Q3) of
+        {empty, _Q3} -> State1;
+        {{value, {index, _MsgId, SeqId, _IsPersistent, _IsDelivered,
+                  true, _IndexOnDisk}}, _Q3a} ->
+            Limit = rabbit_queue_index:next_segment_boundary(SeqId) - 1,
+            {Len2, Q3b, IndexState2} =
+                push_betas_to_gammas(fun queue:out_r/1, Limit, Q3, IndexState1),
+            State1 #vqstate { q3 = Q3b, gamma = Gamma + Len1 + Len2,
+                              index_state = IndexState2 }
+    end.
+
+push_betas_to_gammas(Generator, Limit, Q, IndexState) ->
+    push_betas_to_gammas(Generator, Limit, Q, 0, IndexState).
+
+push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) ->
+    case Generator(Q) of
+        {empty, Qa} -> {Count, Qa, IndexState};
+        {{value, {index, _MsgId, Limit, _IsPersistent, _IsDelivered,
+                  _MsgOnDisk, _IndexOnDisk}}, _Qa} ->
+            {Count, Q, IndexState};
+        {{value, {index, MsgId, SeqId, IsPersistent, IsDelivered,
+                  true, IndexOnDisk}}, Qa} ->
+            IndexState1 =
+                case IndexOnDisk of
+                    true -> IndexState;
+                    false ->
+                        {true, IndexState2} =
+                            maybe_write_index_to_disk(
+                              true, IsPersistent, MsgId,
+                              SeqId, IsDelivered, IndexState),
+                        IndexState2
+                end,
+            push_betas_to_gammas(Generator, Limit, Qa, Count + 1, IndexState1)
+    end.
+
-- cgit v1.2.1


From 3270c8e1c60ca72bd2c7ece7eef6fc83c45981f1 Mon Sep 17 00:00:00 2001
From: Marek Majkowski
Date: Wed, 7 Oct 2009 16:27:41 +0100
Subject: Few more lines of comments

---
 src/rabbit_memory_monitor.erl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl
index 8bdd394b..ebbae94a 100644
--- a/src/rabbit_memory_monitor.erl
+++ b/src/rabbit_memory_monitor.erl
@@ -53,6 +53,20 @@
 %% update we notice that a queue was using too much memory, we send a message
 %% back. This will happen even if the queue is hibernated, as we really do want
 %% it to reduce its memory footprint.
+%%
+%%
+%% The main job of this module is to make sure that all the queues have
+%% more or less the same number of seconds till they become drained.
+%% This average, seconds-till-queue-is-drained, is then multiplied by
+%% the ratio of Used/Total memory. So, if we can 'afford' more memory to be
+%% used, we'll report a greater number back to the queues. In the out of
+%% memory case, we are going to reduce the average drain-seconds.
+%% To achieve all this we need to accumulate the information from every
+%% queue, and compute an average from that.
+%%
+%% real_drain_avg = avg([drain_from_queue_1, queue_2, queue_3, ...])
+%% memory_overcommit = used_memory / allowed_memory
+%% desired_drain_avg = memory_overcommit * real_drain_avg
 
 -module(rabbit_memory_monitor).
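A worked example of the three formulas above, with illustrative numbers. The very next patch replaces the product in the last formula with a quotient (desired_drain_avg = real_drain_avg / memory_overcommit), which is the form these figures use:

%% real_drain_avg    = avg([10, 20, 60])          = 30 seconds
%% memory_overcommit = 1.5GB used / 1.0GB allowed = 1.5
%% desired_drain_avg = 30 / 1.5                   = 20 seconds
%% => any queue holding more than 20 seconds' worth of messages is
%%    asked (via set_bufsec_limit) to shed the excess.
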
-- cgit v1.2.1


From 9e23636552326c807e4e1fa5618cace2c6e75f9e Mon Sep 17 00:00:00 2001
From: Marek Majkowski
Date: Wed, 7 Oct 2009 18:16:48 +0100
Subject: Rewritten the counting, hopefully, it's simplified now

---
 src/rabbit_memory_monitor.erl | 81 +++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl
index ebbae94a..e878edda 100644
--- a/src/rabbit_memory_monitor.erl
+++ b/src/rabbit_memory_monitor.erl
@@ -66,7 +66,7 @@
 %%
 %% real_drain_avg = avg([drain_from_queue_1, queue_2, queue_3, ...])
 %% memory_overcommit = used_memory / allowed_memory
-%% desired_drain_avg = memory_overcommit * real_drain_avg
+%% desired_drain_avg = real_drain_avg / memory_overcommit
 
 -module(rabbit_memory_monitor).
 
@@ -115,16 +115,30 @@ register(Pid) ->
 
 %%----------------------------------------------------------------------------
 
-init([]) ->
+get_user_memory_limit() ->
     %% TODO: References to os_mon and rabbit_memsup_linux
     %% should go away as bug 21457 removes it.
     %% BTW: memsup:get_system_memory_data() doesn't work.
     {state, TotalMemory, _Allocated} = rabbit_memsup_linux:update({state, 0,0}),
+    MemoryHighWatermark = os_mon:get_env(memsup, system_memory_high_watermark),
+    Limit = erlang:trunc(TotalMemory * MemoryHighWatermark),
+    %% no more than two gigs on 32 bits.
+    case (Limit > 2*1024*1024*1024) and (erlang:system_info(wordsize) == 4) of
+        true  -> 2*1024*1024*1024;
+        false -> Limit
+    end.
+
+
+init([]) ->
+    %% We should never use more memory than the user requested. As the
+    %% memory manager doesn't really know how much memory queues are
+    %% using, we shall try to remain a safe distance from the real limit.
+    MemoryLimit = get_user_memory_limit() * 0.6,
+    rabbit_log:warning("Memory monitor limit: ~pMB~n",
+                       [erlang:trunc(MemoryLimit/1024/1024)]),
 
     {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS,
                                       ?SERVER, update, []),
-    MemoryHighWatermark = os_mon:get_env(memsup, system_memory_high_watermark),
-    MemoryLimit = erlang:trunc(TotalMemory * MemoryHighWatermark),
     {ok, #state{timer = TRef,
                 drain_dict = dict:new(),
                 drain_avg = infinity,
@@ -171,51 +185,50 @@ ftoa(Float) ->
         end,
     lists:flatten(Str).
 
-print_debug_info(UsedSeconds, AvailableSeconds, UsedMemory, TotalMemory,
-                 PerQueueSeconds, QueueSec) ->
-    io:format("Update ~s/~s ~s/~s PerQueueSeconds:~s ~s~n",
-              [ftoa(UsedSeconds), ftoa(AvailableSeconds),
-               ftoa(UsedMemory/1024.0/1024.0), ftoa(TotalMemory/1024.0/1024.0),
-               ftoa(PerQueueSeconds),
-               [" "] ++ lists:flatten([ftoa(Q)++" " || Q <- QueueSec])
-              ]).
+print_debug_info(RealDrainAvg, DesiredDrainAvg, MemoryOvercommit) ->
+    io:format("DrainAvg Real/Desired:~s/~s MemoryOvercommit:~s~n",
+              [ftoa(RealDrainAvg), ftoa(DesiredDrainAvg),
+               ftoa(MemoryOvercommit)]).
 -else.
-print_debug_info(_UsedSeconds, _AvailableSeconds, _UsedMemory, _TotalMemory,
-                 _PerQueueSeconds, _QueueSec) ->
+print_debug_info(_RealDrainAvg, _DesiredDrainAvg, _MemoryOvercommit) ->
     ok.
 -endif.
 
+%% Compute the average of the numbers in the list, ignoring atoms
+%% (such as 'infinity') mixed in with them.
+count_average(List) ->
+    List1 = [V || V <- List, is_number(V) or is_float(V)],
+    case length(List1) of
+        0   -> infinity;
+        Len -> lists:sum(List1) / Len
+    end.
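count_average/1 above ignores non-numeric entries (queues reporting infinity) and returns infinity when nothing numeric remains; the is_number/1 test already subsumes is_float/1, so the second test is redundant but harmless. For instance (sketch, not in the patch):

%% count_average([10.0, infinity, 20.0]) =:= 15.0
%% count_average([infinity, infinity])   =:= infinity
%% count_average([])                     =:= infinity
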
+
 internal_update(State) ->
-    UsedMemory = erlang:memory(total),
-    TotalMemory = State#state.memory_limit,
-    QueueSec = [V || {_K, V} <- dict:to_list(State#state.drain_dict) ],
-    UsedSeconds = lists:sum( lists:filter(fun (A) ->
-                                                  is_number(A) or is_float(A)
-                                          end,
-                                          QueueSec) ),
-    AvailableSeconds = case UsedSeconds of
+    %% used memory / available memory
+    MemoryOvercommit = erlang:memory(total) / State#state.memory_limit,
+
+    RealDrainAvg = count_average([V || {_K, V} <-
+                                           dict:to_list(State#state.drain_dict)]),
+    %% In case of no active queues, feel free to grow. We can't make any
+    %% decisions: we have no clue what the average ram_usage/second is.
+    %% Nor does the queue.
+    DesiredDrainAvg = case RealDrainAvg of
+                          infinity -> infinity;
                           0 -> infinity;
                           0.0 -> infinity;
-                          _ -> TotalMemory / (UsedMemory / UsedSeconds)
-                      end,
-    QueuesNumber = dict:size(State#state.drain_dict),
-    PerQueueSeconds = case (QueuesNumber > 0) and (AvailableSeconds /= infinity) of
-                          true -> AvailableSeconds / QueuesNumber;
-                          false -> infinity
+                          _ -> RealDrainAvg / MemoryOvercommit
                       end,
-    print_debug_info(UsedSeconds, AvailableSeconds, UsedMemory, TotalMemory,
-                     PerQueueSeconds, QueueSec),
+    print_debug_info(RealDrainAvg, DesiredDrainAvg, MemoryOvercommit),
     %% Inform the queue to reduce its memory usage when needed.
     %% This can sometimes wake the queue from hibernation. Well, we don't care.
-    ReduceMemory = fun ({Pid, QueueS}) ->
-                           case QueueS > PerQueueSeconds of
+    ReduceMemory = fun ({Pid, QueueDrain}) ->
+                           case QueueDrain > DesiredDrainAvg of
                                true ->
-                                   gen_server2:cast(Pid, {set_bufsec_limit, PerQueueSeconds});
+                                   gen_server2:cast(Pid, {set_bufsec_limit, DesiredDrainAvg});
                                _ -> ok
                            end
                    end,
     lists:map(ReduceMemory, dict:to_list(State#state.drain_dict)),
-    State#state{drain_avg = PerQueueSeconds}.
+    State#state{drain_avg = DesiredDrainAvg}.
-- cgit v1.2.1


From b0bc48d057f5b1a85f007b2a9c385f8445a9da46 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 7 Oct 2009 18:46:12 +0100
Subject: implemented out. This is getting pretty disgusting, needs some
 refactoring, marginally more useful variable names, and more API, in
 particular proper support for the prefetcher. Also, totally untested.

---
 src/rabbit_queue_index.erl    |  17 ++--
 src/rabbit_variable_queue.erl | 176 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 164 insertions(+), 29 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index c59b12dd..27952af1 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -32,7 +32,8 @@
 -module(rabbit_queue_index).
 
 -export([init/1, write_published/4, write_delivered/2, write_acks/2,
-         flush_journal/1, read_segment_entries/2, next_segment_boundary/1]).
+         flush_journal/1, read_segment_entries/2, next_segment_boundary/1,
+         segment_size/0]).
 
 %%----------------------------------------------------------------------------
 %% The queue disk index
@@ -237,8 +240,9 @@ read_segment_entries(InitSeqId, State = {lists:foldl(fun (RelSeq, Acc) -> {MsgId, IsDelivered, IsPersistent} = dict:fetch(RelSeq, SDict), - [{index_entry, reconstruct_seq_id(SegNum, RelSeq), - MsgId, IsDelivered, IsPersistent, on_disk} | Acc] + [ {index, MsgId, + reconstruct_seq_id(SegNum, RelSeq), + IsPersistent, IsDelivered, true} | Acc] end, [], RelSeqs), State}. @@ -246,6 +250,9 @@ next_segment_boundary(SeqId) -> {SegNum, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), reconstruct_seq_id(SegNum + 1, 0). +segment_size() -> + ?SEGMENT_ENTRIES_COUNT. + %%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 73c3c339..f041f478 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,7 +31,8 @@ -module(rabbit_variable_queue). --export([init/1, in/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1]). +-export([init/1, in/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, + out/1]). -record(vqstate, { q1, @@ -48,7 +49,8 @@ egress_rate, old_egress_rate, avg_egress_rate, - egress_rate_timestamp + egress_rate_timestamp, + prefetcher }). -include("rabbit.hrl"). @@ -56,7 +58,7 @@ init(QueueName) -> {NextSeqId, IndexState} = rabbit_queue_index:init(QueueName), #vqstate { q1 = queue:new(), q2 = queue:new(), - gamma = 0, + gamma = {undefined, 0}, q3 = queue:new(), q4 = queue:new(), target_ram_msg_count = undefined, ram_msg_count = 0, @@ -67,7 +69,8 @@ init(QueueName) -> egress_rate = 0, old_egress_rate = 0, avg_egress_rate = 0, - egress_rate_timestamp = now() + egress_rate_timestamp = now(), + prefetcher = undefined }. in(Msg, IsDelivered, State) -> @@ -98,7 +101,7 @@ in(just_index, Msg = #basic_message { guid = MsgId, {IndexOnDisk, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), - Entry = {index, MsgId, SeqId, IsPersistent, IsDelivered, true, IndexOnDisk}, + Entry = {index, MsgId, SeqId, IsPersistent, IsDelivered, IndexOnDisk}, State1 = State #vqstate { next_seq_id = SeqId + 1, index_state = IndexState1 }, true = queue:is_empty(Q1), %% ASSERTION @@ -108,7 +111,8 @@ in(neither, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered, State = #vqstate { index_state = IndexState, next_seq_id = SeqId, - q1 = Q1, q2 = Q2, gamma = Gamma }) -> + q1 = Q1, q2 = Q2, + gamma = {GammaSeqId, GammaCount} }) -> true = maybe_write_msg_to_disk(true, Msg), {true, IndexState1} = maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, @@ -116,7 +120,7 @@ in(neither, Msg = #basic_message { guid = MsgId, true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION State #vqstate { next_seq_id = SeqId + 1, index_state = IndexState1, - gamma = Gamma + 1 }. + gamma = {GammaSeqId, GammaCount + 1} }. set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, @@ -144,6 +148,91 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, egress_rate_timestamp = Now, out_counter = 0 }. 
+out(State = + #vqstate { q4 = Q4, + out_counter = OutCount, prefetcher = Prefetcher, + index_state = IndexState }) -> + case queue:out(Q4) of + {empty, _Q4} when Prefetcher == undefined -> + out_from_q3(State); + {empty, _Q4} -> + Q4a = + case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of + empty -> Q4; + Q4b -> Q4b + end, + out(State #vqstate { q4 = Q4a, prefetcher = undefined }); + {{value, + {msg_and_index, Msg = #basic_message { guid = MsgId }, + SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}}, Q4a} -> + IndexState1 = + case IndexOnDisk andalso not IsDelivered of + true -> + rabbit_queue_index:write_delivered(SeqId, IndexState); + false -> + IndexState + end, + AckTag = case {IndexOnDisk, MsgOnDisk} of + {true, true } -> {ack_index_and_store, MsgId, SeqId}; + {false, true } -> {ack_store, MsgId}; + {false, false} -> not_on_disk + end, + {{Msg, IsDelivered, AckTag}, + State #vqstate { q4 = Q4a, out_counter = OutCount + 1, + index_state = IndexState1 }} + end. + +out_from_q3(State = #vqstate { q2 = Q2, index_state = IndexState, + gamma = {GammaSeqId, GammaCount}, q3 = Q3, + q4 = Q4 }) -> + case queue:out(Q3) of + {empty, _Q3} -> + case GammaCount of + 0 -> + undefined = GammaSeqId, %% ASSERTION + true = queue:is_empty(Q2), %% ASSERTION + {empty, State}; + _ -> + {List = [_|_], IndexState1} = + rabbit_queue_index:read_segment_entries(GammaSeqId, + IndexState), + State1 = State #vqstate { index_state = IndexState1 }, + Q3a = queue:from_list(List), + State2 = + case GammaCount - length(List) of + 0 -> + State1 #vqstate { gamma = {undefined, 0}, + q2 = queue:new(), + q3 = queue:join(Q3a, Q2) }; + N when N > 0 -> + State1 #vqstate { gamma = + {rabbit_queue_index:segment_size() + + GammaSeqId, N}, + q3 = Q3a } + end, + out_from_q3(State2) + end; + {{value, {index, MsgId, SeqId, IsPersistent, IsDelivered, IndexOnDisk}}, + Q3a} -> + {ok, Msg = #basic_message { is_persistent = IsPersistent, + guid = MsgId }} = + rabbit_msg_store:read(MsgId), + State1 = #vqstate { q1 = Q1, q4 = Q4a } = + State #vqstate { q3 = Q3a, + q4 = queue:in({msg_and_index, Msg, SeqId, + IsDelivered, true, IndexOnDisk}, + Q4) }, + State2 = case queue:is_empty(Q3a) andalso 0 == GammaCount of + true -> + true = queue:is_empty(Q2), %% ASSERTION + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + false -> + State1 + end, + out(State2) + end. + maybe_start_prefetcher(State) -> %% TODO State. @@ -204,18 +293,21 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> content = rabbit_binary_parser:clear_decoded_content( rabbit_binary_generator:ensure_content_encoded(Content)) }. -store_alpha_entry(Entry, State = #vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, +store_alpha_entry(Entry, State = #vqstate { q1 = Q1, q2 = Q2, + gamma = {_GammaSeqId, GammaCount}, q3 = Q3, q4 = Q4 }) -> case queue:is_empty(Q1) andalso queue:is_empty(Q2) andalso - Gamma == 0 andalso queue:is_empty(Q3) of + GammaCount == 0 andalso queue:is_empty(Q3) of true -> State #vqstate { q4 = queue:in(Entry, Q4) }; false -> maybe_push_q1_to_betas(State #vqstate { q1 = queue:in(Entry, Q1) }) end. -store_beta_entry(Entry, State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3 }) -> - case queue:is_empty(Q2) andalso Gamma == 0 of +store_beta_entry(Entry, State = + #vqstate { q2 = Q2, gamma = {_GammaSeqId, GammaCount}, + q3 = Q3 }) -> + case queue:is_empty(Q2) andalso GammaCount == 0 of true -> State #vqstate { q3 = queue:in(Entry, Q3) }; false -> State #vqstate { q2 = queue:in(Entry, Q2) } end. 
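The AckTag returned by out/1 above records which on-disk artefacts will need cleaning up when the message is acknowledged. A hedged sketch of the consuming side (ack_entry/2 is a hypothetical helper; the real ack path is not part of this patch, but write_acks/2 and remove/1 are the calls it would bottom out in):

ack_entry({ack_index_and_store, MsgId, SeqId}, IndexState) ->
    %% both the message store and the queue index hold state
    ok = rabbit_msg_store:remove([MsgId]),
    rabbit_queue_index:write_acks([SeqId], IndexState);
ack_entry({ack_store, MsgId}, IndexState) ->
    %% only the message store holds it
    ok = rabbit_msg_store:remove([MsgId]),
    IndexState;
ack_entry(not_on_disk, IndexState) ->
    %% nothing was written, nothing to clean up
    IndexState.
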
@@ -238,7 +330,7 @@ maybe_push_q1_to_betas(State = #vqstate { ram_msg_count = RamMsgCount, end, maybe_push_q1_to_betas( store_beta_entry({index, MsgId, SeqId, IsPersistent, IsDelivered, - true, IndexOnDisk}, + IndexOnDisk}, State #vqstate { ram_msg_count = RamMsgCount - 1, q1 = Q1a })) end. @@ -260,7 +352,7 @@ maybe_push_q4_to_betas(State = #vqstate { ram_msg_count = RamMsgCount, false -> maybe_write_msg_to_disk(true, Msg) end, Q3a = queue:in_r({index, MsgId, SeqId, IsPersistent, IsDelivered, - true, IndexOnDisk}, Q3), + IndexOnDisk}, Q3), maybe_push_q4_to_betas( State #vqstate { ram_msg_count = RamMsgCount - 1, q3 = Q3a, q4 = Q4a }) @@ -268,32 +360,62 @@ maybe_push_q4_to_betas(State = #vqstate { ram_msg_count = RamMsgCount, push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, index_state = IndexState }) -> - {Len1, Q2a, IndexState1} = + %% HighSeqId is high in the sense that it must be higher than the + %% seqid in Gamma, but it's also the lowest of the betas that we + %% transfer from q2 to gamma. + {HighSeqId, Len1, Q2a, IndexState1} = push_betas_to_gammas(fun queue:out/1, undefined, Q2, IndexState), - State1 = State #vqstate { q2 = Q2a, gamma = Gamma + Len1, + Gamma1 = {Gamma1SeqId, _} = combine_gammas(Gamma, {HighSeqId, Len1}), + State1 = State #vqstate { q2 = Q2a, + gamma = Gamma1, index_state = IndexState1 }, case queue:out(Q3) of {empty, _Q3} -> State1; {{value, {index, _MsgId, SeqId, _IsPersistent, _IsDelivered, - true, _IndexOnDisk}}, _Q3a} -> - Limit = rabbit_queue_index:next_segment_boundary(SeqId) - 1, - {Len2, Q3b, IndexState2} = - push_betas_to_gammas(fun queue:out_r/1, Limit, Q3, IndexState1), - State1 #vqstate { q3 = Q3b, gamma = Gamma + Len1 + Len2, - index_state = IndexState2 } + _IndexOnDisk}}, _Q3a} -> + Limit = rabbit_queue_index:next_segment_boundary(SeqId), + case Limit == Gamma1SeqId of + true -> %% already only holding the minimum, nothing to do + State1; + false -> + %% ASSERTION + true = Gamma1SeqId == undefined orelse + Gamma1SeqId == Limit + rabbit_queue_index:segment_size(), + %% LowSeqId is low in the sense that it must be + %% lower than the seqid in Gamma1, in fact either + %% gamma1 has undefined as its seqid or its seqid + %% is LowSeqId + 1. But because we use + %% queue:out_r, LowSeqId is actually also the + %% highest seqid of the betas we transfer from q3 + %% to gammas. + {LowSeqId, Len2, Q3b, IndexState2} = + push_betas_to_gammas(fun queue:out_r/1, Limit - 1, Q3, + IndexState1), + Gamma1SeqId = LowSeqId + 1, %% ASSERTION + Gamma2 = combine_gammas({Limit, Len2}, Gamma1), + State1 #vqstate { q3 = Q3b, gamma = Gamma2, + index_state = IndexState2 } + end end. push_betas_to_gammas(Generator, Limit, Q, IndexState) -> - push_betas_to_gammas(Generator, Limit, Q, 0, IndexState). + case Generator(Q) of + {empty, Qa} -> {undefined, 0, Qa, IndexState}; + {{value, {index, _MsgId, SeqId, _IsPersistent, _IsDelivered, + _IndexOnDisk}}, _Qa} -> + {Count, Qb, IndexState1} = + push_betas_to_gammas(Generator, Limit, Q, 0, IndexState), + {SeqId, Count, Qb, IndexState1} + end. 
push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) -> case Generator(Q) of {empty, Qa} -> {Count, Qa, IndexState}; {{value, {index, _MsgId, Limit, _IsPersistent, _IsDelivered, - _MsgOnDisk, _IndexOnDisk}}, _Qa} -> + _IndexOnDisk}}, _Qa} -> {Count, Q, IndexState}; {{value, {index, MsgId, SeqId, IsPersistent, IsDelivered, - true, IndexOnDisk}}, Qa} -> + IndexOnDisk}}, Qa} -> IndexState1 = case IndexOnDisk of true -> IndexState; @@ -307,3 +429,9 @@ push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) -> push_betas_to_gammas(Generator, Limit, Qa, Count + 1, IndexState1) end. +combine_gammas({_, 0}, {_, 0}) -> {undefined, 0}; +combine_gammas({_, 0}, B ) -> B; +combine_gammas(A , {_, 0}) -> A; +combine_gammas({SeqIdLow, CountLow}, {SeqIdHigh, CountHigh}) -> + SeqIdHigh = SeqIdLow + CountLow, %% ASSERTION + {SeqIdLow, CountLow + CountHigh}. -- cgit v1.2.1 From 2fd28f7b67f0c4214c6c3629c396b8d2a525e0f9 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 8 Oct 2009 03:09:49 +0100 Subject: make msg_store responsible for sync'ing The API to the msg_store has changed: now instead of asking whether a sync is needed for a set of msg ids, and subsequently requesting a sync, we request a sync for a set of msg ids and supply a callback that is invoked when that sync is done. That way the msg_store can make its own decisions on when to sync, and less logic is required by callers. During queue deletion we must remove *all* queue messages from the store, including those that are part of committed transactions for which the disk_queue has not yet received the sync callback. To do that we keep a record of these messages in a dict in the state. The dict also ensures that we do not act on a sync callback involving a queue which has since been deleted and perhaps recreated. --- src/rabbit_disk_queue.erl | 183 ++++++++++++++++++++-------------------------- src/rabbit_msg_store.erl | 95 +++++++++++++++++------- 2 files changed, 148 insertions(+), 130 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 02a8ed8c..893fae8e 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -44,8 +44,6 @@ prefetch/1 ]). --export([filesync/0]). - -export([stop/0, stop_and_obliterate/0]). %%---------------------------------------------------------------------------- @@ -63,16 +61,14 @@ is_persistent = true }). --define(SYNC_INTERVAL, 5). %% milliseconds -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). -define(SERVER, ?MODULE). -record(dqstate, - {sequences, %% next read and write for each q - on_sync_txns, %% list of commiters to run on sync (reversed) - commit_timer_ref %% TRef for our interval timer + { sequences, %% next read and write for each q + pending_commits %% dict of txns waiting for msg_store }). %%---------------------------------------------------------------------------- @@ -109,7 +105,6 @@ A, queue_name()) -> A). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_obliterate/0 :: () -> 'ok'). --spec(filesync/0 :: () -> 'ok'). -endif. @@ -173,8 +168,10 @@ stop() -> stop_and_obliterate() -> gen_server2:call(?SERVER, stop_vaporise, infinity). -filesync() -> - gen_server2:pcall(?SERVER, 9, filesync). +%% private + +finalise_commit(TxId) -> + gen_server2:cast(?SERVER, {finalise_commit, TxId}). 
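The commit message above describes the new contract: rather than polling needs_sync/1 and then requesting a blanket sync, a caller hands the store a set of msg ids together with a continuation. A minimal caller-side sketch (request_commit/2 and the fun are placeholders; the disk queue's real continuation, finalise_commit, appears further down):

%% ask the store to run Continuation once these msg ids are safely on
%% disk; the store may run it immediately if nothing relevant is dirty
request_commit(MsgIds, Continuation) ->
    ok = rabbit_msg_store:sync(MsgIds, Continuation).

%% e.g. request_commit([MsgId], fun () -> io:format("synced~n") end)
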
%%---------------------------------------------------------------------------- %% gen_server behaviour @@ -203,9 +200,7 @@ init([]) -> Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), ok = extract_sequence_numbers(Sequences), - State = #dqstate { sequences = Sequences, - on_sync_txns = [], - commit_timer_ref = undefined }, + State = #dqstate { sequences = Sequences, pending_commits = dict:new() }, {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -222,8 +217,6 @@ handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> handle_call({purge, Q}, _From, State) -> {ok, Count, State1} = internal_purge(Q, State), reply(Count, State1); -handle_call(filesync, _From, State) -> - reply(ok, sync(State)); handle_call({delete_queue, Q}, From, State) -> gen_server2:reply(From, ok), {ok, State1} = internal_delete_queue(Q, State), @@ -275,13 +268,12 @@ handle_cast({prefetch, Q, From}, State) -> internal_fetch_attributes(Q, ignore_delivery, State1); false -> ok end, - noreply(State1). + noreply(State1); +handle_cast({finalise_commit, TxId}, State) -> + noreply(finalise_commit(TxId, State)). handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}; -handle_info(timeout, State) -> - %% must have commit_timer set, so timeout was 0, and we're not hibernating - noreply(sync(State)). + {stop, Reason, State}. terminate(_Reason, State) -> State1 = shutdown(State), @@ -291,10 +283,9 @@ terminate(_Reason, State) -> shutdown(State = #dqstate { sequences = undefined }) -> State; shutdown(State = #dqstate { sequences = Sequences }) -> - State1 = stop_commit_timer(State), ok = rabbit_msg_store:stop(), ets:delete(Sequences), - State1 #dqstate { sequences = undefined }. + State #dqstate { sequences = undefined }. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -304,28 +295,10 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- noreply(State) -> - noreply1(State). - -noreply1(State) -> - {State1, Timeout} = next_state(State), - {noreply, State1, Timeout}. + {noreply, State, hibernate}. reply(Reply, State) -> - reply1(Reply, State). - -reply1(Reply, State) -> - {State1, Timeout} = next_state(State), - {reply, Reply, State1, Timeout}. - -next_state(State = #dqstate { on_sync_txns = [], - commit_timer_ref = undefined }) -> - {State, hibernate}; -next_state(State = #dqstate { commit_timer_ref = undefined }) -> - {start_commit_timer(State), 0}; -next_state(State = #dqstate { on_sync_txns = [] }) -> - {stop_commit_timer(State), hibernate}; -next_state(State) -> - {State, 0}. + {reply, Reply, State, hibernate}. form_filename(Name) -> filename:join(base_directory(), Name). @@ -339,25 +312,6 @@ sequence_lookup(Sequences, Q) -> [{_, ReadSeqId, WriteSeqId}] -> {ReadSeqId, WriteSeqId} end. -start_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> - {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, filesync, []), - State #dqstate { commit_timer_ref = TRef }. - -stop_commit_timer(State = #dqstate { commit_timer_ref = undefined }) -> - State; -stop_commit_timer(State = #dqstate { commit_timer_ref = TRef }) -> - {ok, cancel} = timer:cancel(TRef), - State #dqstate { commit_timer_ref = undefined }. - -sync(State = #dqstate { on_sync_txns = Txns }) -> - ok = rabbit_msg_store:sync(), - case Txns of - [] -> State; - _ -> lists:foldl(fun internal_do_tx_commit/2, - State #dqstate { on_sync_txns = [] }, - lists:reverse(Txns)) - end. 
- %%---------------------------------------------------------------------------- %% internal functions %%---------------------------------------------------------------------------- @@ -404,10 +358,9 @@ maybe_advance(pop_queue, Sequences, Q, ReadSeqId, WriteSeqId) -> true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), ok. -internal_foldl(Q, Fun, Init, State) -> - State1 = #dqstate { sequences = Sequences } = sync(State), +internal_foldl(Q, Fun, Init, State = #dqstate { sequences = Sequences }) -> {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - internal_foldl(Q, WriteSeqId, Fun, State1, Init, ReadSeqId). + internal_foldl(Q, WriteSeqId, Fun, State, Init, ReadSeqId). internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> {ok, Acc, State}; @@ -438,44 +391,57 @@ internal_tx_publish(Message = #basic_message { guid = MsgId, {ok, State}. internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, - State = #dqstate { on_sync_txns = Txns }) -> - TxnDetails = {Q, PubMsgIds, AckSeqIds, From}, - case rabbit_msg_store:needs_sync( - [MsgId || {MsgId, _IsDelivered, _IsPersistent} <- PubMsgIds]) of - true -> Txns1 = [TxnDetails | Txns], - State #dqstate { on_sync_txns = Txns1 }; - false -> internal_do_tx_commit(TxnDetails, State) + State = #dqstate { pending_commits = PendingCommits }) -> + ok = rabbit_msg_store:sync([MsgId || {MsgId, _, _} <- PubMsgIds], + fun () -> finalise_commit({Q, From}) end), + PendingCommits1 = dict:store(Q, {PubMsgIds, AckSeqIds, From}, + PendingCommits), + State #dqstate { pending_commits = PendingCommits1 }. + +finalise_commit({Q, From}, + State = #dqstate { sequences = Sequences, + pending_commits = PendingCommits }) -> + case dict:find(Q, PendingCommits) of + {ok, {PubMsgIds, AckSeqIds, From}} -> + {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), + WriteSeqId = + rabbit_misc:execute_mnesia_transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + {ok, WriteSeqId1} = + lists:foldl( + fun ({MsgId, IsDelivered, IsPersistent}, + {ok, SeqId}) -> + {mnesia:write( + rabbit_disk_queue, + #dq_msg_loc { + queue_and_seq_id = {Q, SeqId}, + msg_id = MsgId, + is_delivered = IsDelivered, + is_persistent = IsPersistent + }, write), + SeqId + 1} + end, {ok, InitWriteSeqId}, PubMsgIds), + WriteSeqId1 + end), + {ok, State1} = remove_messages(Q, AckSeqIds, State), + true = case PubMsgIds of + [] -> true; + _ -> ets:insert(Sequences, + {Q, InitReadSeqId, WriteSeqId}) + end, + gen_server2:reply(From, ok), + State1 # dqstate { pending_commits = + dict:erase(Q, PendingCommits) }; + {ok, _} -> + %% sync notification for a deleted queue which has since + %% been recreated + State; + error -> + %% sync notification for a deleted queue + State end. 
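The dict lookup above is what the commit message means by not acting "on a sync callback involving a queue which has since been deleted and perhaps recreated": From is bound in the function head, so it behaves as a generation tag, and a recreated queue will have stored a fresh From. The dispatch, condensed into a sketch (classify/3 is hypothetical):

classify(Q, From, PendingCommits) ->
    case dict:find(Q, PendingCommits) of
        {ok, {_Pubs, _Acks, From}} -> run_commit;       %% same incarnation
        {ok, _Other}               -> ignore_recreated; %% queue came back
        error                      -> ignore_deleted    %% queue is gone
    end.
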
-internal_do_tx_commit({Q, PubMsgIds, AckSeqIds, From}, - State = #dqstate { sequences = Sequences }) -> - {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - WriteSeqId = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - {ok, WriteSeqId1} = - lists:foldl( - fun ({MsgId, IsDelivered, IsPersistent}, {ok, SeqId}) -> - {mnesia:write( - rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, - msg_id = MsgId, - is_delivered = IsDelivered, - is_persistent = IsPersistent - }, write), - SeqId + 1} - end, {ok, InitWriteSeqId}, PubMsgIds), - WriteSeqId1 - end), - {ok, State1} = remove_messages(Q, AckSeqIds, State), - true = case PubMsgIds of - [] -> true; - _ -> ets:insert(Sequences, {Q, InitReadSeqId, WriteSeqId}) - end, - gen_server2:reply(From, ok), - State1. - internal_publish(Q, Message = #basic_message { guid = MsgId, is_persistent = IsPersistent }, IsDelivered, State) -> @@ -588,12 +554,23 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> {ok, WriteSeqId - ReadSeqId, State1} end. -internal_delete_queue(Q, State) -> - State1 = sync(State), +internal_delete_queue(Q, + State = #dqstate { pending_commits = PendingCommits }) -> + %% remove pending commits + State1 = case dict:find(Q, PendingCommits) of + {ok, {PubMsgIds, _, _}} -> + ok = rabbit_msg_store:remove( + [MsgId || {MsgId, _, _} <- PubMsgIds]), + State # dqstate { pending_commits = + dict:erase(Q, PendingCommits) }; + error -> + State + end, + %% remove everything undelivered {ok, _Count, State2 = #dqstate { sequences = Sequences }} = - internal_purge(Q, State1), %% remove everything undelivered + internal_purge(Q, State1), true = ets:delete(Sequences, Q), - %% now remove everything already delivered + %% remove everything already delivered Objs = mnesia:dirty_match_object( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, '_'}, _ = '_' }), diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index aa779e61..f973de5d 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -34,7 +34,9 @@ -behaviour(gen_server2). -export([start_link/3, write/2, read/1, contains/1, remove/1, release/1, - needs_sync/1, sync/0, stop/0]). + sync/2, stop/0]). + +-export([sync/0]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -43,6 +45,7 @@ -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(SYNC_INTERVAL, 5). %% milliseconds %%---------------------------------------------------------------------------- @@ -61,8 +64,7 @@ -spec(contains/1 :: (msg_id()) -> boolean()). -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). --spec(needs_sync/1 :: ([msg_id()]) -> boolean()). --spec(sync/0 :: () -> 'ok'). +-spec(sync/2 :: ([msg_id()], thunk(any())) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -endif. @@ -81,6 +83,8 @@ file_size_limit, %% how big can our files get? read_file_handle_cache, %% file handle cache for reading last_sync_offset, %% current_offset at the last time we sync'd + on_sync, %% pending sync requests + sync_timer_ref, %% TRef for our interval timer message_cache %% ets message cache }). @@ -232,9 +236,9 @@ read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). 
-needs_sync(MsgIds) -> gen_server2:call(?SERVER, {needs_sync, MsgIds}, infinity). -sync() -> gen_server2:call(?SERVER, sync, infinity). +sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). stop() -> gen_server2:call(?SERVER, stop, infinity). +sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal %%---------------------------------------------------------------------------- %% gen_server callbacks @@ -262,6 +266,8 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> file_size_limit = ?FILE_SIZE_LIMIT, read_file_handle_cache = HandleCache, last_sync_offset = 0, + on_sync = [], + sync_timer_ref = undefined, message_cache = MessageCache }, @@ -330,21 +336,6 @@ handle_call({contains, MsgId}, _From, State) -> #msg_location {} -> true end, State); -handle_call({needs_sync, _MsgIds}, _From, - State = #msstate { current_dirty = false }) -> - reply(false, State); -handle_call({needs_sync, MsgIds}, _From, - State = #msstate { current_file = CurFile, - last_sync_offset = SyncOffset }) -> - reply(lists:any(fun (MsgId) -> - #msg_location { file = File, offset = Offset } = - index_lookup(MsgId, State), - File =:= CurFile andalso Offset >= SyncOffset - end, MsgIds), State); - -handle_call(sync, _From, State) -> - reply(ok, sync(State)); - handle_call(stop, _From, State) -> {stop, normal, ok, State}. @@ -403,10 +394,32 @@ handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> handle_cast({release, MsgIds}, State) -> lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), - noreply(State). + noreply(State); -handle_info(_Info, State) -> - noreply(State). +handle_cast({sync, _MsgIds, K}, + State = #msstate { current_dirty = false }) -> + K(), + noreply(State); + +handle_cast({sync, MsgIds, K}, + State = #msstate { current_file = CurFile, + last_sync_offset = SyncOffset, + on_sync = Syncs }) -> + case lists:any(fun (MsgId) -> + #msg_location { file = File, offset = Offset } = + index_lookup(MsgId, State), + File =:= CurFile andalso Offset >= SyncOffset + end, MsgIds) of + false -> K(), + noreply(State); + true -> noreply(State #msstate { on_sync = [K | Syncs] }) + end; + +handle_cast(sync, State) -> + noreply(sync(State)). + +handle_info(timeout, State) -> + noreply(sync(State)). terminate(_Reason, State = #msstate { msg_locations = MsgLocations, file_summary = FileSummary, @@ -434,9 +447,32 @@ code_change(_OldVsn, State, _Extra) -> %% general helper functions %%---------------------------------------------------------------------------- -noreply(State) -> {noreply, State}. +noreply(State) -> + {State1, Timeout} = next_state(State), + {noreply, State1, Timeout}. + +reply(Reply, State) -> + {State1, Timeout} = next_state(State), + {reply, Reply, State1, Timeout}. -reply(Reply, State) -> {reply, Reply, State}. +next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) -> + {State, infinity}; +next_state(State = #msstate { sync_timer_ref = undefined }) -> + {start_sync_timer(State), 0}; +next_state(State = #msstate { on_sync = [] }) -> + {stop_sync_timer(State), infinity}; +next_state(State) -> + {State, 0}. + +start_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, []), + State #msstate { sync_timer_ref = TRef }. + +stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> + State; +stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #msstate { sync_timer_ref = undefined }. 
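next_state/1 above is the usual gen_server write-coalescing idiom: while syncs are pending it returns a 0 timeout, so the timeout message (and one fsync for the whole batch) arrives the moment the mailbox goes quiet, with the ?SYNC_INTERVAL timer as a backstop for a mailbox that never quiets down. Stripped to its essentials (a sketch with a hypothetical callback module; buffer/2 and flush/1 are placeholders):

handle_cast({write, Data}, State) ->
    %% keep accepting writes; the 0 timeout only fires once the
    %% mailbox is empty, so consecutive writes share a single flush
    {noreply, buffer(Data, State), 0};
handle_info(timeout, State) ->
    {noreply, flush(State)}.
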
form_filename(Dir, Name) -> filename:join(Dir, Name). @@ -465,9 +501,14 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> sync(State = #msstate { current_dirty = false }) -> State; sync(State = #msstate { current_file_handle = CurHdl, - current_offset = CurOffset }) -> + current_offset = CurOffset, + on_sync = Syncs }) -> + State1 = stop_sync_timer(State), ok = file:sync(CurHdl), - State #msstate { current_dirty = false, last_sync_offset = CurOffset }. + lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), + State1 #msstate { current_dirty = false, + last_sync_offset = CurOffset, + on_sync = [] }. with_read_handle_at(File, Offset, Fun, State = #msstate { dir = Dir, -- cgit v1.2.1 From 758c8c38d624b803aa4edb586a5341b46e5a089a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 8 Oct 2009 03:33:11 +0100 Subject: cosmetic --- src/rabbit_disk_queue.erl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl index 893fae8e..42c4ed8b 100644 --- a/src/rabbit_disk_queue.erl +++ b/src/rabbit_disk_queue.erl @@ -408,11 +408,9 @@ finalise_commit({Q, From}, rabbit_misc:execute_mnesia_transaction( fun() -> ok = mnesia:write_lock_table(rabbit_disk_queue), - {ok, WriteSeqId1} = - lists:foldl( - fun ({MsgId, IsDelivered, IsPersistent}, - {ok, SeqId}) -> - {mnesia:write( + lists:foldl( + fun ({MsgId, IsDelivered, IsPersistent}, SeqId) -> + ok = mnesia:write( rabbit_disk_queue, #dq_msg_loc { queue_and_seq_id = {Q, SeqId}, @@ -420,9 +418,8 @@ finalise_commit({Q, From}, is_delivered = IsDelivered, is_persistent = IsPersistent }, write), - SeqId + 1} - end, {ok, InitWriteSeqId}, PubMsgIds), - WriteSeqId1 + SeqId + 1 + end, InitWriteSeqId, PubMsgIds) end), {ok, State1} = remove_messages(Q, AckSeqIds, State), true = case PubMsgIds of -- cgit v1.2.1 From c62e4e0a7e21ed00550f99ede4daf5e7bb8613f6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Oct 2009 14:46:45 +0100 Subject: Tidying and refactoring of the variable queue, some documentation, and the removal of a lot of algorithmic bugs. No real new features, but code in much better state. --- src/rabbit_queue_index.erl | 12 +- src/rabbit_variable_queue.erl | 435 ++++++++++++++++++++++++++---------------- 2 files changed, 273 insertions(+), 174 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 27952af1..b21651a2 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -140,7 +140,7 @@ -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush_journal/1 :: (qistate()) -> {boolean(), qistate()}). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> - {( [{'index', msg_id(), seq_id(), boolean(), boolean()}] + {( [{msg_id(), seq_id(), boolean(), boolean()}] | 'not_found'), qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(segment_size/0 :: () -> non_neg_integer()). 
@@ -152,7 +152,7 @@ %%---------------------------------------------------------------------------- init(Name) -> - Dir = filename:join(rabbit_mnesia:dir(), Name), + Dir = filename:join(queues_dir(), Name), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), AckCounts = scatter_journal(Dir, find_ack_counts(Dir)), {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), @@ -240,9 +240,8 @@ read_segment_entries(InitSeqId, State = {lists:foldl(fun (RelSeq, Acc) -> {MsgId, IsDelivered, IsPersistent} = dict:fetch(RelSeq, SDict), - [ {index, MsgId, - reconstruct_seq_id(SegNum, RelSeq), - IsPersistent, IsDelivered, true} | Acc] + [ {MsgId, reconstruct_seq_id(SegNum, RelSeq), + IsPersistent, IsDelivered} | Acc] end, [], RelSeqs), State}. @@ -257,6 +256,9 @@ segment_size() -> %% Minor Helpers %%---------------------------------------------------------------------------- +queues_dir() -> + filename:join(rabbit_mnesia:dir(), "queues"). + rev_sort(List) -> lists:sort(fun (A, B) -> B < A end, List). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f041f478..79a7f38b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -47,18 +47,64 @@ next_seq_id, out_counter, egress_rate, - old_egress_rate, avg_egress_rate, egress_rate_timestamp, prefetcher }). +-record(alpha, + { msg, + seq_id, + is_delivered, + msg_on_disk, + index_on_disk + }). + +-record(beta, + { msg_id, + seq_id, + is_persistent, + is_delivered, + index_on_disk + }). + +-record(gamma, + { seq_id, + count + }). + -include("rabbit.hrl"). +%% Basic premise is that msgs move from q1 -> q2 -> gamma -> q3 -> q4 +%% but they can only do so in the right form. q1 and q4 only hold +%% alphas (msgs in ram), q2 and q3 only hold betas (msg on disk, index +%% in ram), and gamma is just a count of the number of index entries +%% on disk at that stage (msg on disk, index on disk). +%% +%% When a msg arrives, we decide which form it should be in. It is +%% then added to the rightmost appropriate queue, maintaining +%% order. Thus if the msg is to be an alpha, it will be added to q1, +%% unless all of q1, q2, gamma and q3 are empty, in which case it will +%% go to q4. If it is to be a beta, it will be added to q2 unless all +%% of q2 and gamma are empty, in which case it will go to q3. +%% +%% The major invariant is that if the msg is to be a beta, q1 will be +%% empty, and if it is to be a gamma then both q1 and q2 will be empty. +%% +%% When taking msgs out of the queue, if q4 is empty then we drain the +%% prefetcher. If that doesn't help then we read directly from q3, or +%% gamma, if q3 is empty. If q3 and gamma are empty then we have an +%% invariant that q2 must be empty because q2 can only grow if gamma +%% is non empty. +%% +%% A further invariant is that if the queue is non empty, either q4 or +%% q3 contains at least one entry. I.e. we never allow gamma to +%% contain all msgs in the queue. + init(QueueName) -> {NextSeqId, IndexState} = rabbit_queue_index:init(QueueName), #vqstate { q1 = queue:new(), q2 = queue:new(), - gamma = {undefined, 0}, + gamma = #gamma { seq_id = undefined, count = 0 }, q3 = queue:new(), q4 = queue:new(), target_ram_msg_count = undefined, ram_msg_count = 0, @@ -67,60 +113,60 @@ init(QueueName) -> next_seq_id = NextSeqId, out_counter = 0, egress_rate = 0, - old_egress_rate = 0, avg_egress_rate = 0, egress_rate_timestamp = now(), prefetcher = undefined }. -in(Msg, IsDelivered, State) -> - in(test_keep_msg_in_ram(State), Msg, IsDelivered, State). 
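The alpha/beta/gamma records above make the three storage forms explicit. As a cheat sheet for the invariants in the comment (ram_cost/1 is a sketch, not part of the patch):

ram_cost(alpha) -> {msg_in_ram, index_in_ram};   %% lives in q1 or q4
ram_cost(beta)  -> {msg_on_disk, index_in_ram};  %% lives in q2 or q3
ram_cost(gamma) -> {msg_on_disk, index_on_disk}. %% just a seq_id and count
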
+in(Msg, IsDelivered, State = #vqstate { next_seq_id = SeqId }) -> + in(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, + State #vqstate { next_seq_id = SeqId + 1 }). -in(msg_and_index, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered, State = #vqstate { index_state = IndexState, - next_seq_id = SeqId, - ram_msg_count = RamMsgCount - }) -> +in(msg, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, State = #vqstate { index_state = IndexState, + ram_msg_count = RamMsgCount }) -> MsgOnDisk = maybe_write_msg_to_disk(false, Msg), {IndexOnDisk, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), - Entry = - {msg_and_index, Msg, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}, - State1 = State #vqstate { next_seq_id = SeqId + 1, - ram_msg_count = RamMsgCount + 1, + Entry = #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }, + State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, index_state = IndexState1 }, store_alpha_entry(Entry, State1); -in(just_index, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered, State = #vqstate { index_state = IndexState, - next_seq_id = SeqId, q1 = Q1 }) -> +in(index, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, State = #vqstate { index_state = IndexState, + q1 = Q1 }) -> true = maybe_write_msg_to_disk(true, Msg), {IndexOnDisk, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), - Entry = {index, MsgId, SeqId, IsPersistent, IsDelivered, IndexOnDisk}, - State1 = State #vqstate { next_seq_id = SeqId + 1, - index_state = IndexState1 }, + Entry = #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, + is_persistent = IsPersistent, index_on_disk = IndexOnDisk }, + State1 = State #vqstate { index_state = IndexState1 }, true = queue:is_empty(Q1), %% ASSERTION store_beta_entry(Entry, State1); in(neither, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - IsDelivered, State = #vqstate { index_state = IndexState, - next_seq_id = SeqId, - q1 = Q1, q2 = Q2, - gamma = {GammaSeqId, GammaCount} }) -> + SeqId, IsDelivered, State = #vqstate { index_state = IndexState, + q1 = Q1, q2 = Q2, gamma = Gamma }) -> true = maybe_write_msg_to_disk(true, Msg), {true, IndexState1} = maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION - State #vqstate { next_seq_id = SeqId + 1, - index_state = IndexState1, - gamma = {GammaSeqId, GammaCount + 1} }. + %% gamma may be empty, seq_id > next_segment_boundary from q3 + %% head, so we need to find where the segment boundary is before + %% or equal to seq_id + GammaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - + rabbit_queue_index:segment_size(), + Gamma1 = #gamma { seq_id = GammaSeqId, count = 1 }, + State #vqstate { index_state = IndexState1, + gamma = combine_gammas(Gamma, Gamma1) }. 
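The GammaSeqId computation at the end of in(neither, ...) above rounds SeqId down to the start of its segment: next_segment_boundary/1 returns the first seq id of the following segment, and subtracting segment_size/0 steps back to the boundary at or below SeqId. With an illustrative segment size of 4 (the real value is the queue index's segment entry count):

%% SeqId = 6, segment size = 4 (illustrative):
%% rabbit_queue_index:next_segment_boundary(6) = 8
%% GammaSeqId = 8 - 4 = 4  %% first seq id of the segment holding 6
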
set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, @@ -139,11 +185,14 @@ set_queue_ram_duration_target( remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, egress_rate_timestamp = Timestamp, out_counter = OutCount }) -> + %% We do an average over the last two values, but also hold the + %% current value separately so that the average always only + %% incorporates the last two values, and not the current value and + %% the last average. Averaging helps smooth out spikes. Now = now(), EgressRate = OutCount / timer:now_diff(Now, Timestamp), AvgEgressRate = (EgressRate + OldEgressRate) / 2, - State #vqstate { old_egress_rate = OldEgressRate, - egress_rate = EgressRate, + State #vqstate { egress_rate = EgressRate, avg_egress_rate = AvgEgressRate, egress_rate_timestamp = Now, out_counter = 0 }. @@ -163,8 +212,9 @@ out(State = end, out(State #vqstate { q4 = Q4a, prefetcher = undefined }); {{value, - {msg_and_index, Msg = #basic_message { guid = MsgId }, - SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}}, Q4a} -> + #alpha { msg = Msg = #basic_message { guid = MsgId }, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }}, Q4a} -> IndexState1 = case IndexOnDisk andalso not IsDelivered of true -> @@ -175,64 +225,85 @@ out(State = AckTag = case {IndexOnDisk, MsgOnDisk} of {true, true } -> {ack_index_and_store, MsgId, SeqId}; {false, true } -> {ack_store, MsgId}; - {false, false} -> not_on_disk + {false, false} -> ack_not_on_disk end, {{Msg, IsDelivered, AckTag}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, index_state = IndexState1 }} end. -out_from_q3(State = #vqstate { q2 = Q2, index_state = IndexState, - gamma = {GammaSeqId, GammaCount}, q3 = Q3, - q4 = Q4 }) -> +out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, index_state = IndexState, + gamma = #gamma { seq_id = GammaSeqId, + count = GammaCount}, + q3 = Q3, q4 = Q4 }) -> case queue:out(Q3) of {empty, _Q3} -> - case GammaCount of - 0 -> - undefined = GammaSeqId, %% ASSERTION - true = queue:is_empty(Q2), %% ASSERTION - {empty, State}; - _ -> - {List = [_|_], IndexState1} = - rabbit_queue_index:read_segment_entries(GammaSeqId, - IndexState), - State1 = State #vqstate { index_state = IndexState1 }, - Q3a = queue:from_list(List), - State2 = - case GammaCount - length(List) of - 0 -> - State1 #vqstate { gamma = {undefined, 0}, - q2 = queue:new(), - q3 = queue:join(Q3a, Q2) }; - N when N > 0 -> - State1 #vqstate { gamma = - {rabbit_queue_index:segment_size() + - GammaSeqId, N}, - q3 = Q3a } - end, - out_from_q3(State2) - end; - {{value, {index, MsgId, SeqId, IsPersistent, IsDelivered, IndexOnDisk}}, + 0 = GammaCount, %% ASSERTION + true = queue:is_empty(Q2), %% ASSERTION + true = queue:is_empty(Q1), %% ASSERTION + {empty, State}; + {{value, + #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, + is_persistent = IsPersistent, index_on_disk = IndexOnDisk }}, Q3a} -> {ok, Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }} = rabbit_msg_store:read(MsgId), - State1 = #vqstate { q1 = Q1, q4 = Q4a } = - State #vqstate { q3 = Q3a, - q4 = queue:in({msg_and_index, Msg, SeqId, - IsDelivered, true, IndexOnDisk}, - Q4) }, - State2 = case queue:is_empty(Q3a) andalso 0 == GammaCount of - true -> - true = queue:is_empty(Q2), %% ASSERTION - State1 #vqstate { q1 = queue:new(), - q4 = queue:join(Q4a, Q1) }; - false -> - State1 - end, + Q4a = queue:in( + #alpha { msg = Msg, seq_id = SeqId, + is_delivered = IsDelivered, 
msg_on_disk = true, + index_on_disk = IndexOnDisk }, Q4), + %% TODO - if it's not persistent, remove it from disk now + State1 = State #vqstate { q3 = Q3a, q4 = Q4a }, + State2 = + case {queue:is_empty(Q3a), 0 == GammaCount} of + {true, true} -> + %% q3 is now empty, it wasn't before; gamma is + %% still empty. So q2 must be empty, and q1 + %% can now be joined onto q4 + true = queue:is_empty(Q2), %% ASSERTION + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + {true, false} -> + {List, IndexState1} = + rabbit_queue_index:read_segment_entries(GammaSeqId, + IndexState), + State3 = State1 #vqstate { index_state = IndexState1 }, + %% length(List) may be < segment_size because + %% of acks. In fact, List may be [] + Q3b = betas_from_segment_entries(List), + case GammaCount - length(List) of + 0 -> + %% gamma is now empty, but it wasn't + %% before, so can now join q2 onto q3 + State3 #vqstate { + gamma = #gamma { seq_id = undefined, + count = 0 }, + q2 = queue:new(), q3 = queue:join(Q3b, Q2) }; + N when N > 0 -> + State3 #vqstate { + gamma = #gamma { + seq_id = GammaSeqId + + rabbit_queue_index:segment_size(), + count = N }, q3 = Q3b } + end; + {false, _} -> + %% q3 still isn't empty, we've not touched + %% gamma, so the invariants between q1, q2, + %% gamma and q3 are maintained + State1 + end, out(State2) end. +betas_from_segment_entries(List) -> + queue:from_list(lists:map(fun ({MsgId, SeqId, IsPersistent, IsDelivered}) -> + #beta { msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + index_on_disk = true } + end, List)). + maybe_start_prefetcher(State) -> %% TODO State. @@ -243,15 +314,10 @@ reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, State; reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> - State1 = #vqstate { ram_msg_count = RamMsgCount } = - maybe_push_q1_to_betas(State), - State2 = case TargetRamMsgCount >= RamMsgCount of - true -> State1; - false -> maybe_push_q4_to_betas(State) - end, + State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), case TargetRamMsgCount of - 0 -> push_betas_to_gammas(State); - _ -> State2 + 0 -> push_betas_to_gammas(State1); + _ -> State1 end. maybe_write_msg_to_disk(Bool, Msg = #basic_message { @@ -274,17 +340,32 @@ maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, IndexState) -> {false, IndexState}. -test_keep_msg_in_ram(#vqstate { target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, - q1 = Q1 }) -> +test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + q1 = Q1, q3 = Q3 }) -> case TargetRamMsgCount of - undefined -> msg_and_index; - 0 -> neither; + undefined -> + msg; + 0 -> + case queue:out(Q3) of + {empty, _Q3} -> + %% if TargetRamMsgCount == 0, we know we have no + %% alphas. If q3 is empty then gamma must be empty + %% too, so create a beta, which should end up in + %% q3 + index; + {{value, #beta { seq_id = OldSeqId }}, _Q3a} -> + %% don't look at the current gamma as it may be empty + case SeqId >= rabbit_queue_index:next_segment_boundary(OldSeqId) of + true -> neither; + false -> index + end + end; _ when TargetRamMsgCount > RamMsgCount -> - msg_and_index; + msg; _ -> case queue:is_empty(Q1) of - true -> just_index; - false -> msg_and_index %% can push out elders to disk + true -> index; + false -> msg %% can push out elders to disk end end. 
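Flattened out, test_keep_msg_in_ram/2 is a small decision table. A restatement as a pure function, illustration only, with the state collapsed into abbreviated inputs (Target and Ram stand for target_ram_msg_count and ram_msg_count; BeyondQ3 is true when q3 is non-empty and SeqId falls at or past the segment boundary after q3's head):

%% msg = alpha (all in RAM), index = beta (body on disk), neither = gamma.
keep_form(undefined, _Ram, _Q1Empty, _BeyondQ3) -> msg;
keep_form(0, _Ram, _Q1Empty, true)              -> neither;
keep_form(0, _Ram, _Q1Empty, false)             -> index;
keep_form(Target, Ram, _Q1Empty, _BeyondQ3)
  when Target > Ram                             -> msg;
keep_form(_Target, _Ram, true, _BeyondQ3)       -> index;
keep_form(_Target, _Ram, false, _BeyondQ3)      -> msg. %% push out elders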
@@ -293,9 +374,10 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> content = rabbit_binary_parser:clear_decoded_content( rabbit_binary_generator:ensure_content_encoded(Content)) }. -store_alpha_entry(Entry, State = #vqstate { q1 = Q1, q2 = Q2, - gamma = {_GammaSeqId, GammaCount}, - q3 = Q3, q4 = Q4 }) -> +store_alpha_entry(Entry = #alpha {}, State = + #vqstate { q1 = Q1, q2 = Q2, + gamma = #gamma { count = GammaCount }, + q3 = Q3, q4 = Q4 }) -> case queue:is_empty(Q1) andalso queue:is_empty(Q2) andalso GammaCount == 0 andalso queue:is_empty(Q3) of true -> @@ -304,95 +386,104 @@ store_alpha_entry(Entry, State = #vqstate { q1 = Q1, q2 = Q2, maybe_push_q1_to_betas(State #vqstate { q1 = queue:in(Entry, Q1) }) end. -store_beta_entry(Entry, State = - #vqstate { q2 = Q2, gamma = {_GammaSeqId, GammaCount}, +store_beta_entry(Entry = #beta {}, State = + #vqstate { q2 = Q2, gamma = #gamma { count = GammaCount }, q3 = Q3 }) -> case queue:is_empty(Q2) andalso GammaCount == 0 of true -> State #vqstate { q3 = queue:in(Entry, Q3) }; false -> State #vqstate { q2 = queue:in(Entry, Q2) } end. -maybe_push_q1_to_betas(State = - #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount - }) when TargetRamMsgCount >= RamMsgCount -> - State; -maybe_push_q1_to_betas(State = #vqstate { ram_msg_count = RamMsgCount, - q1 = Q1 }) -> - case queue:out(Q1) of - {empty, _Q1} -> State; - {{value, {msg_and_index, Msg = #basic_message { - guid = MsgId, is_persistent = IsPersistent }, - SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}}, Q1a} -> - true = case MsgOnDisk of - true -> true; - false -> maybe_write_msg_to_disk(true, Msg) - end, - maybe_push_q1_to_betas( - store_beta_entry({index, MsgId, SeqId, IsPersistent, IsDelivered, - IndexOnDisk}, - State #vqstate { ram_msg_count = RamMsgCount - 1, - q1 = Q1a })) - end. +maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> + maybe_push_alphas_to_betas( + fun queue:out/1, + fun (Beta, Q1a, State1) -> + %% these could legally go to q3 if gamma and q2 are empty + store_beta_entry(Beta, State1 #vqstate { q1 = Q1a }) + end, Q1, State). -maybe_push_q4_to_betas(State = - #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount - }) when TargetRamMsgCount >= RamMsgCount -> +maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> + maybe_push_alphas_to_betas( + fun queue:out_r/1, + fun (Beta, Q4a, State1 = #vqstate { q3 = Q3 }) -> + %% these must go to q3 + State1 #vqstate { q3 = queue:in_r(Beta, Q3), q4 = Q4a } + end, Q4, State). 
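%% An illustration of the Generator/Consumer contract shared by the two
%% wrappers above and the worker defined next (sketch of the types only):
%%   Generator :: fun((queue()) -> {empty, queue()} |
%%                                 {{value, #alpha{}}, queue()})
%%   Consumer  :: fun((#beta{}, queue(), #vqstate{}) -> #vqstate{})
%% q1 is drained oldest-first with queue:out/1, and its betas may legally
%% land in q2 or q3; q4 is drained newest-first with queue:out_r/1, so its
%% betas must be prepended to q3 to keep seq_ids ordered.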
+ +maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = + #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when TargetRamMsgCount >= RamMsgCount -> State; -maybe_push_q4_to_betas(State = #vqstate { ram_msg_count = RamMsgCount, - q4 = Q4, q3 = Q3 }) -> - case queue:out_r(Q4) of - {empty, _Q4} -> State; - {{value, {msg_and_index, Msg = #basic_message { - guid = MsgId, is_persistent = IsPersistent }, - SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}}, Q4a} -> +maybe_push_alphas_to_betas(Generator, Consumer, Q, State = + #vqstate { ram_msg_count = RamMsgCount }) -> + case Generator(Q) of + {empty, _Q} -> State; + {{value, + #alpha { msg = Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + Qa} -> true = case MsgOnDisk of true -> true; false -> maybe_write_msg_to_disk(true, Msg) end, - Q3a = queue:in_r({index, MsgId, SeqId, IsPersistent, IsDelivered, - IndexOnDisk}, Q3), - maybe_push_q4_to_betas( - State #vqstate { ram_msg_count = RamMsgCount - 1, - q3 = Q3a, q4 = Q4a }) + Beta = #beta { msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + index_on_disk = IndexOnDisk }, + State1 = State #vqstate { ram_msg_count = RamMsgCount - 1 }, + maybe_push_alphas_to_betas(Generator, Consumer, Qa, + Consumer(Beta, Qa, State1)) end. push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, index_state = IndexState }) -> %% HighSeqId is high in the sense that it must be higher than the - %% seqid in Gamma, but it's also the lowest of the betas that we + %% seq_id in Gamma, but it's also the lowest of the betas that we %% transfer from q2 to gamma. {HighSeqId, Len1, Q2a, IndexState1} = push_betas_to_gammas(fun queue:out/1, undefined, Q2, IndexState), - Gamma1 = {Gamma1SeqId, _} = combine_gammas(Gamma, {HighSeqId, Len1}), - State1 = State #vqstate { q2 = Q2a, - gamma = Gamma1, + Gamma1 = #gamma { seq_id = Gamma1SeqId } = + combine_gammas(Gamma, #gamma { seq_id = HighSeqId, count = Len1 }), + State1 = State #vqstate { q2 = Q2a, gamma = Gamma1, index_state = IndexState1 }, case queue:out(Q3) of {empty, _Q3} -> State1; - {{value, {index, _MsgId, SeqId, _IsPersistent, _IsDelivered, - _IndexOnDisk}}, _Q3a} -> + {{value, #beta { seq_id = SeqId }}, _Q3a} -> Limit = rabbit_queue_index:next_segment_boundary(SeqId), - case Limit == Gamma1SeqId of - true -> %% already only holding the minimum, nothing to do + case Gamma1SeqId of + Limit -> %% already only holding the minimum, nothing to do State1; - false -> - %% ASSERTION - true = Gamma1SeqId == undefined orelse - Gamma1SeqId == Limit + rabbit_queue_index:segment_size(), + _ when Gamma1SeqId == undefined orelse Gamma1SeqId > Limit -> + %% ASSERTION (sadly large!) + %% This says that if Gamma1SeqId != undefined then + %% the gap from Limit to Gamma1SeqId is an integer + %% multiple of segment_size + SegmentCount = + case Gamma1SeqId of + undefined -> undefined; + _ -> (Gamma1SeqId - Limit) / + rabbit_queue_index:segment_size() + end, + true = (is_integer(SegmentCount) andalso SegmentCount > 0) + orelse Gamma1SeqId == undefined, %% LowSeqId is low in the sense that it must be - %% lower than the seqid in Gamma1, in fact either - %% gamma1 has undefined as its seqid or its seqid - %% is LowSeqId + 1. But because we use - %% queue:out_r, LowSeqId is actually also the - %% highest seqid of the betas we transfer from q3 - %% to gammas. 
+                    %% lower than the seq_id in gamma1, in fact either
+                    %% gamma1 has undefined as its seq_id or there
+                    %% does not exist a seq_id X s.t. X > LowSeqId and
+                    %% X < gamma1's seq_id (would be +1 if it wasn't
+                    %% for the possibility of gaps in the seq_ids).
+                    %% But because we use queue:out_r, LowSeqId is
+                    %% actually also the highest seqid of the betas we
+                    %% transfer from q3 to gammas.
                     {LowSeqId, Len2, Q3b, IndexState2} =
-                        push_betas_to_gammas(fun queue:out_r/1, Limit - 1, Q3,
+                        push_betas_to_gammas(fun queue:out_r/1, Limit, Q3,
                                              IndexState1),
-                    Gamma1SeqId = LowSeqId + 1, %% ASSERTION
-                    Gamma2 = combine_gammas({Limit, Len2}, Gamma1),
+                    true = Gamma1SeqId > LowSeqId, %% ASSERTION
+                    Gamma2 = combine_gammas(
+                               #gamma { seq_id = Limit, count = Len2 }, Gamma1),
                     State1 #vqstate { q3 = Q3b, gamma = Gamma2,
                                       index_state = IndexState2 }
             end
@@ -401,8 +492,7 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3,
 push_betas_to_gammas(Generator, Limit, Q, IndexState) ->
     case Generator(Q) of
         {empty, Qa} -> {undefined, 0, Qa, IndexState};
-        {{value, {index, _MsgId, SeqId, _IsPersistent, _IsDelivered,
-                  _IndexOnDisk}}, _Qa} ->
+        {{value, #beta { seq_id = SeqId }}, _Qa} ->
             {Count, Qb, IndexState1} =
                 push_betas_to_gammas(Generator, Limit, Q, 0, IndexState),
             {SeqId, Count, Qb, IndexState1}
@@ -411,11 +501,13 @@ push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) ->
     case Generator(Q) of
         {empty, Qa} -> {Count, Qa, IndexState};
-        {{value, {index, _MsgId, Limit, _IsPersistent, _IsDelivered,
-                  _IndexOnDisk}}, _Qa} ->
+        {{value, #beta { seq_id = SeqId }}, _Qa}
+          when Limit /= undefined andalso SeqId < Limit ->
             {Count, Q, IndexState};
-        {{value, {index, MsgId, SeqId, IsPersistent, IsDelivered,
-                  IndexOnDisk}}, Qa} ->
+        {{value, #beta { msg_id = MsgId, seq_id = SeqId,
+                         is_persistent = IsPersistent,
+                         is_delivered = IsDelivered,
+                         index_on_disk = IndexOnDisk}}, Qa} ->
             IndexState1 =
                 case IndexOnDisk of
                     true -> IndexState;
@@ -428,10 +520,15 @@ push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) ->
                 end,
             push_betas_to_gammas(Generator, Limit, Qa, Count + 1, IndexState1)
     end.
-
-combine_gammas({_, 0}, {_, 0}) -> {undefined, 0};
-combine_gammas({_, 0}, B     ) -> B;
-combine_gammas(A     , {_, 0}) -> A;
-combine_gammas({SeqIdLow, CountLow}, {SeqIdHigh, CountHigh}) ->
-    SeqIdHigh = SeqIdLow + CountLow, %% ASSERTION
-    {SeqIdLow, CountLow + CountHigh}.
+
+%% the first arg is the older gamma
+combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> #gamma { seq_id = undefined, count = 0 };
+combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B;
+combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A;
+combine_gammas(#gamma { seq_id = SeqIdLow, count = CountLow },
+               #gamma { seq_id = SeqIdHigh, count = CountHigh }) ->
+    true = SeqIdLow + CountLow =< SeqIdHigh, %% ASSERTION
+    %% note the above assertion does not say ==. This is because acks
+    %% may mean that the counts are not straight multiples of
+    %% segment_size.
+    #gamma { seq_id = SeqIdLow, count = CountLow + CountHigh }.
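A worked call with invented numbers makes the =< assertion concrete (again assuming a segment size of 16384 for illustration): acks can thin the older gamma, so its count may stop short of the younger gamma's starting seq_id.

%% Sketch only: combining an older gamma that has lost 384 msgs to acks
%% with a younger one starting at the next segment boundary.
combine_gammas_sketch() ->
    Old   = #gamma { seq_id = 0,     count = 16000 },
    Young = #gamma { seq_id = 16384, count = 10 },
    true = 0 + 16000 =< 16384, %% the assertion that fires inside
    #gamma { seq_id = 0, count = 16010 } = combine_gammas(Old, Young),
    ok.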
-- cgit v1.2.1 From 0bcf01d6c9dcf28709e5ce668ba11850a4bc720d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Oct 2009 14:51:02 +0100 Subject: fixing erroneous use of thunk(_) in spec to fun() --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index f973de5d..8596e09f 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -64,7 +64,7 @@ -spec(contains/1 :: (msg_id()) -> boolean()). -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). --spec(sync/2 :: ([msg_id()], thunk(any())) -> 'ok'). +-spec(sync/2 :: ([msg_id()], fun (() -> any())) -> 'ok'). -spec(stop/0 :: () -> 'ok'). -endif. -- cgit v1.2.1 From 3782d8dd7a06a4b5b4e0d5a13aebc161d0421b54 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Oct 2009 15:32:27 +0100 Subject: minor further corrections and modifications --- src/rabbit_variable_queue.erl | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 79a7f38b..85dfbbac 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -99,7 +99,8 @@ %% %% A further invariant is that if the queue is non empty, either q4 or %% q3 contains at least one entry. I.e. we never allow gamma to -%% contain all msgs in the queue. +%% contain all msgs in the queue. Also, if q4 is non empty and gamma +%% is non empty then q3 must be non empty. init(QueueName) -> {NextSeqId, IndexState} = rabbit_queue_index:init(QueueName), @@ -265,12 +266,11 @@ out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, index_state = IndexState, State1 #vqstate { q1 = queue:new(), q4 = queue:join(Q4a, Q1) }; {true, false} -> - {List, IndexState1} = - rabbit_queue_index:read_segment_entries(GammaSeqId, - IndexState), + {List, IndexState1, Gamma1SeqId} = + read_index_segment(GammaSeqId, IndexState), State3 = State1 #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because - %% of acks. In fact, List may be [] + %% of acks. But it can't be [] Q3b = betas_from_segment_entries(List), case GammaCount - length(List) of 0 -> @@ -282,10 +282,8 @@ out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, index_state = IndexState, q2 = queue:new(), q3 = queue:join(Q3b, Q2) }; N when N > 0 -> State3 #vqstate { - gamma = #gamma { - seq_id = GammaSeqId + - rabbit_queue_index:segment_size(), - count = N }, q3 = Q3b } + gamma = #gamma { seq_id = Gamma1SeqId, + count = N }, q3 = Q3b } end; {false, _} -> %% q3 still isn't empty, we've not touched @@ -304,6 +302,13 @@ betas_from_segment_entries(List) -> index_on_disk = true } end, List)). +read_index_segment(SeqId, IndexState) -> + SeqId1 = SeqId + rabbit_queue_index:segment_size(), + case rabbit_queue_index:read_segment_entries(SeqId, IndexState) of + {[], IndexState1} -> read_index_segment(SeqId1, IndexState1); + {List, IndexState1} -> {List, IndexState1, SeqId1} + end. + maybe_start_prefetcher(State) -> %% TODO State. @@ -456,19 +461,17 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, case Gamma1SeqId of Limit -> %% already only holding the minimum, nothing to do State1; - _ when Gamma1SeqId == undefined orelse Gamma1SeqId > Limit -> + _ when Gamma1SeqId == undefined orelse + (is_integer(Gamma1SeqId) andalso Gamma1SeqId > Limit) -> %% ASSERTION (sadly large!) 
-                    %% This says that if Gamma1SeqId != undefined then
+                    %% This says that if Gamma1SeqId /= undefined then
                     %% the gap from Limit to Gamma1SeqId is an integer
                     %% multiple of segment_size
-                    SegmentCount =
-                        case Gamma1SeqId of
-                            undefined -> undefined;
-                            _ -> (Gamma1SeqId - Limit) /
+                    0 = case Gamma1SeqId of
+                            undefined -> 0;
+                            _ -> (Gamma1SeqId - Limit) rem
                                      rabbit_queue_index:segment_size()
                         end,
-                    true = (is_integer(SegmentCount) andalso SegmentCount > 0)
-                        orelse Gamma1SeqId == undefined,
                     %% LowSeqId is low in the sense that it must be
                     %% lower than the seq_id in gamma1, in fact either
                     %% gamma1 has undefined as its seq_id or there
@@ -476,7 +479,7 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3,
                     %% X < gamma1's seq_id (would be +1 if it wasn't
                     %% for the possibility of gaps in the seq_ids).
                     %% But because we use queue:out_r, LowSeqId is
-                    %% actually also the highest seqid of the betas we
+                    %% actually also the highest seq_id of the betas we
                     %% transfer from q3 to gammas.
                     {LowSeqId, Len2, Q3b, IndexState2} =
                         push_betas_to_gammas(fun queue:out_r/1, Limit, Q3,
-- cgit v1.2.1 


From 886926504cb39b72fc1e69cc2b6aeea0d3a620ab Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 8 Oct 2009 17:38:21 +0100
Subject: The calculation of the highest seq id in the index queue was wrong - it was simply returning the highest unacked seq id, instead of looking for the highest seq id ever encountered. This could have led to reuse of seq ids. We also need to know the total message count in the queue index, which is the number of unacked msgs recorded in the index, and we also need the seq id of the segment boundary of the segment containing the first msg in the queue. This is so that we can form the initial gamma correctly.

---
 src/rabbit_queue_index.erl    | 97 ++++++++++++++++++++++++++-----------------
 src/rabbit_variable_queue.erl | 11 +++--
 2 files changed, 66 insertions(+), 42 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index b21651a2..98ab2d77 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -133,7 +133,8 @@
               seg_ack_counts :: dict() }).
 
--spec(init/1 :: (string()) -> {non_neg_integer(), qistate()}).
+-spec(init/1 :: (string()) -> {non_neg_integer(), non_neg_integer(),
+                               non_neg_integer(), qistate()}).
 -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) ->
              qistate()).
 -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()).
@@ -154,10 +155,12 @@ init(Name) -> Dir = filename:join(queues_dir(), Name), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - AckCounts = scatter_journal(Dir, find_ack_counts(Dir)), + {AckCounts, TotalMsgCount} = scatter_journal(Dir, find_ack_counts(Dir)), {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), [raw, binary, delayed_write, write, read]), - {find_next_seq_id(Dir), + {LowestSeqIdSeg, HighestSeqId} = + find_lowest_seq_id_seg_and_highest_seq_id(Dir), + {LowestSeqIdSeg, HighestSeqId + 1, TotalMsgCount, #qistate { dir = Dir, cur_seg_num = undefined, cur_seg_hdl = undefined, @@ -234,7 +237,7 @@ read_segment_entries(InitSeqId, State = #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), SegPath = seg_num_to_path(Dir, SegNum), - {SDict, _AckCount} = load_segment(SegNum, SegPath, JAckDict), + {SDict, _AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, JAckDict), %% deliberately sort the list desc, because foldl will reverse it RelSeqs = rev_sort(dict:fetch_keys(SDict)), {lists:foldl(fun (RelSeq, Acc) -> @@ -308,39 +311,52 @@ all_segment_nums_paths(Dir) -> SegName)), filename:join(Dir, SegName)} || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. -find_next_seq_id(Dir) -> +find_lowest_seq_id_seg_and_highest_seq_id(Dir) -> SegNumsPaths = all_segment_nums_paths(Dir), - case rev_sort(SegNumsPaths) of - [] -> 0; - [{SegNum, SegPath}|_] -> - {SDict, _AckCount} = load_segment(SegNum, SegPath, dict:new()), - case rev_sort(dict:fetch_keys(SDict)) of - [] -> 0; - [RelSeq|_] -> 1 + reconstruct_seq_id(SegNum, RelSeq) - end - end. + %% We don't want the lowest seq_id, merely the seq_id of the start + %% of the lowest segment. That seq_id may not actually exist, but + %% that's fine. The important thing is that the segment exists and + %% the seq_id reported is on a segment boundary. + LowSeqIdSeg = + case lists:sort(SegNumsPaths) of + [] -> 0; + [{SegNum1, _SegPath1}|_] -> reconstruct_seq_id(SegNum1, 0) + end, + HighestSeqId = + case rev_sort(SegNumsPaths) of + [] -> 0; + [{SegNum2, SegPath2}|_] -> + {_SDict, _AckCount, HighRelSeq} = + load_segment(SegNum2, SegPath2, dict:new()), + reconstruct_seq_id(SegNum2, HighRelSeq) + end, + {LowSeqIdSeg, HighestSeqId}. find_ack_counts(Dir) -> SegNumsPaths = all_segment_nums_paths(Dir), lists:foldl( - fun ({SegNum, SegPath}, Acc) -> - case load_segment(SegNum, SegPath, dict:new()) of - {_SDict, 0} -> Acc; - {_SDict, AckCount} -> dict:store(SegNum, AckCount, Acc) - end - end, dict:new(), SegNumsPaths). - -scatter_journal(Dir, AckCounts) -> + fun ({SegNum, SegPath}, {AccCount, AccDict}) -> + {SDict, AckCount, _HighRelSeq} = + load_segment(SegNum, SegPath, dict:new()), + {dict:size(SDict) + AccCount, + case AckCount of + 0 -> AccDict; + _ -> dict:store(SegNum, AckCount, AccDict) + end} + end, {0, dict:new()}, SegNumsPaths). + +scatter_journal(Dir, {TotalMsgCount, AckCounts}) -> JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), case file:open(JournalPath, [read, read_ahead, raw, binary]) of {error, enoent} -> AckCounts; {ok, Hdl} -> ADict = load_journal(Hdl, dict:new()), ok = file:close(Hdl), - {AckCounts1, _Dir} = dict:fold(fun replay_journal_acks_to_segment/3, - {AckCounts, Dir}, ADict), + {AckCounts1, TotalMsgCount1, _Dir} = + dict:fold(fun replay_journal_acks_to_segment/3, + {AckCounts, TotalMsgCount, Dir}, ADict), ok = file:delete(JournalPath), - AckCounts1 + {AckCounts1, TotalMsgCount1} end. 
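For callers, the upshot is that init/1 now returns enough to seed a gamma directly. A usage sketch with invented values; segment_size/0 is the same accessor used elsewhere in the series:

init_sketch(Name) ->
    {LowSeqIdSeg, NextSeqId, TotalMsgCount, QiState} =
        rabbit_queue_index:init(Name),
    %% LowSeqIdSeg sits on a segment boundary even if no msg carries that
    %% exact seq_id; NextSeqId is one past the highest seq_id ever written;
    %% TotalMsgCount counts unacked msgs only.
    0 = LowSeqIdSeg rem rabbit_queue_index:segment_size(),
    true = NextSeqId >= LowSeqIdSeg,
    {TotalMsgCount, QiState}.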
load_journal(Hdl, ADict) -> @@ -354,15 +370,15 @@ add_ack_to_ack_dict(SeqId, ADict) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). -replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, Dir}) -> +replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, TotalMsgCount, Dir}) -> SegPath = seg_num_to_path(Dir, SegNum), - {SDict, _AckCount} = load_segment(SegNum, SegPath, dict:new()), + {SDict, _AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, dict:new()), ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks)), {append_acks_to_segment(SegPath, SegNum, AckCounts, sets:to_list(ValidAcks)), - Dir}. + TotalMsgCount - sets:size(ValidAcks), Dir}. %%---------------------------------------------------------------------------- @@ -371,28 +387,30 @@ replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, Dir}) -> load_segment(SegNum, SegPath, JAckDict) -> case file:open(SegPath, [raw, binary, read_ahead, read]) of - {error, enoent} -> {dict:new(), 0}; + {error, enoent} -> {dict:new(), 0, 0}; {ok, Hdl} -> - {SDict, AckCount} = - load_segment_entries(SegNum, Hdl, {dict:new(), 0}), + {SDict, AckCount, HighRelSeq} = + load_segment_entries(SegNum, Hdl, {dict:new(), 0, 0}), ok = file:close(Hdl), RelSeqs = case dict:find(SegNum, JAckDict) of {ok, RelSeqs1} -> RelSeqs1; error -> [] end, - lists:foldl(fun (RelSeq, {SDict1, AckCount1}) -> - {dict:erase(RelSeq, SDict1), AckCount1+1} - end, {SDict, AckCount}, RelSeqs) + {SDict1, AckCount1} = + lists:foldl(fun (RelSeq, {SDict2, AckCount2}) -> + {dict:erase(RelSeq, SDict2), AckCount2 + 1} + end, {SDict, AckCount}, RelSeqs), + {SDict1, AckCount1, HighRelSeq} end. -load_segment_entries(SegNum, Hdl, {SDict, AckCount}) -> +load_segment_entries(SegNum, Hdl, {SDict, AckCount, HighRelSeq}) -> case file:read(Hdl, 1) of {ok, <>} -> {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, - load_segment_entries(SegNum, Hdl, - deliver_or_ack_msg(SDict, AckCount, RelSeq)); + {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), + load_segment_entries(SegNum, Hdl, {SDict1, AckCount1, HighRelSeq}); {ok, <>} -> %% because we specify /binary, and binaries are complete @@ -400,11 +418,12 @@ load_segment_entries(SegNum, Hdl, {SDict, AckCount}) -> {ok, <>} = file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, + HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( SegNum, Hdl, {dict:store(RelSeq, {MsgId, false, 1 == IsPersistentNum}, - SDict), AckCount}); - _ErrOrEoF -> {SDict, AckCount} + SDict), AckCount, HighRelSeq1}); + _ErrOrEoF -> {SDict, AckCount, HighRelSeq} end. deliver_or_ack_msg(SDict, AckCount, RelSeq) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 85dfbbac..1491879b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -81,7 +81,7 @@ %% in ram), and gamma is just a count of the number of index entries %% on disk at that stage (msg on disk, index on disk). %% -%% When a msg arrives, we decide which form it should be in. It is +%% When a msg arrives, we decide in which form it should be. It is %% then added to the rightmost appropriate queue, maintaining %% order. Thus if the msg is to be an alpha, it will be added to q1, %% unless all of q1, q2, gamma and q3 are empty, in which case it will @@ -103,9 +103,14 @@ %% is non empty then q3 must be non empty. 
init(QueueName) -> - {NextSeqId, IndexState} = rabbit_queue_index:init(QueueName), + {LowSeqId, NextSeqId, Count, IndexState} = + rabbit_queue_index:init(QueueName), + Gamma = case Count of + 0 -> #gamma { seq_id = undefined, count = 0 }; + _ -> #gamma { seq_id = LowSeqId, count = Count } + end, #vqstate { q1 = queue:new(), q2 = queue:new(), - gamma = #gamma { seq_id = undefined, count = 0 }, + gamma = Gamma, q3 = queue:new(), q4 = queue:new(), target_ram_msg_count = undefined, ram_msg_count = 0, -- cgit v1.2.1 From af10b695aec2a76da69868962996bcf97e84924d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Oct 2009 17:50:47 +0100 Subject: And now actually make sure that on start up we load in the first segment from gamma to q3 if possible. Hence a refactoring here. --- src/rabbit_variable_queue.erl | 84 +++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 1491879b..184050f7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -103,26 +103,28 @@ %% is non empty then q3 must be non empty. init(QueueName) -> - {LowSeqId, NextSeqId, Count, IndexState} = + {GammaSeqId, NextSeqId, GammaCount, IndexState} = rabbit_queue_index:init(QueueName), - Gamma = case Count of + Gamma = case GammaCount of 0 -> #gamma { seq_id = undefined, count = 0 }; - _ -> #gamma { seq_id = LowSeqId, count = Count } + _ -> #gamma { seq_id = GammaSeqId, count = GammaCount } end, - #vqstate { q1 = queue:new(), q2 = queue:new(), - gamma = Gamma, - q3 = queue:new(), q4 = queue:new(), - target_ram_msg_count = undefined, - ram_msg_count = 0, - queue = QueueName, - index_state = IndexState, - next_seq_id = NextSeqId, - out_counter = 0, - egress_rate = 0, - avg_egress_rate = 0, - egress_rate_timestamp = now(), - prefetcher = undefined - }. + State = + #vqstate { q1 = queue:new(), q2 = queue:new(), + gamma = Gamma, + q3 = queue:new(), q4 = queue:new(), + target_ram_msg_count = undefined, + ram_msg_count = 0, + queue = QueueName, + index_state = IndexState, + next_seq_id = NextSeqId, + out_counter = 0, + egress_rate = 0, + avg_egress_rate = 0, + egress_rate_timestamp = now(), + prefetcher = undefined + }, + maybe_load_next_segment(State). in(Msg, IsDelivered, State = #vqstate { next_seq_id = SeqId }) -> in(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, @@ -238,9 +240,8 @@ out(State = index_state = IndexState1 }} end. -out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, index_state = IndexState, - gamma = #gamma { seq_id = GammaSeqId, - count = GammaCount}, +out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, + gamma = #gamma { count = GammaCount }, q3 = Q3, q4 = Q4 }) -> case queue:out(Q3) of {empty, _Q3} -> @@ -271,25 +272,7 @@ out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, index_state = IndexState, State1 #vqstate { q1 = queue:new(), q4 = queue:join(Q4a, Q1) }; {true, false} -> - {List, IndexState1, Gamma1SeqId} = - read_index_segment(GammaSeqId, IndexState), - State3 = State1 #vqstate { index_state = IndexState1 }, - %% length(List) may be < segment_size because - %% of acks. 
But it can't be [] - Q3b = betas_from_segment_entries(List), - case GammaCount - length(List) of - 0 -> - %% gamma is now empty, but it wasn't - %% before, so can now join q2 onto q3 - State3 #vqstate { - gamma = #gamma { seq_id = undefined, - count = 0 }, - q2 = queue:new(), q3 = queue:join(Q3b, Q2) }; - N when N > 0 -> - State3 #vqstate { - gamma = #gamma { seq_id = Gamma1SeqId, - count = N }, q3 = Q3b } - end; + maybe_load_next_segment(State1); {false, _} -> %% q3 still isn't empty, we've not touched %% gamma, so the invariants between q1, q2, @@ -299,6 +282,29 @@ out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, index_state = IndexState, out(State2) end. +maybe_load_next_segment(State = #vqstate { gamma = #gamma { count = 0 }} ) -> + State; +maybe_load_next_segment(State = + #vqstate { index_state = IndexState, q2 = Q2, + gamma = #gamma { seq_id = GammaSeqId, + count = GammaCount }}) -> + {List, IndexState1, Gamma1SeqId} = + read_index_segment(GammaSeqId, IndexState), + State1 = State #vqstate { index_state = IndexState1 }, + %% length(List) may be < segment_size because of acks. But it + %% can't be [] + Q3a = betas_from_segment_entries(List), + case GammaCount - length(List) of + 0 -> + %% gamma is now empty, but it wasn't before, so can now + %% join q2 onto q3 + State1 #vqstate { gamma = #gamma { seq_id = undefined, count = 0 }, + q2 = queue:new(), q3 = queue:join(Q3a, Q2) }; + N when N > 0 -> + State1 #vqstate { gamma = #gamma { seq_id = Gamma1SeqId, + count = N }, q3 = Q3a } + end. + betas_from_segment_entries(List) -> queue:from_list(lists:map(fun ({MsgId, SeqId, IsPersistent, IsDelivered}) -> #beta { msg_id = MsgId, seq_id = SeqId, -- cgit v1.2.1 From 3dfac7c331ee33d3e8c7ce0f2038041f3a00262a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Oct 2009 18:18:09 +0100 Subject: on out/1, if the msg or its index are on disk and they don't need to be, take them off disk --- src/rabbit_variable_queue.erl | 44 +++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 184050f7..ff3b8b7c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -220,20 +220,41 @@ out(State = end, out(State #vqstate { q4 = Q4a, prefetcher = undefined }); {{value, - #alpha { msg = Msg = #basic_message { guid = MsgId }, seq_id = SeqId, - is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, - index_on_disk = IndexOnDisk }}, Q4a} -> - IndexState1 = - case IndexOnDisk andalso not IsDelivered of + #alpha { msg = Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + Q4a} -> + {IndexState1, IndexOnDisk1} = + case IndexOnDisk of true -> - rabbit_queue_index:write_delivered(SeqId, IndexState); + IndexState2 = + case IsDelivered of + false -> rabbit_queue_index:write_delivered( + SeqId, IndexState); + true -> IndexState + end, + case IsPersistent of + true -> {IndexState2, true}; + false -> {rabbit_queue_index:write_acks( + [SeqId], IndexState2), false} + end; false -> - IndexState + {IndexState, false} + end, + _MsgOnDisk1 = IndexOnDisk1 = + case IndexOnDisk1 of + true -> true = IsPersistent, %% ASSERTION + true = MsgOnDisk; %% ASSERTION + false -> ok = case MsgOnDisk andalso not IsPersistent of + true -> rabbit_msg_store:remove([MsgId]); + false -> ok + end, + false end, - AckTag = case {IndexOnDisk, MsgOnDisk} of - {true, true } -> 
{ack_index_and_store, MsgId, SeqId}; - {false, true } -> {ack_store, MsgId}; - {false, false} -> ack_not_on_disk + AckTag = case IndexOnDisk1 of + true -> {ack_index_and_store, MsgId, SeqId}; + false -> ack_not_on_disk end, {{Msg, IsDelivered, AckTag}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, @@ -260,7 +281,6 @@ out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = true, index_on_disk = IndexOnDisk }, Q4), - %% TODO - if it's not persistent, remove it from disk now State1 = State #vqstate { q3 = Q3a, q4 = Q4a }, State2 = case {queue:is_empty(Q3a), 0 == GammaCount} of -- cgit v1.2.1 From 1153ebc6872dbf24f0c74f3c19a4827d7b586140 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Oct 2009 12:13:41 +0100 Subject: adding len and is_empty. Renaming in -> publish and out -> fetch. A bit of moving around. Generally trying to work towards the mq API so that it can be dropped in --- src/rabbit_variable_queue.erl | 151 ++++++++++++++++++++++++------------------ 1 file changed, 86 insertions(+), 65 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ff3b8b7c..cb2e9f24 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,8 +31,10 @@ -module(rabbit_variable_queue). --export([init/1, in/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, - out/1]). +-export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, + fetch/1, len/1, is_empty/1]). + +%%---------------------------------------------------------------------------- -record(vqstate, { q1, @@ -49,7 +51,8 @@ egress_rate, avg_egress_rate, egress_rate_timestamp, - prefetcher + prefetcher, + len }). -record(alpha, @@ -75,6 +78,8 @@ -include("rabbit.hrl"). +%%---------------------------------------------------------------------------- + %% Basic premise is that msgs move from q1 -> q2 -> gamma -> q3 -> q4 %% but they can only do so in the right form. q1 and q4 only hold %% alphas (msgs in ram), q2 and q3 only hold betas (msg on disk, index @@ -102,6 +107,10 @@ %% contain all msgs in the queue. Also, if q4 is non empty and gamma %% is non empty then q3 must be non empty. +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + init(QueueName) -> {GammaSeqId, NextSeqId, GammaCount, IndexState} = rabbit_queue_index:init(QueueName), @@ -122,59 +131,15 @@ init(QueueName) -> egress_rate = 0, avg_egress_rate = 0, egress_rate_timestamp = now(), - prefetcher = undefined + prefetcher = undefined, + len = GammaCount }, maybe_load_next_segment(State). -in(Msg, IsDelivered, State = #vqstate { next_seq_id = SeqId }) -> - in(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, - State #vqstate { next_seq_id = SeqId + 1 }). 
- -in(msg, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, State = #vqstate { index_state = IndexState, - ram_msg_count = RamMsgCount }) -> - MsgOnDisk = maybe_write_msg_to_disk(false, Msg), - {IndexOnDisk, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - Entry = #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }, - State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, - index_state = IndexState1 }, - store_alpha_entry(Entry, State1); - -in(index, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, State = #vqstate { index_state = IndexState, - q1 = Q1 }) -> - true = maybe_write_msg_to_disk(true, Msg), - {IndexOnDisk, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - Entry = #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, - is_persistent = IsPersistent, index_on_disk = IndexOnDisk }, - State1 = State #vqstate { index_state = IndexState1 }, - true = queue:is_empty(Q1), %% ASSERTION - store_beta_entry(Entry, State1); - -in(neither, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, State = #vqstate { index_state = IndexState, - q1 = Q1, q2 = Q2, gamma = Gamma }) -> - true = maybe_write_msg_to_disk(true, Msg), - {true, IndexState1} = - maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION - %% gamma may be empty, seq_id > next_segment_boundary from q3 - %% head, so we need to find where the segment boundary is before - %% or equal to seq_id - GammaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - - rabbit_queue_index:segment_size(), - Gamma1 = #gamma { seq_id = GammaSeqId, count = 1 }, - State #vqstate { index_state = IndexState1, - gamma = combine_gammas(Gamma, Gamma1) }. +publish(Msg, IsDelivered, State = #vqstate { next_seq_id = SeqId, + len = Len }) -> + publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, + State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 }). set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, @@ -205,20 +170,20 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, egress_rate_timestamp = Now, out_counter = 0 }. -out(State = - #vqstate { q4 = Q4, - out_counter = OutCount, prefetcher = Prefetcher, - index_state = IndexState }) -> +fetch(State = + #vqstate { q4 = Q4, + out_counter = OutCount, prefetcher = Prefetcher, + index_state = IndexState, len = Len }) -> case queue:out(Q4) of {empty, _Q4} when Prefetcher == undefined -> - out_from_q3(State); + fetch_from_q3_or_gamma(State); {empty, _Q4} -> Q4a = case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of empty -> Q4; Q4b -> Q4b end, - out(State #vqstate { q4 = Q4a, prefetcher = undefined }); + fetch(State #vqstate { q4 = Q4a, prefetcher = undefined }); {{value, #alpha { msg = Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -256,14 +221,70 @@ out(State = true -> {ack_index_and_store, MsgId, SeqId}; false -> ack_not_on_disk end, - {{Msg, IsDelivered, AckTag}, + Len1 = Len - 1, + {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, - index_state = IndexState1 }} + index_state = IndexState1, len = Len1 }} end. 
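%% A note on the tags returned above: after this change an ack tag takes
%% one of two forms,
%%   {ack_index_and_store, MsgId, SeqId} -- persistent msg, present in
%%       both the msg store and the index, so both need the ack;
%%   ack_not_on_disk -- nothing on disk remains to be cleaned up (any
%%       transient copy was already removed during the fetch).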
-out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, - gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4 }) -> +len(#vqstate { len = Len }) -> + Len. + +is_empty(State) -> + 0 == len(State). + +%%---------------------------------------------------------------------------- + +publish(msg, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, State = #vqstate { index_state = IndexState, + ram_msg_count = RamMsgCount }) -> + MsgOnDisk = maybe_write_msg_to_disk(false, Msg), + {IndexOnDisk, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + Entry = #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }, + State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, + index_state = IndexState1 }, + store_alpha_entry(Entry, State1); + +publish(index, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, State = #vqstate { index_state = IndexState, + q1 = Q1 }) -> + true = maybe_write_msg_to_disk(true, Msg), + {IndexOnDisk, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + Entry = #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, + is_persistent = IsPersistent, index_on_disk = IndexOnDisk }, + State1 = State #vqstate { index_state = IndexState1 }, + true = queue:is_empty(Q1), %% ASSERTION + store_beta_entry(Entry, State1); + +publish(neither, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, + State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, + gamma = Gamma }) -> + true = maybe_write_msg_to_disk(true, Msg), + {true, IndexState1} = + maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION + %% gamma may be empty, seq_id > next_segment_boundary from q3 + %% head, so we need to find where the segment boundary is before + %% or equal to seq_id + GammaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - + rabbit_queue_index:segment_size(), + Gamma1 = #gamma { seq_id = GammaSeqId, count = 1 }, + State #vqstate { index_state = IndexState1, + gamma = combine_gammas(Gamma, Gamma1) }. + +fetch_from_q3_or_gamma(State = #vqstate { q1 = Q1, q2 = Q2, + gamma = #gamma { count = GammaCount }, + q3 = Q3, q4 = Q4 }) -> case queue:out(Q3) of {empty, _Q3} -> 0 = GammaCount, %% ASSERTION @@ -299,7 +320,7 @@ out_from_q3(State = #vqstate { q1 = Q1, q2 = Q2, %% gamma and q3 are maintained State1 end, - out(State2) + fetch(State2) end. 
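With the renames in place, a round trip through the public API reads as follows; a sketch only, assuming State0 is a freshly initialised, empty state and Msg is a #basic_message{}:

round_trip_sketch(Msg, State0) ->
    State1 = rabbit_variable_queue:publish(Msg, false, State0),
    1 = rabbit_variable_queue:len(State1),
    %% fetch now also reports the remaining length (0 here); the exact
    %% AckTag depends on what reached disk.
    {{Msg1, false, _AckTag, 0}, State2} = rabbit_variable_queue:fetch(State1),
    true = rabbit_variable_queue:is_empty(State2),
    {Msg1, State2}.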
maybe_load_next_segment(State = #vqstate { gamma = #gamma { count = 0 }} ) ->
-- cgit v1.2.1 


From edea013f20b1dfe56fedf8a5e2abd11ac95fd41f Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 9 Oct 2009 12:46:10 +0100
Subject: having discovered that erlang:min and erlang:max do exist, swap them in, in place of lists:min and lists:max where appropriate

---
 src/rabbit_disk_queue.erl | 4 ++--
 src/rabbit_msg_store.erl  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 42c4ed8b..19f5a74e 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -716,8 +716,8 @@ extract_sequence_numbers(Sequences) ->
                   [] ->
                       ets:insert_new(Sequences, {Q, SeqId, NextWrite});
                   [Orig = {_, Read, Write}] ->
-                      Repl = {Q, lists:min([Read, SeqId]),
-                              lists:max([Write, NextWrite])},
+                      Repl = {Q, erlang:min(Read, SeqId),
+                              erlang:max(Write, NextWrite)},
                       case Orig == Repl of
                           true -> true;
                           false -> ets:insert(Sequences, Repl)
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 8596e09f..707afc38 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -536,7 +536,7 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) ->
     [FSEntry = #file_summary { valid_total_size = ValidTotalSize,
                                contiguous_top = ContiguousTop }] =
         ets:lookup(FileSummary, File),
-    ContiguousTop1 = lists:min([ContiguousTop, Offset]),
+    ContiguousTop1 = erlang:min(ContiguousTop, Offset),
     ValidTotalSize1 = ValidTotalSize - TotalSize,
     true = ets:insert(FileSummary,
                       FSEntry #file_summary { valid_total_size = ValidTotalSize1,
-- cgit v1.2.1 


From db0a107bff8c5793565b89001f349063aad4e62b Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 9 Oct 2009 13:58:31 +0100
Subject: altered the prefetcher to drive the msg_store directly, and wired into vq

---
 src/rabbit_msg_store.erl        | 108 +++++++++++++++++++++------------------
 src/rabbit_queue_prefetcher.erl | 110 ++++++++++++++++++++++----------------
 src/rabbit_variable_queue.erl   |  38 ++++++++++----
 3 files changed, 152 insertions(+), 104 deletions(-)

diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 707afc38..70f8627e 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -39,7 +39,7 @@
 -export([sync/0]). %% internal
 
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
-         terminate/2, code_change/3]).
+         terminate/2, code_change/3, idle_read/1]).
 
 -define(SERVER, ?MODULE).
 
@@ -231,14 +231,15 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) ->
                            [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit],
                            [{timeout, infinity}]).
 
-write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}).
-read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity).
-contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity).
-remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}).
-release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}).
-sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}).
-stop() -> gen_server2:call(?SERVER, stop, infinity).
-sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal
+write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}).
+read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity).
+idle_read(MsgId) -> gen_server2:pcast(?SERVER, -1, {idle_read, MsgId, self()}).
+contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity).
+remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}).
+release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). +sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). +stop() -> gen_server2:call(?SERVER, stop, infinity). +sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal %%---------------------------------------------------------------------------- %% gen_server callbacks @@ -291,44 +292,8 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, State1 #msstate { current_file_handle = FileHdl }}. handle_call({read, MsgId}, _From, State) -> - case index_lookup(MsgId, State) of - not_found -> reply(not_found, State); - #msg_location { ref_count = RefCount, - file = File, - offset = Offset, - total_size = TotalSize } -> - case fetch_and_increment_cache(MsgId, State) of - not_found -> - {{ok, {MsgId, Msg}}, State1} = - with_read_handle_at( - File, Offset, - fun(Hdl) -> - Res = case rabbit_msg_file:read( - Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> Obj; - {ok, Rest} -> - throw({error, - {misread, - [{old_state, State}, - {file_num, File}, - {offset, Offset}, - {read, Rest}]}}) - end, - {Offset + TotalSize, Res} - end, State), - ok = if RefCount > 1 -> - insert_into_cache(MsgId, Msg, State1); - true -> ok - %% it's not in the cache and we - %% only have one reference to the - %% message. So don't bother - %% putting it in the cache. - end, - reply({ok, Msg}, State1); - {Msg, _RefCount} -> - reply({ok, Msg}, State) - end - end; + {Result, State1} = internal_read_message(MsgId, State), + reply(Result, State1); handle_call({contains, MsgId}, _From, State) -> reply(case index_lookup(MsgId, State) of @@ -416,7 +381,14 @@ handle_cast({sync, MsgIds, K}, end; handle_cast(sync, State) -> - noreply(sync(State)). + noreply(sync(State)); + +handle_cast({idle_read, MsgId, From}, State) -> + {Result, State1} = internal_read_message(MsgId, State), + rabbit_misc:with_exit_handler( + fun () -> ok end, + fun () -> rabbit_queue_prefetcher:publish(From, Result) end), + noreply(State1). handle_info(timeout, State) -> noreply(sync(State)). @@ -549,6 +521,46 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> no_compact end. +internal_read_message(MsgId, State) -> + case index_lookup(MsgId, State) of + not_found -> {not_found, State}; + #msg_location { ref_count = RefCount, + file = File, + offset = Offset, + total_size = TotalSize } -> + case fetch_and_increment_cache(MsgId, State) of + not_found -> + {{ok, {MsgId, Msg}}, State1} = + with_read_handle_at( + File, Offset, + fun(Hdl) -> + Res = case rabbit_msg_file:read( + Hdl, TotalSize) of + {ok, {MsgId, _}} = Obj -> Obj; + {ok, Rest} -> + throw({error, + {misread, + [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {read, Rest}]}}) + end, + {Offset + TotalSize, Res} + end, State), + ok = if RefCount > 1 -> + insert_into_cache(MsgId, Msg, State1); + true -> ok + %% it's not in the cache and we + %% only have one reference to the + %% message. So don't bother + %% putting it in the cache. + end, + {{ok, Msg}, State1}; + {Msg, _RefCount} -> + {{ok, Msg}, State} + end + end. + %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index 3b1c219d..cad4c695 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/2]). +-export([start_link/1]). 
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -46,12 +46,27 @@ -define(DESIRED_HIBERNATE, 10000). -record(pstate, - { msg_buf, - target_count, - queue, + { alphas, + betas, queue_mref }). +-record(alpha, + { msg, + seq_id, + is_delivered, + msg_on_disk, + index_on_disk + }). + +-record(beta, + { msg_id, + seq_id, + is_persistent, + is_delivered, + index_on_disk + }). + %%---------------------------------------------------------------------------- %% Novel %%---------------------------------------------------------------------------- @@ -182,22 +197,22 @@ -ifdef(use_specs). --spec(start_link/2 :: (queue_name(), non_neg_integer()) -> +-spec(start_link/1 :: (queue()) -> ({'ok', pid()} | 'ignore' | {'error', any()})). -spec(publish/2 :: (pid(), (message()| 'empty')) -> 'ok'). --spec(drain/1 :: (pid()) -> ('empty' | {queue(), ('finished' | 'continuing')})). --spec(drain_and_stop/1 :: (pid()) -> ('empty' | queue())). +-spec(drain/1 :: (pid()) -> ({('finished' | 'continuing' | 'empty'), queue()})). +-spec(drain_and_stop/1 :: (pid()) -> ({('empty' | queue()), queue()})). -spec(stop/1 :: (pid()) -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start_link(Queue, Count) -> - gen_server2:start_link(?MODULE, [Queue, Count, self()], []). +start_link(Betas) -> + false = queue:is_empty(Betas), %% ASSERTION + gen_server2:start_link(?MODULE, [Betas, self()], []). -publish(Prefetcher, - Obj = { #basic_message {}, _IsDelivered, _AckTag, _Remaining }) -> +publish(Prefetcher, Obj = #basic_message {}) -> gen_server2:call(Prefetcher, {publish, Obj}, infinity); publish(Prefetcher, empty) -> gen_server2:call(Prefetcher, publish_empty, infinity). @@ -213,50 +228,50 @@ stop(Prefetcher) -> %%---------------------------------------------------------------------------- -init([Q, Count, QPid]) when Count > 0 andalso is_pid(QPid) -> +init([Betas, QPid]) when is_pid(QPid) -> %% link isn't enough because the signal will not appear if the %% queue exits normally. Thus have to use monitor. MRef = erlang:monitor(process, QPid), - State = #pstate { msg_buf = queue:new(), - target_count = Count, - queue = Q, + State = #pstate { alphas = queue:new(), + betas = Betas, queue_mref = MRef - }, - ok = rabbit_disk_queue:prefetch(Q), - {ok, State, infinity, {backoff, ?HIBERNATE_AFTER_MIN, - ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + }, + {ok, prefetch(State), infinity, {backoff, ?HIBERNATE_AFTER_MIN, + ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
-handle_call({publish, - {Msg = #basic_message {}, IsDelivered, AckTag, _Remaining}}, - DiskQueue, State = #pstate { - target_count = Target, msg_buf = MsgBuf, queue = Q}) -> +handle_call({publish, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }}, + DiskQueue, State = #pstate { alphas = Alphas, betas = Betas }) -> gen_server2:reply(DiskQueue, ok), - Timeout = case Target of - 1 -> hibernate; - _ -> ok = rabbit_disk_queue:prefetch(Q), - infinity - end, - MsgBuf1 = queue:in({Msg, IsDelivered, AckTag}, MsgBuf), - {noreply, State #pstate { target_count = Target - 1, msg_buf = MsgBuf1 }, - Timeout}; + {{value, #beta { msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + index_on_disk = IndexOnDisk}}, Betas1} = queue:out(Betas), + Alphas1 = queue:in(#alpha { msg = Msg, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = true, + index_on_disk = IndexOnDisk }, Alphas), + State1 = State #pstate { alphas = Alphas1, betas = Betas1 }, + {Timeout, State2} = case queue:is_empty(Betas1) of + true -> {hibernate, State1}; + false -> {infinity, prefetch(State1)} + end, + {noreply, State2, Timeout}; handle_call(publish_empty, _From, State) -> %% Very odd. This could happen if the queue is deleted or purged %% and the mixed queue fails to shut us down. {reply, ok, State, hibernate}; -handle_call(drain, _From, State = #pstate { target_count = 0, - msg_buf = MsgBuf }) -> - Res = case queue:is_empty(MsgBuf) of - true -> empty; - false -> {MsgBuf, finished} - end, - {stop, normal, Res, State}; -handle_call(drain, _From, State = #pstate { msg_buf = MsgBuf }) -> - {reply, {MsgBuf, continuing}, State #pstate { msg_buf = queue:new() }, - infinity}; -handle_call(drain_and_stop, _From, State = #pstate { msg_buf = MsgBuf }) -> - Res = case queue:is_empty(MsgBuf) of - true -> empty; - false -> MsgBuf +handle_call(drain, _From, State = #pstate { alphas = Alphas, betas = Betas }) -> + case {queue:is_empty(Betas), queue:is_empty(Alphas)} of + {true , _ } -> {stop, normal, {finished, Alphas}, State}; + {false, true } -> {stop, normal, {empty, Betas}, State}; + {false, false} -> {reply, {continuing, Alphas}, + State #pstate { alphas = queue:new() }} + end; +handle_call(drain_and_stop, _From, State = #pstate { alphas = Alphas, + betas = Betas }) -> + Res = case queue:is_empty(Alphas) of + true -> {empty, Betas}; + false -> {Alphas, Betas} end, {stop, normal, Res, State}; handle_call(stop, _From, State) -> @@ -276,3 +291,8 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. + +prefetch(State = #pstate { betas = Betas }) -> + {{value, #beta { msg_id = MsgId }}, _Betas1} = queue:out(Betas), + ok = rabbit_msg_store:idle_read(MsgId), + State. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cb2e9f24..4dbcefc8 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,7 +32,7 @@ -module(rabbit_variable_queue). -export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, - fetch/1, len/1, is_empty/1]). + fetch/1, len/1, is_empty/1, maybe_start_prefetcher/1]). %%---------------------------------------------------------------------------- @@ -171,19 +171,21 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, out_counter = 0 }. 
fetch(State = - #vqstate { q4 = Q4, + #vqstate { q3 = Q3, q4 = Q4, out_counter = OutCount, prefetcher = Prefetcher, index_state = IndexState, len = Len }) -> case queue:out(Q4) of {empty, _Q4} when Prefetcher == undefined -> fetch_from_q3_or_gamma(State); {empty, _Q4} -> - Q4a = - case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of - empty -> Q4; - Q4b -> Q4b + {Q3a, Q4a, Prefetcher1} = + case rabbit_queue_prefetcher:drain(Prefetcher) of + {empty, Betas} -> {queue:join(Betas, Q3), Q4, undefined}; + {finished, Alphas} -> {Q3, Alphas, undefined}; + {continuing, Alphas} -> {Q3, Alphas, Prefetcher} end, - fetch(State #vqstate { q4 = Q4a, prefetcher = undefined }); + fetch(State #vqstate { q3 = Q3a, q4 = Q4a, + prefetcher = Prefetcher1 }); {{value, #alpha { msg = Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -233,6 +235,24 @@ len(#vqstate { len = Len }) -> is_empty(State) -> 0 == len(State). +maybe_start_prefetcher(State = #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount, + q3 = Q3, prefetcher = undefined + }) -> + PrefetchCount = erlang:min(queue:len(Q3), TargetRamMsgCount - RamMsgCount), + if PrefetchCount =< 0 -> State; + true -> + {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3), + {ok, Prefetcher} = + rabbit_queue_prefetcher:start_link(PrefetchQueue), + RamMsgCount1 = RamMsgCount + PrefetchCount, + maybe_load_next_segment(State #vqstate { q3 = Q3a, + ram_msg_count = RamMsgCount1, + prefetcher = Prefetcher }) + end; +maybe_start_prefetcher(State) -> + State. + %%---------------------------------------------------------------------------- publish(msg, Msg = #basic_message { guid = MsgId, @@ -361,10 +381,6 @@ read_index_segment(SeqId, IndexState) -> {List, IndexState1} -> {List, IndexState1, SeqId1} end. -maybe_start_prefetcher(State) -> - %% TODO - State. 
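
The maybe_start_prefetcher above carves the first PrefetchCount entries off q3 with queue:split/2, which preserves order and hands the front portion to the prefetcher. For reference, the standard OTP behaviour it relies on:

    %% Illustration fragment: queue:split/2 keeps order.
    {PrefetchQueue, Q3a} = queue:split(2, queue:from_list([b1, b2, b3, b4, b5])),
    [b1, b2]     = queue:to_list(PrefetchQueue),  %% goes to the prefetcher
    [b3, b4, b5] = queue:to_list(Q3a).            %% stays behind as q3
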
- reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount,
                             target_ram_msg_count = TargetRamMsgCount })
   when TargetRamMsgCount >= RamMsgCount ->
-- cgit v1.2.1


From 41da634da3ff5b626733db057439ea9c915a5e73 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 9 Oct 2009 14:32:23 +0100
Subject: switching back to lists:min and lists:max as erlang:min/max do not
 exist in R12

---
 src/rabbit_disk_queue.erl     | 4 ++--
 src/rabbit_msg_store.erl      | 2 +-
 src/rabbit_variable_queue.erl | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 19f5a74e..42c4ed8b 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -716,8 +716,8 @@ extract_sequence_numbers(Sequences) ->
                       [] -> ets:insert_new(Sequences, {Q, SeqId, NextWrite});
                       [Orig = {_, Read, Write}] ->
-                          Repl = {Q, erlang:min(Read, SeqId),
-                                  erlang:max(Write, NextWrite)},
+                          Repl = {Q, lists:min([Read, SeqId]),
+                                  lists:max([Write, NextWrite])},
                           case Orig == Repl of
                               true -> true;
                               false -> ets:insert(Sequences, Repl)
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 70f8627e..b0b75249 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -508,7 +508,7 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) ->
     [FSEntry = #file_summary { valid_total_size = ValidTotalSize,
                                contiguous_top = ContiguousTop }] =
         ets:lookup(FileSummary, File),
-    ContiguousTop1 = erlang:min(ContiguousTop, Offset),
+    ContiguousTop1 = lists:min([ContiguousTop, Offset]),
     ValidTotalSize1 = ValidTotalSize - TotalSize,
     true = ets:insert(FileSummary,
                       FSEntry #file_summary { valid_total_size = ValidTotalSize1,
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 4dbcefc8..c197f6b0 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -239,7 +239,7 @@ maybe_start_prefetcher(State = #vqstate { ram_msg_count = RamMsgCount,
                                           target_ram_msg_count = TargetRamMsgCount,
                                           q3 = Q3, prefetcher = undefined
                                          }) ->
-    PrefetchCount = erlang:min(queue:len(Q3), TargetRamMsgCount - RamMsgCount),
+    PrefetchCount = lists:min([queue:len(Q3), TargetRamMsgCount - RamMsgCount]),
     if PrefetchCount =< 0 -> State;
        true ->
            {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3),
-- cgit v1.2.1


From b14fb7d3a6c91711ac711ee573bccba366beb3ba Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 9 Oct 2009 15:43:22 +0100
Subject: Prefetcher has priority over q1. However, we actually assume that
 the prefetcher does no work. The only other possibility is to assume that
 the prefetcher always completes, which can lead to q1 being pointlessly
 evicted to disk. Also, we stop the prefetcher as soon as we have to reduce
 our memory usage, and at that point everything should work out. So, when
 starting the prefetcher, we don't adjust the ram_msg_count, but as we drain
 or stop the prefetcher, we include the prefetched alphas in the
 ram_msg_count and then evict q1 to disk as necessary. Whilst this means we
 will have more msgs in RAM than we claim, the fact that we stop the
 prefetcher as soon as we have to reduce our memory usage saves us from ruin.
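
In other words, the count only catches up when alphas actually come back out of the prefetcher. A sketch of the policy (hypothetical helpers; the real bookkeeping lives in drain_prefetcher in the diff below):

    %% Hypothetical helpers illustrating the accounting described above.
    on_start(RamMsgCount, _PrefetchCount) -> RamMsgCount.  %% unchanged
    on_drain(RamMsgCount, Alphas) -> RamMsgCount + queue:len(Alphas).
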
--- src/rabbit_msg_store.erl | 5 +++- src/rabbit_queue_prefetcher.erl | 4 ++-- src/rabbit_variable_queue.erl | 51 ++++++++++++++++++++++++++++++----------- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b0b75249..6e28faa0 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -384,7 +384,10 @@ handle_cast(sync, State) -> noreply(sync(State)); handle_cast({idle_read, MsgId, From}, State) -> - {Result, State1} = internal_read_message(MsgId, State), + {Result, State1} = case internal_read_message(MsgId, State) of + {not_found, _} = Res -> Res; + {{ok, Msg}, State2} -> {Msg, State2} + end, rabbit_misc:with_exit_handler( fun () -> ok end, fun () -> rabbit_queue_prefetcher:publish(From, Result) end), diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index cad4c695..9d1b58ba 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -199,7 +199,7 @@ -spec(start_link/1 :: (queue()) -> ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(publish/2 :: (pid(), (message()| 'empty')) -> 'ok'). +-spec(publish/2 :: (pid(), (message()| 'not_found')) -> 'ok'). -spec(drain/1 :: (pid()) -> ({('finished' | 'continuing' | 'empty'), queue()})). -spec(drain_and_stop/1 :: (pid()) -> ({('empty' | queue()), queue()})). -spec(stop/1 :: (pid()) -> 'ok'). @@ -214,7 +214,7 @@ start_link(Betas) -> publish(Prefetcher, Obj = #basic_message {}) -> gen_server2:call(Prefetcher, {publish, Obj}, infinity); -publish(Prefetcher, empty) -> +publish(Prefetcher, not_found) -> gen_server2:call(Prefetcher, publish_empty, infinity). drain(Prefetcher) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c197f6b0..5a76c23e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -171,21 +171,14 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, out_counter = 0 }. 
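
The idle_read change above adapts internal_read_message/2, which yields {{ok, Msg}, State} or {not_found, State}, to what the prefetcher's publish/2 now accepts: the bare message or the atom not_found. A sketch of that shape translation (hypothetical helper, not part of the patch):

    %% Sketch: the store's read result versus what the prefetcher is sent.
    unwrap({{ok, Msg}, State}) -> {Msg, State};
    unwrap({not_found, _} = Res) -> Res.
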
fetch(State = - #vqstate { q3 = Q3, q4 = Q4, + #vqstate { q4 = Q4, out_counter = OutCount, prefetcher = Prefetcher, index_state = IndexState, len = Len }) -> case queue:out(Q4) of {empty, _Q4} when Prefetcher == undefined -> fetch_from_q3_or_gamma(State); {empty, _Q4} -> - {Q3a, Q4a, Prefetcher1} = - case rabbit_queue_prefetcher:drain(Prefetcher) of - {empty, Betas} -> {queue:join(Betas, Q3), Q4, undefined}; - {finished, Alphas} -> {Q3, Alphas, undefined}; - {continuing, Alphas} -> {Q3, Alphas, Prefetcher} - end, - fetch(State #vqstate { q3 = Q3a, q4 = Q4a, - prefetcher = Prefetcher1 }); + fetch(drain_prefetcher(drain, State)); {{value, #alpha { msg = Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -237,17 +230,17 @@ is_empty(State) -> maybe_start_prefetcher(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount, - q3 = Q3, prefetcher = undefined + q1 = Q1, q3 = Q3, prefetcher = undefined }) -> - PrefetchCount = lists:min([queue:len(Q3), TargetRamMsgCount - RamMsgCount]), + %% prefetched content takes priority over q1 + AvailableSpace = (TargetRamMsgCount - RamMsgCount) + queue:len(Q1), + PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), if PrefetchCount =< 0 -> State; true -> {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3), {ok, Prefetcher} = rabbit_queue_prefetcher:start_link(PrefetchQueue), - RamMsgCount1 = RamMsgCount + PrefetchCount, maybe_load_next_segment(State #vqstate { q3 = Q3a, - ram_msg_count = RamMsgCount1, prefetcher = Prefetcher }) end; maybe_start_prefetcher(State) -> @@ -381,13 +374,43 @@ read_index_segment(SeqId, IndexState) -> {List, IndexState1} -> {List, IndexState1, SeqId1} end. +drain_prefetcher(_DrainOrStop, State = #vqstate { prefetcher = undefined }) -> + State; +drain_prefetcher(DrainOrStop, + State = #vqstate { prefetcher = Prefetcher, q3 = Q3, q4 = Q4, + ram_msg_count = RamMsgCount }) -> + Fun = case DrainOrStop of + drain -> fun rabbit_queue_prefetcher:drain/1; + stop -> fun rabbit_queue_prefetcher:drain_and_stop/1 + end, + {Q3a, Q4a, Prefetcher1, RamMsgCountAdj} = + case Fun(Prefetcher) of + {empty, Betas} -> %% drain or drain_and_stop + {queue:join(Betas, Q3), Q4, undefined, 0}; + {finished, Alphas} -> %% just drain + {Q3, Alphas, undefined, queue:len(Alphas)}; + {continuing, Alphas} -> %% just drain + {Q3, Alphas, Prefetcher, queue:len(Alphas)}; + {Alphas, Betas} -> %% just drain_and_stop + {queue:join(Betas, Q3), queue:join(Q4, Alphas), undefined, + queue:len(Alphas)} + end, + maybe_push_q1_to_betas( + State #vqstate { prefetcher = Prefetcher1, q3 = Q3a, q4 = Q4a, + ram_msg_count = RamMsgCount + RamMsgCountAdj }). + reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount >= RamMsgCount -> State; reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> - State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), + %% strictly, it's not necessary to stop the prefetcher this early, + %% but because of its potential effect on q1 and the + %% ram_msg_count, it's just much simpler to stop it sooner and + %% relaunch when we next hibernate. 
+ State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas( + drain_prefetcher(stop, State))), case TargetRamMsgCount of 0 -> push_betas_to_gammas(State1); _ -> State1 -- cgit v1.2.1 From 2c4f88264cc9fed220f195152a32b1386fa02037 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Oct 2009 16:14:27 +0100 Subject: tidying of the beloved msg_store --- src/rabbit_msg_store.erl | 43 +++++++++++++++++++---------------------- src/rabbit_queue_prefetcher.erl | 13 +++++++++---- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 6e28faa0..db273551 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,13 +33,13 @@ -behaviour(gen_server2). --export([start_link/3, write/2, read/1, contains/1, remove/1, release/1, - sync/2, stop/0]). +-export([start_link/3, write/2, read/1, idle_read/2, contains/1, remove/1, + release/1, sync/2, stop/0]). -export([sync/0]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, idle_read/1]). + terminate/2, code_change/3]). -define(SERVER, ?MODULE). @@ -61,6 +61,8 @@ {'ok', pid()} | 'ignore' | {'error', any()}). -spec(write/2 :: (msg_id(), msg()) -> 'ok'). -spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). +-spec(idle_read/2 :: (msg_id(), fun (({'ok', msg()} | 'not_found') -> 'ok')) -> + 'ok'). -spec(contains/1 :: (msg_id()) -> boolean()). -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). @@ -231,15 +233,15 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). -write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). -read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). -idle_read(MsgId) -> gen_server2:pcast(?SERVER, -1, {idle_read, MsgId, self()}). -contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). -remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). -release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). -sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). -stop() -> gen_server2:call(?SERVER, stop, infinity). -sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal +write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). +read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). +idle_read(MsgId, Fun) -> gen_server2:pcast(?SERVER, -1, {idle_read, MsgId, Fun}). +contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). +remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). +release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). +sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). +stop() -> gen_server2:call(?SERVER, stop, infinity). +sync() -> gen_server2:pcast(?SERVER, 9, sync). 
%% internal %%---------------------------------------------------------------------------- %% gen_server callbacks @@ -342,6 +344,11 @@ handle_cast({write, MsgId, Msg}, noreply(State) end; +handle_cast({idle_read, MsgId, Fun}, State) -> + {Result, State1} = internal_read_message(MsgId, State), + rabbit_misc:with_exit_handler(fun () -> ok end, fun () -> Fun(Result) end), + noreply(State1); + handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> noreply( compact(sets:to_list( @@ -381,17 +388,7 @@ handle_cast({sync, MsgIds, K}, end; handle_cast(sync, State) -> - noreply(sync(State)); - -handle_cast({idle_read, MsgId, From}, State) -> - {Result, State1} = case internal_read_message(MsgId, State) of - {not_found, _} = Res -> Res; - {{ok, Msg}, State2} -> {Msg, State2} - end, - rabbit_misc:with_exit_handler( - fun () -> ok end, - fun () -> rabbit_queue_prefetcher:publish(From, Result) end), - noreply(State1). + noreply(sync(State)). handle_info(timeout, State) -> noreply(sync(State)). diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index 9d1b58ba..e3228bea 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -48,7 +48,8 @@ -record(pstate, { alphas, betas, - queue_mref + queue_mref, + idle_read_cb }). -record(alpha, @@ -232,9 +233,13 @@ init([Betas, QPid]) when is_pid(QPid) -> %% link isn't enough because the signal will not appear if the %% queue exits normally. Thus have to use monitor. MRef = erlang:monitor(process, QPid), + Self = self(), State = #pstate { alphas = queue:new(), betas = Betas, - queue_mref = MRef + queue_mref = MRef, + idle_read_cb = fun ({ok, Msg}) -> publish(Self, Msg); + (not_found) -> publish(Self, not_found) + end }, {ok, prefetch(State), infinity, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -292,7 +297,7 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -prefetch(State = #pstate { betas = Betas }) -> +prefetch(State = #pstate { betas = Betas, idle_read_cb = CB }) -> {{value, #beta { msg_id = MsgId }}, _Betas1} = queue:out(Betas), - ok = rabbit_msg_store:idle_read(MsgId), + ok = rabbit_msg_store:idle_read(MsgId, CB), State. -- cgit v1.2.1 From 5ccf5d36052c8486e6ef1f347933e11b094fcc1c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Oct 2009 16:18:29 +0100 Subject: gee, can you tell none of this has been run yet? --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 5a76c23e..a6574d47 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -616,7 +616,8 @@ push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) -> end. %% the first arg is the older gamma -combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> {undefined, 0}; +combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> + #gamma { seq_id = undefined, count = 0 }; combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B; combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A; combine_gammas(#gamma { seq_id = SeqIdLow, count = CountLow }, -- cgit v1.2.1 From f6701e79812e028d47bbf56be3d61d088d3ec9de Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Oct 2009 16:52:22 +0100 Subject: Added ack to vq. Realised that ack no longer needs the msg itself, so there've been a few associated changes in queue_process. 
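
The new ack/2 (in the diff below) recognises two tag shapes: ack_not_on_disk for messages that never touched disk, and {ack_index_and_store, MsgId, SeqId} for those that must be removed from both the msg_store and the queue index. A sketch of the partitioning it performs:

    %% Sketch of the tag partitioning done by ack/2 below.
    partition_acks(AckTags) ->
        lists:foldl(
          fun (ack_not_on_disk, Acc) -> Acc;   %% nothing on disk to undo
              ({ack_index_and_store, MsgId, SeqId}, {MsgIds, SeqIds}) ->
                  {[MsgId | MsgIds], [SeqId | SeqIds]}
          end, {[], []}, AckTags).
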
--- src/rabbit_amqqueue_process.erl | 20 ++++++++++----------
 src/rabbit_variable_queue.erl   | 16 +++++++++++++++-
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 5789b105..06c6cd85 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -278,11 +278,10 @@ deliver_from_queue_deliver(AckRequired, {false, AutoAcks},
                            State = #q { mixed_state = MS }) ->
     {{Msg, IsDelivered, AckTag, Remaining}, MS1} =
         rabbit_mixed_queue:fetch(MS),
-    AutoAcks1 =
-        case AckRequired of
-            true -> AutoAcks;
-            false -> [{Msg, AckTag} | AutoAcks]
-        end,
+    AutoAcks1 = case AckRequired of
+                    true -> AutoAcks;
+                    false -> [AckTag | AutoAcks]
+                end,
     {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1},
      State #q { mixed_state = MS1 }}.

@@ -348,8 +347,8 @@ deliver_or_requeue_n(MsgsWithAcks, State) ->
 deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) ->
     0 < Len.
 deliver_or_requeue_msgs_deliver(
-  false, {Len, AcksAcc, [(MsgAckTag = {Msg, _}) | MsgsWithAcks]}, State) ->
-    {{Msg, true, noack}, {Len - 1, [MsgAckTag | AcksAcc], MsgsWithAcks}, State};
+  false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) ->
+    {{Msg, true, noack}, {Len - 1, [AckTag | AcksAcc], MsgsWithAcks}, State};
 deliver_or_requeue_msgs_deliver(
   true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) ->
     {{Msg, true, AckTag}, {Len - 1, AcksAcc, MsgsWithAcks}, State}.
@@ -620,7 +619,7 @@ handle_call({basic_get, ChPid, NoAck}, _From,
                         store_ch_record(C#cr{unacked_messages = NewUAM}),
                         {ok, MS1};
                     false ->
-                        rabbit_mixed_queue:ack([{Msg, AckTag}], MS1)
+                        rabbit_mixed_queue:ack([AckTag], MS1)
                 end,
             Message = {QName, self(), NextId, IsDelivered, Msg},
             reply({ok, Remaining, Message},
@@ -764,8 +763,9 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) ->
             case Txn of
                 none ->
                     {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM),
-                    {ok, MS} =
-                        rabbit_mixed_queue:ack(MsgWithAcks, State #q.mixed_state),
+                    {ok, MS} = rabbit_mixed_queue:ack(
+                                 [AckTag || {_Msg, AckTag} <- MsgWithAcks],
+                                 State #q.mixed_state),
                     store_ch_record(C#cr{unacked_messages = Remaining}),
                     noreply(State #q { mixed_state = MS });
                 _ ->
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index a6574d47..5cf08939 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -32,7 +32,7 @@
 -module(rabbit_variable_queue).

 -export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1,
-         fetch/1, len/1, is_empty/1, maybe_start_prefetcher/1]).
+         fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1]).

 %%----------------------------------------------------------------------------

@@ -246,6 +246,20 @@ maybe_start_prefetcher(State) ->
     State.

+ack(AckTags, State = #vqstate { index_state = IndexState }) ->
+    {MsgIds, SeqIds} =
+        lists:foldl(
+          fun (ack_not_on_disk, Acc) -> Acc;
+              ({ack_index_and_store, MsgId, SeqId}, {MsgIds, SeqIds}) ->
+                  {[MsgId | MsgIds], [SeqId | SeqIds]}
+          end, {[], []}, AckTags),
+    IndexState1 = case SeqIds of
+                      [] -> IndexState;
+                      _ -> rabbit_queue_index:write_acks(SeqIds, IndexState)
+                  end,
+    ok = rabbit_msg_store:remove(MsgIds),
+    State #vqstate { index_state = IndexState1 }.
+ %%---------------------------------------------------------------------------- publish(msg, Msg = #basic_message { guid = MsgId, -- cgit v1.2.1 From ee6152c2600bc0688b8a94d32d0dcf57eb6bdae9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Oct 2009 17:01:13 +0100 Subject: cosmetics --- src/rabbit_msg_store.erl | 26 +++++++++++++------------- src/rabbit_queue_prefetcher.erl | 19 +++++++++++++------ src/rabbit_variable_queue.erl | 9 +++++---- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index db273551..2b4bb1f2 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/3, write/2, read/1, idle_read/2, contains/1, remove/1, +-export([start_link/3, write/2, read/1, peruse/2, contains/1, remove/1, release/1, sync/2, stop/0]). -export([sync/0]). %% internal @@ -61,7 +61,7 @@ {'ok', pid()} | 'ignore' | {'error', any()}). -spec(write/2 :: (msg_id(), msg()) -> 'ok'). -spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). --spec(idle_read/2 :: (msg_id(), fun (({'ok', msg()} | 'not_found') -> 'ok')) -> +-spec(peruse/2 :: (msg_id(), fun (({'ok', msg()} | 'not_found') -> 'ok')) -> 'ok'). -spec(contains/1 :: (msg_id()) -> boolean()). -spec(remove/1 :: ([msg_id()]) -> 'ok'). @@ -233,15 +233,15 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). -write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). -read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). -idle_read(MsgId, Fun) -> gen_server2:pcast(?SERVER, -1, {idle_read, MsgId, Fun}). -contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). -remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). -release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). -sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). -stop() -> gen_server2:call(?SERVER, stop, infinity). -sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal +write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). +read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). +peruse(MsgId, Fun) -> gen_server2:pcast(?SERVER, -1, {peruse, MsgId, Fun}). +contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). +remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). +release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). +sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). +stop() -> gen_server2:call(?SERVER, stop, infinity). +sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal %%---------------------------------------------------------------------------- %% gen_server callbacks @@ -344,9 +344,9 @@ handle_cast({write, MsgId, Msg}, noreply(State) end; -handle_cast({idle_read, MsgId, Fun}, State) -> +handle_cast({peruse, MsgId, Fun}, State) -> {Result, State1} = internal_read_message(MsgId, State), - rabbit_misc:with_exit_handler(fun () -> ok end, fun () -> Fun(Result) end), + Fun(Result), noreply(State1); handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl index e3228bea..fd407c9d 100644 --- a/src/rabbit_queue_prefetcher.erl +++ b/src/rabbit_queue_prefetcher.erl @@ -49,7 +49,7 @@ { alphas, betas, queue_mref, - idle_read_cb + peruse_cb }). 
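
Since peruse/2 ships a closure that the msg_store executes in its own process, it is now the callback, not the store, that has to tolerate the requester having died in the meantime; the prefetcher therefore wraps its publishes in with_exit_handler, as the init/1 below shows. A condensed sketch of the pattern (safe_publish is a hypothetical name):

    %% Sketch: swallow exits in case the prefetcher is already gone
    %% by the time the store runs the fun.
    safe_publish(Pid, Result) ->
        rabbit_misc:with_exit_handler(
          fun () -> ok end,                 %% requester died: drop result
          fun () -> case Result of
                        {ok, Msg} -> rabbit_queue_prefetcher:publish(Pid, Msg);
                        not_found -> rabbit_queue_prefetcher:publish(Pid, not_found)
                    end
          end).
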
-record(alpha, @@ -234,12 +234,19 @@ init([Betas, QPid]) when is_pid(QPid) -> %% queue exits normally. Thus have to use monitor. MRef = erlang:monitor(process, QPid), Self = self(), + CB = fun (Result) -> + rabbit_misc:with_exit_handler( + fun () -> ok end, + fun () -> case Result of + {ok, Msg} -> publish(Self, Msg); + not_found -> publish(Self, not_found) + end + end) + end, State = #pstate { alphas = queue:new(), betas = Betas, queue_mref = MRef, - idle_read_cb = fun ({ok, Msg}) -> publish(Self, Msg); - (not_found) -> publish(Self, not_found) - end + peruse_cb = CB }, {ok, prefetch(State), infinity, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -297,7 +304,7 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -prefetch(State = #pstate { betas = Betas, idle_read_cb = CB }) -> +prefetch(State = #pstate { betas = Betas, peruse_cb = CB }) -> {{value, #beta { msg_id = MsgId }}, _Betas1} = queue:out(Betas), - ok = rabbit_msg_store:idle_read(MsgId, CB), + ok = rabbit_msg_store:peruse(MsgId, CB), State. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 5cf08939..ae9ca375 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -228,10 +228,11 @@ len(#vqstate { len = Len }) -> is_empty(State) -> 0 == len(State). -maybe_start_prefetcher(State = #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount, - q1 = Q1, q3 = Q3, prefetcher = undefined - }) -> +maybe_start_prefetcher(State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount, + q1 = Q1, q3 = Q3, prefetcher = undefined + }) -> %% prefetched content takes priority over q1 AvailableSpace = (TargetRamMsgCount - RamMsgCount) + queue:len(Q1), PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), -- cgit v1.2.1 From 8bce67f30f803314248c19cc19eaab341f448e8f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Oct 2009 18:17:45 +0100 Subject: Beautiful! --- src/rabbit_variable_queue.erl | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ae9ca375..b0bfd8cd 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,7 +32,7 @@ -module(rabbit_variable_queue). -export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, - fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1]). + fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1]). %%---------------------------------------------------------------------------- @@ -261,8 +261,47 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> ok = rabbit_msg_store:remove(MsgIds), State #vqstate { index_state = IndexState1 }. +purge(State = #vqstate { q3 = Q3, prefetcher = undefined, + index_state = IndexState }) -> + case queue:is_empty(Q3) of + true -> State #vqstate { q1 = queue:new(), q4 = queue:new() }; + false -> IndexState1 = remove_betas(Q3, IndexState), + purge(maybe_load_next_segment( + State #vqstate { index_state = IndexState1 })) + end; +purge(State) -> + purge(drain_prefetcher(stop, State)). 
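
The purge above empties the queue in stages: any prefetcher is stopped and folded back in, q4 is cleared, and then q3 is repeatedly cleared and refilled from the next index segment until nothing is left. A skeletal sketch of that loop's shape (q3/1, clear_q3/1 and load_next_segment/1 are hypothetical stand-ins for the real code above and maybe_load_next_segment/1):

    %% Sketch of the purge recursion, not part of the patch.
    purge_loop(State) ->
        case queue:is_empty(q3(State)) of
            true  -> State;  %% nothing more can arrive from the index
            false -> purge_loop(load_next_segment(clear_q3(State)))
        end.
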
+
 %%----------------------------------------------------------------------------

+remove_betas(Q, IndexState) ->
+    {MsgIds, SeqIds, IndexState1} =
+        lists:foldl(
+          fun (#beta { msg_id = MsgId,
+                       seq_id = SeqId,
+                       is_delivered = IsDelivered,
+                       index_on_disk = IndexOnDisk },
+               {MsgIdsAcc, SeqIdsAcc, IndexStateN}) ->
+                  IndexStateN1 = case IndexOnDisk andalso not IsDelivered of
+                                     true -> rabbit_queue_index:write_delivered(
+                                               SeqId, IndexStateN);
+                                     false -> IndexStateN
+                                 end,
+                  SeqIdsAcc1 = case IndexOnDisk of
+                                   true -> [SeqId | SeqIdsAcc];
+                                   false -> SeqIdsAcc
+                               end,
+                  {[MsgId | MsgIdsAcc], SeqIdsAcc1, IndexStateN1}
+          end, {[], [], IndexState}, lists:reverse(queue:to_list(Q))),
+    ok = case MsgIds of
+             [] -> ok;
+             _ -> rabbit_msg_store:remove(MsgIds)
+         end,
+    case SeqIds of
+        [] -> IndexState1;
+        _ -> rabbit_queue_index:write_acks(SeqIds, IndexState1)
+    end.
+
 publish(msg, Msg = #basic_message { guid = MsgId,
                                     is_persistent = IsPersistent },
         SeqId, IsDelivered, State = #vqstate { index_state = IndexState,
-- cgit v1.2.1


From f528b2da3af823b457a11e86b901eb6b98fd6f9e Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Sun, 11 Oct 2009 14:02:44 +0100
Subject: rollback transactions on queue termination

That way we don't leave garbage - transactionally published, but
uncommitted messages - in the message store.

Also, we can get rid of the pending_commits state wart in
disk_queue. That is possible because both tx commits and queue
deletions are issued by the queue process and tx commits are
synchronous, so there is never a chance of there being a pending
commit when doing a deletion.
---
 src/rabbit_amqqueue_process.erl | 11 +++-
 src/rabbit_disk_queue.erl       | 128 ++++++++++++++++------------------------
 2 files changed, 59 insertions(+), 80 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 5789b105..0c334bc3 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -118,16 +118,23 @@ init(Q = #amqqueue { name = QName, durable = Durable }) ->
     {ok, start_memory_timer(State), hibernate,
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

-terminate(_Reason, State) ->
+terminate(_Reason, State = #q{mixed_state = MS}) ->
     %% FIXME: How do we cancel active subscriptions?
     State1 = stop_memory_timer(State),
+    %% Ensure that any persisted tx messages are removed;
+    %% mixed_queue:delete_queue cannot do that for us since neither
+    %% mixed_queue nor disk_queue keep a record of uncommitted tx
+    %% messages.
+    {ok, MS1} = rabbit_mixed_queue:tx_rollback(
+                  lists:concat([PM || #tx { pending_messages = PM } <-
+                                          all_tx_record()]), MS),
     %% Delete from disk queue first. If we crash at this point, when a
     %% durable queue, we will be recreated at startup, possibly with
     %% partial content. The alternative is much worse however - if we
     %% called internal_delete first, we would then have a race between
     %% the disk_queue delete and a new queue with the same name being
     %% created and published to.
-    {ok, _MS} = rabbit_mixed_queue:delete_queue(State1 #q.mixed_state),
+    {ok, _MS} = rabbit_mixed_queue:delete_queue(MS1),
     ok = rabbit_amqqueue:internal_delete(qname(State1)).

 code_change(_OldVsn, State, _Extra) ->
diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl
index 42c4ed8b..7d44dd9d 100644
--- a/src/rabbit_disk_queue.erl
+++ b/src/rabbit_disk_queue.erl
@@ -66,10 +66,7 @@

 -define(SERVER, ?MODULE).
--record(dqstate, - { sequences, %% next read and write for each q - pending_commits %% dict of txns waiting for msg_store - }). +-record(dqstate, { sequences }). %% next read and write for each q %%---------------------------------------------------------------------------- @@ -170,8 +167,8 @@ stop_and_obliterate() -> %% private -finalise_commit(TxId) -> - gen_server2:cast(?SERVER, {finalise_commit, TxId}). +finalise_commit(TxDetails) -> + gen_server2:cast(?SERVER, {finalise_commit, TxDetails}). %%---------------------------------------------------------------------------- %% gen_server behaviour @@ -200,7 +197,7 @@ init([]) -> Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), ok = extract_sequence_numbers(Sequences), - State = #dqstate { sequences = Sequences, pending_commits = dict:new() }, + State = #dqstate { sequences = Sequences }, {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -269,8 +266,8 @@ handle_cast({prefetch, Q, From}, State) -> false -> ok end, noreply(State1); -handle_cast({finalise_commit, TxId}, State) -> - noreply(finalise_commit(TxId, State)). +handle_cast({finalise_commit, TxDetails}, State) -> + noreply(finalise_commit(TxDetails, State)). handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. @@ -390,54 +387,40 @@ internal_tx_publish(Message = #basic_message { guid = MsgId, MsgId, Message #basic_message { content = ClearedContent }), {ok, State}. -internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, - State = #dqstate { pending_commits = PendingCommits }) -> +internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State) -> + TxDetails = {Q, PubMsgIds, AckSeqIds, From}, ok = rabbit_msg_store:sync([MsgId || {MsgId, _, _} <- PubMsgIds], - fun () -> finalise_commit({Q, From}) end), - PendingCommits1 = dict:store(Q, {PubMsgIds, AckSeqIds, From}, - PendingCommits), - State #dqstate { pending_commits = PendingCommits1 }. - -finalise_commit({Q, From}, - State = #dqstate { sequences = Sequences, - pending_commits = PendingCommits }) -> - case dict:find(Q, PendingCommits) of - {ok, {PubMsgIds, AckSeqIds, From}} -> - {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - WriteSeqId = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun ({MsgId, IsDelivered, IsPersistent}, SeqId) -> - ok = mnesia:write( - rabbit_disk_queue, - #dq_msg_loc { - queue_and_seq_id = {Q, SeqId}, - msg_id = MsgId, - is_delivered = IsDelivered, - is_persistent = IsPersistent - }, write), - SeqId + 1 - end, InitWriteSeqId, PubMsgIds) - end), - {ok, State1} = remove_messages(Q, AckSeqIds, State), - true = case PubMsgIds of - [] -> true; - _ -> ets:insert(Sequences, - {Q, InitReadSeqId, WriteSeqId}) - end, - gen_server2:reply(From, ok), - State1 # dqstate { pending_commits = - dict:erase(Q, PendingCommits) }; - {ok, _} -> - %% sync notification for a deleted queue which has since - %% been recreated - State; - error -> - %% sync notification for a deleted queue - State - end. + fun () -> finalise_commit(TxDetails) end), + State. 
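
Note that internal_tx_commit above no longer replies to the committing channel itself; the reply happens later, from finalise_commit, once the msg_store has synced. The handshake, end to end, as a sketch:

    channel     --- tx_commit call ------> disk_queue
    disk_queue  --- sync(MsgIds, Fun) ---> msg_store
    msg_store   --- fsync completes -----> runs Fun, which casts
                                           {finalise_commit, TxDetails}
    disk_queue  --- finalise_commit -----> writes the mnesia rows, then
                                           gen_server2:reply(From, ok)
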
+ +finalise_commit({Q, PubMsgIds, AckSeqIds, From}, + State = #dqstate { sequences = Sequences }) -> + {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), + WriteSeqId = + rabbit_misc:execute_mnesia_transaction( + fun() -> + ok = mnesia:write_lock_table(rabbit_disk_queue), + lists:foldl( + fun ({MsgId, IsDelivered, IsPersistent}, SeqId) -> + ok = mnesia:write( + rabbit_disk_queue, + #dq_msg_loc { + queue_and_seq_id = {Q, SeqId}, + msg_id = MsgId, + is_delivered = IsDelivered, + is_persistent = IsPersistent + }, write), + SeqId + 1 + end, InitWriteSeqId, PubMsgIds) + end), + {ok, State1} = remove_messages(Q, AckSeqIds, State), + true = case PubMsgIds of + [] -> true; + _ -> ets:insert(Sequences, + {Q, InitReadSeqId, WriteSeqId}) + end, + gen_server2:reply(From, ok), + State1. internal_publish(Q, Message = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -551,31 +534,20 @@ internal_purge(Q, State = #dqstate { sequences = Sequences }) -> {ok, WriteSeqId - ReadSeqId, State1} end. -internal_delete_queue(Q, - State = #dqstate { pending_commits = PendingCommits }) -> - %% remove pending commits - State1 = case dict:find(Q, PendingCommits) of - {ok, {PubMsgIds, _, _}} -> - ok = rabbit_msg_store:remove( - [MsgId || {MsgId, _, _} <- PubMsgIds]), - State # dqstate { pending_commits = - dict:erase(Q, PendingCommits) }; - error -> - State - end, +internal_delete_queue(Q, State) -> %% remove everything undelivered - {ok, _Count, State2 = #dqstate { sequences = Sequences }} = - internal_purge(Q, State1), + {ok, _Count, State1 = #dqstate { sequences = Sequences }} = + internal_purge(Q, State), true = ets:delete(Sequences, Q), %% remove everything already delivered - Objs = mnesia:dirty_match_object( - rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, '_'}, _ = '_' }), - MsgSeqIds = lists:map(fun (#dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, - msg_id = MsgId }) -> - {MsgId, SeqId} - end, Objs), - remove_messages(Q, MsgSeqIds, State2). + remove_messages( + Q, [{MsgId, SeqId} || #dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, + msg_id = MsgId } <- + mnesia:dirty_match_object( + rabbit_disk_queue, + #dq_msg_loc { + queue_and_seq_id = {Q, '_'}, + _ = '_' })], State1). internal_delete_non_durable_queues( DurableQueues, State = #dqstate { sequences = Sequences }) -> -- cgit v1.2.1 From 38b234e722f4ee25aee748341aa3a8f687a9649c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 11:05:44 +0100 Subject: given the way in which the vq works, the removed tests were unnecessary. For example, in store_alpha, q1 *must* be empty unless one of q2, gamma and q3 is non empty. As such, to determine whether the alpha goes to q4, we only need to test for emptiness of q2, gamma and q3, not q1 aswell. Similar logic holds for store_beta --- src/rabbit_variable_queue.erl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b0bfd8cd..41ad7791 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -87,11 +87,11 @@ %% on disk at that stage (msg on disk, index on disk). %% %% When a msg arrives, we decide in which form it should be. It is -%% then added to the rightmost appropriate queue, maintaining +%% then added to the right-most appropriate queue, maintaining %% order. Thus if the msg is to be an alpha, it will be added to q1, -%% unless all of q1, q2, gamma and q3 are empty, in which case it will -%% go to q4. 
If it is to be a beta, it will be added to q2 unless all -%% of q2 and gamma are empty, in which case it will go to q3. +%% unless all of q2, gamma and q3 are empty, in which case it will go +%% to q4. If it is to be a beta, it will be added to q2 unless gamma +%% is empty, in which case it will go to q3. %% %% The major invariant is that if the msg is to be a beta, q1 will be %% empty, and if it is to be a gamma then both q1 and q2 will be empty. @@ -528,8 +528,8 @@ store_alpha_entry(Entry = #alpha {}, State = #vqstate { q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, q3 = Q3, q4 = Q4 }) -> - case queue:is_empty(Q1) andalso queue:is_empty(Q2) andalso - GammaCount == 0 andalso queue:is_empty(Q3) of + case queue:is_empty(Q2) andalso GammaCount == 0 andalso queue:is_empty(Q3) + of true -> State #vqstate { q4 = queue:in(Entry, Q4) }; false -> @@ -539,7 +539,7 @@ store_alpha_entry(Entry = #alpha {}, State = store_beta_entry(Entry = #beta {}, State = #vqstate { q2 = Q2, gamma = #gamma { count = GammaCount }, q3 = Q3 }) -> - case queue:is_empty(Q2) andalso GammaCount == 0 of + case GammaCount == 0 of true -> State #vqstate { q3 = queue:in(Entry, Q3) }; false -> State #vqstate { q2 = queue:in(Entry, Q2) } end. -- cgit v1.2.1 From a326542afe92ecb5ce85206265179c464fb13646 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 11:44:59 +0100 Subject: Well, it was beautiful, but it was also wrong. Firstly, purge needs to return the count of the msgs purged. Secondly, it needs to remember to purge msgs and indices on disk that are in q1 or q4. --- src/rabbit_variable_queue.erl | 68 ++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 41ad7791..261478c5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -261,27 +261,36 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> ok = rabbit_msg_store:remove(MsgIds), State #vqstate { index_state = IndexState1 }. -purge(State = #vqstate { q3 = Q3, prefetcher = undefined, +purge(State = #vqstate { prefetcher = undefined, q4 = Q4, index_state = IndexState }) -> - case queue:is_empty(Q3) of - true -> State #vqstate { q1 = queue:new(), q4 = queue:new() }; - false -> IndexState1 = remove_betas(Q3, IndexState), - purge(maybe_load_next_segment( - State #vqstate { index_state = IndexState1 })) - end; + {Q4Count, IndexState1} = remove_queue_entries(Q4, IndexState), + purge1(Q4Count, State #vqstate { index_state = IndexState1, + q4 = queue:new() }); purge(State) -> purge(drain_prefetcher(stop, State)). %%---------------------------------------------------------------------------- -remove_betas(Q, IndexState) -> - {MsgIds, SeqIds, IndexState1} = +purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> + case queue:is_empty(Q3) of + true -> + {Q1Count, IndexState1} = + remove_queue_entries(State #vqstate.q1, IndexState), + {Count + Q1Count, State #vqstate { q1 = queue:new(), + index_state = IndexState1 }}; + false -> + {Q3Count, IndexState1} = remove_queue_entries(Q3, IndexState), + purge1(Count + Q3Count, + maybe_load_next_segment( + State #vqstate { index_state = IndexState1 })) + end. 
+ +remove_queue_entries(Q, IndexState) -> + {Count, MsgIds, SeqIds, IndexState1} = lists:foldl( - fun (#beta { msg_id = MsgId, - seq_id = SeqId, - is_delivered = IsDelivered, - index_on_disk = IndexOnDisk }, - {MsgIdsAcc, SeqIdsAcc, IndexStateN}) -> + fun (Entry, {CountN, MsgIdsAcc, SeqIdsAcc, IndexStateN}) -> + {MsgId, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk} = + entry_salient_details(Entry), IndexStateN1 = case IndexOnDisk andalso not IsDelivered of true -> rabbit_queue_index:write_delivered( SeqId, IndexStateN); @@ -291,16 +300,35 @@ remove_betas(Q, IndexState) -> true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc end, - {[MsgId | MsgIdsAcc], SeqIdsAcc1, IndexStateN1} - end, {[], [], IndexState}, lists:reverse(queue:to_list(Q))), + MsgIdsAcc1 = case MsgOnDisk of + true -> [MsgId | MsgIdsAcc]; + false -> MsgIdsAcc + end, + {CountN + 1, MsgIdsAcc1, SeqIdsAcc1, IndexStateN1} + %% the foldl is going to reverse the result lists, so start + %% by reversing so that we maintain doing things in + %% ascending seqid order + end, {0, [], [], IndexState}, lists:reverse(queue:to_list(Q))), ok = case MsgIds of [] -> ok; _ -> rabbit_msg_store:remove(MsgIds) end, - case SeqIds of - [] -> IndexState1; - _ -> rabbit_queue_index:write_acks(SeqIds, IndexState1) - end. + IndexState2 = + case SeqIds of + [] -> IndexState1; + _ -> rabbit_queue_index:write_acks(SeqIds, IndexState1) + end, + {Count, IndexState2}. + +entry_salient_details(#alpha { msg = #basic_message { guid = MsgId }, + seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }) -> + {MsgId, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}; +entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, + is_delivered = IsDelivered, + index_on_disk = IndexOnDisk }) -> + {MsgId, SeqId, IsDelivered, true, IndexOnDisk}. publish(msg, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, -- cgit v1.2.1 From ede3c4b04c27edb622de7b10041d8076f6229a51 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 12:22:16 +0100 Subject: a couple of full list sorts were unnecessary and should only have been max or mins --- src/rabbit_queue_index.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 98ab2d77..18cea92a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -318,17 +318,18 @@ find_lowest_seq_id_seg_and_highest_seq_id(Dir) -> %% that's fine. The important thing is that the segment exists and %% the seq_id reported is on a segment boundary. LowSeqIdSeg = - case lists:sort(SegNumsPaths) of + case SegNumsPaths of [] -> 0; - [{SegNum1, _SegPath1}|_] -> reconstruct_seq_id(SegNum1, 0) + _ -> {SegNum1, _SegPath1} = lists:min(SegNumsPaths), + reconstruct_seq_id(SegNum1, 0) end, HighestSeqId = - case rev_sort(SegNumsPaths) of + case SegNumsPaths of [] -> 0; - [{SegNum2, SegPath2}|_] -> - {_SDict, _AckCount, HighRelSeq} = - load_segment(SegNum2, SegPath2, dict:new()), - reconstruct_seq_id(SegNum2, HighRelSeq) + _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), + {_SDict, _AckCount, HighRelSeq} = + load_segment(SegNum2, SegPath2, dict:new()), + reconstruct_seq_id(SegNum2, HighRelSeq) end, {LowSeqIdSeg, HighestSeqId}. 
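
The switch to lists:min/1 and lists:max/1 above is safe because Erlang's term ordering compares same-size tuples element by element, so the extremes of {SegNum, SegPath} pairs are decided by the segment number; it also replaces an O(n log n) sort with a single O(n) scan. For instance:

    %% Illustration fragment: term ordering on pairs, as relied on above.
    {1, a} = lists:min([{3, c}, {1, a}, {2, b}]),
    {3, c} = lists:max([{3, c}, {1, a}, {2, b}]).
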
-- cgit v1.2.1 From 6f4db1865d28464ff48088a7433b8db1ce81e5df Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 12:34:31 +0100 Subject: for queue delete I need to be able to discover the lowest and highest non-acked seqids thus splitting out this functionality --- src/rabbit_queue_index.erl | 71 +++++++++++++++++++++---------------------- src/rabbit_variable_queue.erl | 4 ++- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 18cea92a..1ce4ab21 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -33,7 +33,7 @@ -export([init/1, write_published/4, write_delivered/2, write_acks/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, - segment_size/0]). + segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1]). %%---------------------------------------------------------------------------- %% The queue disk index @@ -133,8 +133,7 @@ seg_ack_counts :: dict() }). --spec(init/1 :: (string()) -> {non_neg_integer(), non_neg_integer(), - non_neg_integer(), qistate()}). +-spec(init/1 :: (string()) -> {non_neg_integer(), qistate()}). -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). @@ -145,6 +144,8 @@ | 'not_found'), qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(segment_size/0 :: () -> non_neg_integer()). +-spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> + {non_neg_integer(), non_neg_integer()}). -endif. @@ -158,17 +159,14 @@ init(Name) -> {AckCounts, TotalMsgCount} = scatter_journal(Dir, find_ack_counts(Dir)), {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), [raw, binary, delayed_write, write, read]), - {LowestSeqIdSeg, HighestSeqId} = - find_lowest_seq_id_seg_and_highest_seq_id(Dir), - {LowestSeqIdSeg, HighestSeqId + 1, TotalMsgCount, - #qistate { dir = Dir, - cur_seg_num = undefined, - cur_seg_hdl = undefined, - journal_ack_count = 0, - journal_ack_dict = dict:new(), - journal_handle = JournalHdl, - seg_ack_counts = AckCounts - }}. + {TotalMsgCount, #qistate { dir = Dir, + cur_seg_num = undefined, + cur_seg_hdl = undefined, + journal_ack_count = 0, + journal_ack_dict = dict:new(), + journal_handle = JournalHdl, + seg_ack_counts = AckCounts + }}. write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> @@ -255,6 +253,29 @@ next_segment_boundary(SeqId) -> segment_size() -> ?SEGMENT_ENTRIES_COUNT. +find_lowest_seq_id_seg_and_next_seq_id( + #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> + SegNumsPaths = all_segment_nums_paths(Dir), + %% We don't want the lowest seq_id, merely the seq_id of the start + %% of the lowest segment. That seq_id may not actually exist, but + %% that's fine. The important thing is that the segment exists and + %% the seq_id reported is on a segment boundary. + LowSeqIdSeg = + case SegNumsPaths of + [] -> 0; + _ -> {SegNum1, _SegPath1} = lists:min(SegNumsPaths), + reconstruct_seq_id(SegNum1, 0) + end, + HighestSeqId = + case SegNumsPaths of + [] -> 0; + _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), + {_SDict, _AckCount, HighRelSeq} = + load_segment(SegNum2, SegPath2, JAckDict), + 1 + reconstruct_seq_id(SegNum2, HighRelSeq) + end, + {LowSeqIdSeg, HighestSeqId}. 
+ %%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- @@ -311,28 +332,6 @@ all_segment_nums_paths(Dir) -> SegName)), filename:join(Dir, SegName)} || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. -find_lowest_seq_id_seg_and_highest_seq_id(Dir) -> - SegNumsPaths = all_segment_nums_paths(Dir), - %% We don't want the lowest seq_id, merely the seq_id of the start - %% of the lowest segment. That seq_id may not actually exist, but - %% that's fine. The important thing is that the segment exists and - %% the seq_id reported is on a segment boundary. - LowSeqIdSeg = - case SegNumsPaths of - [] -> 0; - _ -> {SegNum1, _SegPath1} = lists:min(SegNumsPaths), - reconstruct_seq_id(SegNum1, 0) - end, - HighestSeqId = - case SegNumsPaths of - [] -> 0; - _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), - {_SDict, _AckCount, HighRelSeq} = - load_segment(SegNum2, SegPath2, dict:new()), - reconstruct_seq_id(SegNum2, HighRelSeq) - end, - {LowSeqIdSeg, HighestSeqId}. - find_ack_counts(Dir) -> SegNumsPaths = all_segment_nums_paths(Dir), lists:foldl( diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 261478c5..cd47f669 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -112,8 +112,10 @@ %%---------------------------------------------------------------------------- init(QueueName) -> - {GammaSeqId, NextSeqId, GammaCount, IndexState} = + {GammaCount, IndexState} = rabbit_queue_index:init(QueueName), + {GammaSeqId, NextSeqId} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), Gamma = case GammaCount of 0 -> #gamma { seq_id = undefined, count = 0 }; _ -> #gamma { seq_id = GammaSeqId, count = GammaCount } -- cgit v1.2.1 From 27edb9bcef5ce26e8f14f3208a61799d3de38316 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 13:33:56 +0100 Subject: implemented delete. This is slightly less than pretty as, after doing the purge, we have to walk through the index on disk in order to pull up msgs which have been delivered and not acked --- src/rabbit_variable_queue.erl | 47 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cd47f669..a9a43fd8 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,8 +31,9 @@ -module(rabbit_variable_queue). --export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, - fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1]). +-export([init/1, publish/3, set_queue_ram_duration_target/2, + remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, + maybe_start_prefetcher/1, purge/1, delete/1]). %%---------------------------------------------------------------------------- @@ -271,15 +272,44 @@ purge(State = #vqstate { prefetcher = undefined, q4 = Q4, purge(State) -> purge(drain_prefetcher(stop, State)). +%% the only difference between purge and delete is that delete also +%% needs to delete everything that's been delivered and not ack'd. 
+delete(State) -> + {PurgeCount, State1 = #vqstate { index_state = IndexState }} = purge(State), + case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState) + of + {N, N} -> + {PurgeCount, State1}; + {GammaSeqId, NextSeqId} -> + {DeleteCount, IndexState1} = + delete1(NextSeqId, 0, GammaSeqId, IndexState), + {PurgeCount + DeleteCount, + State1 #vqstate { index_state = IndexState1 }} + end. + %%---------------------------------------------------------------------------- +delete1(NextSeqId, Count, GammaSeqId, IndexState) + when GammaSeqId >= NextSeqId -> + {Count, IndexState}; +delete1(NextSeqId, Count, GammaSeqId, IndexState) -> + Gamma1SeqId = GammaSeqId + rabbit_queue_index:segment_size(), + case rabbit_queue_index:read_segment_entries(GammaSeqId, IndexState) of + {[], IndexState1} -> + delete1(NextSeqId, Count, Gamma1SeqId, IndexState1); + {List, IndexState1} -> + Q = betas_from_segment_entries(List), + {QCount, IndexState2} = remove_queue_entries(Q, IndexState1), + delete1(NextSeqId, Count + QCount, Gamma1SeqId, IndexState2) + end. + purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> case queue:is_empty(Q3) of true -> {Q1Count, IndexState1} = remove_queue_entries(State #vqstate.q1, IndexState), {Count + Q1Count, State #vqstate { q1 = queue:new(), - index_state = IndexState1 }}; + index_state = IndexState1 }}; false -> {Q3Count, IndexState1} = remove_queue_entries(Q3, IndexState), purge1(Count + Q3Count, @@ -444,12 +474,11 @@ maybe_load_next_segment(State = end. betas_from_segment_entries(List) -> - queue:from_list(lists:map(fun ({MsgId, SeqId, IsPersistent, IsDelivered}) -> - #beta { msg_id = MsgId, seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - index_on_disk = true } - end, List)). + queue:from_list([#beta { msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + index_on_disk = true } + || {MsgId, SeqId, IsPersistent, IsDelivered} <- List]). read_index_segment(SeqId, IndexState) -> SeqId1 = SeqId + rabbit_queue_index:segment_size(), -- cgit v1.2.1 From 24b1fa75d9b319622893394361f57ca686c2d882 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 14:56:07 +0100 Subject: Requeue turns out to be vastly more elegant and predictable than in mq. --- src/rabbit_variable_queue.erl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a9a43fd8..0b4a7689 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -33,7 +33,7 @@ -export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, - maybe_start_prefetcher/1, purge/1, delete/1]). + maybe_start_prefetcher/1, purge/1, delete/1, requeue/2]). %%---------------------------------------------------------------------------- @@ -287,6 +287,16 @@ delete(State) -> State1 #vqstate { index_state = IndexState1 }} end. +%% [{Msg, AckTag}] +requeue(MsgsWithAckTags, State) -> + {AckTags, State1} = + lists:foldl( + fun ({Msg, AckTag}, {AckTagsAcc, StateN}) -> + StateN1 = publish(Msg, true, StateN), + {[AckTag | AckTagsAcc], StateN1} + end, {[], State}, MsgsWithAckTags), + ack(AckTags, State1). 
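
Concretely, requeue/2 above re-publishes each message at the tail (marked as delivered) and then acks its original position. A worked sketch with hypothetical values:

    %% The queue currently holds [Z]; X and Y were fetched but not acked.
    State1 = requeue([{X, TagX}, {Y, TagY}], State),
    %% foldl publishes X then Y at the tail with IsDelivered = true, so
    %% the queue is now [Z, X, Y]; ack([TagY, TagX], _) then releases
    %% the old positions in the index and msg_store.
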
+ %%---------------------------------------------------------------------------- delete1(NextSeqId, Count, GammaSeqId, IndexState) -- cgit v1.2.1 From cb46f605de96c73028d7b607c8fe8298c69ddb46 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 16:27:48 +0100 Subject: tx_publish and tx_rollback done. Only tx_commit to go... --- src/rabbit_variable_queue.erl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0b4a7689..95d3e8ec 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -33,7 +33,8 @@ -export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, - maybe_start_prefetcher/1, purge/1, delete/1, requeue/2]). + maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, + tx_publish/2, tx_rollback/2]). %%---------------------------------------------------------------------------- @@ -297,6 +298,16 @@ requeue(MsgsWithAckTags, State) -> end, {[], State}, MsgsWithAckTags), ack(AckTags, State1). +tx_publish(Msg = #basic_message { is_persistent = true }, State) -> + true = maybe_write_msg_to_disk(true, Msg), + State. + +tx_rollback(Pubs, State) -> + ok = rabbit_msg_store:remove( + [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, + Obj #basic_message.is_persistent]), + State. + %%---------------------------------------------------------------------------- delete1(NextSeqId, Count, GammaSeqId, IndexState) -- cgit v1.2.1 From d218fd42052c4705cb11a6989c7c04b877693b69 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 16:49:19 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 95d3e8ec..c6d01df1 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -592,11 +592,12 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, end end; _ when TargetRamMsgCount > RamMsgCount -> - msg; - _ -> case queue:is_empty(Q1) of - true -> index; - false -> msg %% can push out elders to disk - end + msg; + _ -> + case queue:is_empty(Q1) of + true -> index; + false -> msg %% can push out elders to disk + end end. ensure_binary_properties(Msg = #basic_message { content = Content }) -> -- cgit v1.2.1 From 3383c2002000c3e08035bc53775a1b74c3389447 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Oct 2009 18:05:16 +0100 Subject: in preparation for commit, need to be able to indicate in a publish that persistent msgs have already been sent to disk --- src/rabbit_variable_queue.erl | 84 +++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c6d01df1..47f8fec3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -140,9 +140,13 @@ init(QueueName) -> }, maybe_load_next_segment(State). -publish(Msg, IsDelivered, State = #vqstate { next_seq_id = SeqId, - len = Len }) -> +publish(Msg, IsDelivered, State) -> + publish(Msg, IsDelivered, false, State). + +publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { next_seq_id = SeqId, len = Len }) -> publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, + PersistentMsgsAlreadyOnDisk, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 }). 
set_queue_ram_duration_target( @@ -299,17 +303,37 @@ requeue(MsgsWithAckTags, State) -> ack(AckTags, State1). tx_publish(Msg = #basic_message { is_persistent = true }, State) -> - true = maybe_write_msg_to_disk(true, Msg), + true = maybe_write_msg_to_disk(true, false, Msg), + State; +tx_publish(_Msg, State) -> State. tx_rollback(Pubs, State) -> - ok = rabbit_msg_store:remove( - [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, - Obj #basic_message.is_persistent]), + ok = rabbit_msg_store:remove(persistent_msg_ids(Pubs)), State. +%% tx_commit(Pubs, AckTags, State) -> +%% case persistent_msg_ids(Pubs) of +%% [] -> +%% do_tx_commit(Pubs, AckTags, State); +%% PersistentMsgIds -> +%% ok = rabbit_msg_store:sync( +%% PersistentMsgIds, +%% fun () -> ok = rabbit_amqqueue:tx_commit_callback( +%% self(), Pubs, AckTags) +%% end), +%% State +%% end. + +%% do_tx_commit(Pubs, AckTags, State) -> +%% lists:foldl(fun (Msg, StateN) -> publish(Msg, false, StateN) end, State, Pubs). + %%---------------------------------------------------------------------------- +persistent_msg_ids(Pubs) -> + [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, + Obj #basic_message.is_persistent]. + delete1(NextSeqId, Count, GammaSeqId, IndexState) when GammaSeqId >= NextSeqId -> {Count, IndexState}; @@ -385,9 +409,11 @@ entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, publish(msg, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - SeqId, IsDelivered, State = #vqstate { index_state = IndexState, - ram_msg_count = RamMsgCount }) -> - MsgOnDisk = maybe_write_msg_to_disk(false, Msg), + SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { index_state = IndexState, + ram_msg_count = RamMsgCount }) -> + MsgOnDisk = + maybe_write_msg_to_disk(false, PersistentMsgsAlreadyOnDisk, Msg), {IndexOnDisk, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), @@ -399,9 +425,9 @@ publish(msg, Msg = #basic_message { guid = MsgId, publish(index, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - SeqId, IsDelivered, State = #vqstate { index_state = IndexState, - q1 = Q1 }) -> - true = maybe_write_msg_to_disk(true, Msg), + SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { index_state = IndexState, q1 = Q1 }) -> + true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), {IndexOnDisk, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), @@ -413,10 +439,10 @@ publish(index, Msg = #basic_message { guid = MsgId, publish(neither, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - SeqId, IsDelivered, + SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, gamma = Gamma }) -> - true = maybe_write_msg_to_disk(true, Msg), + true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), {true, IndexState1} = maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, IsDelivered, IndexState), @@ -550,12 +576,29 @@ reduce_memory_use(State = _ -> State1 end. -maybe_write_msg_to_disk(Bool, Msg = #basic_message { - guid = MsgId, is_persistent = IsPersistent }) - when Bool orelse IsPersistent -> +%% Bool PersistentMsgsAlreadyOnDisk IsPersistent | WriteToDisk? 
+%% -----------------------------------------------+------------- +%% false false false | false 1 +%% false false true | true 2 +%% false true false | false 3 +%% false true true | false 4 +%% true false false | true 5 +%% true false true | true 6 +%% true true false | true 7 +%% true true true | false 8 + +%% (Bool and (not PersistentMsgsAlreadyOnDisk)) or | 5 6 +%% (Bool and (not IsPersistent)) or | 5 7 +%% ((not PersistentMsgsAlreadyOnDisk) and IsPersistent) | 2 6 +maybe_write_msg_to_disk(Bool, PersistentMsgsAlreadyOnDisk, + Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }) + when (Bool andalso (not PersistentMsgsAlreadyOnDisk)) orelse + (Bool andalso (not IsPersistent)) orelse + ((not PersistentMsgsAlreadyOnDisk) andalso (IsPersistent)) -> ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), true; -maybe_write_msg_to_disk(_Bool, _Msg) -> +maybe_write_msg_to_disk(_Bool, _PersistentMsgsAlreadyOnDisk, _Msg) -> false. maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, @@ -656,10 +699,7 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State = seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Qa} -> - true = case MsgOnDisk of - true -> true; - false -> maybe_write_msg_to_disk(true, Msg) - end, + true = maybe_write_msg_to_disk(true, MsgOnDisk, Msg), Beta = #beta { msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, -- cgit v1.2.1 From 84e924ffed62073f4b7015f3aafb7e6538548460 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 11:11:16 +0100 Subject: factorisation and application of DeMorgan --- src/rabbit_variable_queue.erl | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 47f8fec3..97e8141f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -576,26 +576,24 @@ reduce_memory_use(State = _ -> State1 end. -%% Bool PersistentMsgsAlreadyOnDisk IsPersistent | WriteToDisk? +%% Bool IsPersistent PersistentMsgsAlreadyOnDisk | WriteToDisk? 
%% -----------------------------------------------+------------- -%% false false false | false 1 -%% false false true | true 2 -%% false true false | false 3 -%% false true true | false 4 -%% true false false | true 5 -%% true false true | true 6 -%% true true false | true 7 -%% true true true | false 8 - -%% (Bool and (not PersistentMsgsAlreadyOnDisk)) or | 5 6 -%% (Bool and (not IsPersistent)) or | 5 7 -%% ((not PersistentMsgsAlreadyOnDisk) and IsPersistent) | 2 6 +%% false false false | false 1 +%% false true false | true 2 +%% false false true | false 3 +%% false true true | false 4 +%% true false false | true 5 +%% true true false | true 6 +%% true false true | true 7 +%% true true true | false 8 + +%% (Bool and not (IsPersistent and PersistentMsgsAlreadyOnDisk)) or | 5 6 7 +%% (IsPersistent and (not PersistentMsgsAlreadyOnDisk)) | 2 6 maybe_write_msg_to_disk(Bool, PersistentMsgsAlreadyOnDisk, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }) - when (Bool andalso (not PersistentMsgsAlreadyOnDisk)) orelse - (Bool andalso (not IsPersistent)) orelse - ((not PersistentMsgsAlreadyOnDisk) andalso (IsPersistent)) -> + when (Bool andalso not (IsPersistent andalso PersistentMsgsAlreadyOnDisk)) + orelse (IsPersistent andalso not PersistentMsgsAlreadyOnDisk) -> ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), true; maybe_write_msg_to_disk(_Bool, _PersistentMsgsAlreadyOnDisk, _Msg) -> -- cgit v1.2.1 From 39f660e003cef8ddfc29e92a8f370208e12cbae1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 11:45:41 +0100 Subject: Maybe done. Maybe time to start hacking out mq and dq. Maybe. --- src/rabbit_amqqueue.erl | 8 ++++++- src/rabbit_variable_queue.erl | 49 ++++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index d55a38d7..32286552 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -39,7 +39,7 @@ -export([list/1, info/1, info/2, info_all/1, info_all/2]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2]). +-export([notify_sent/2, unblock/2, tx_commit_callback/3]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). -export([set_storage_mode/2]). @@ -63,6 +63,8 @@ -type(qfun(A) :: fun ((amqqueue()) -> A)). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). +-type(seq_id() :: non_neg_integer()). +-type(acktag() :: ('ack_not_on_disk' | {'ack_index_and_store', msg_id(), seq_id()})). -spec(start/0 :: () -> 'ok'). -spec(recover/0 :: () -> {'ok', [amqqueue()]}). @@ -104,6 +106,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). +-spec(tx_commit_callback/3 :: (pid(), [message()], [acktag()]) -> 'ok'). -spec(set_storage_mode/2 :: (pid(), ('oppressed' | 'liberated')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). @@ -320,6 +323,9 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 8, {unblock, ChPid}). +tx_commit_callback(QPid, Pubs, AckTags) -> + gen_server2:pcast(QPid, 8, {tx_commit_callback, Pubs, AckTags}). 
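The pcast closes the loop on asynchronous commits: rabbit_msg_store:sync/2 batches fsyncs and invokes the supplied fun only once the persistent messages are safely on disk, and that fun casts back to the queue process, which then performs the actual commit. A minimal sketch of the receiving side, assuming a gen_server2 queue process whose state record holds the variable queue (this clause is illustrative and not part of the patch):

    handle_cast({tx_commit_callback, Pubs, AckTags},
                State = #q { variable_queue_state = VQS }) ->
        %% safe to commit now: the msg_store has synced the persistent msgs
        VQS1 = rabbit_variable_queue:do_tx_commit(Pubs, AckTags, VQS),
        noreply(State #q { variable_queue_state = VQS1 }).

Priority 8 is the same level used for unblock, so commit completions are not starved by ordinary message traffic.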
+ internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( fun () -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 97e8141f..2b024669 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -34,7 +34,7 @@ -export([init/1, publish/3, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, - tx_publish/2, tx_rollback/2]). + tx_publish/2, tx_rollback/2, tx_commit/3, do_tx_commit/3]). %%---------------------------------------------------------------------------- @@ -145,9 +145,9 @@ publish(Msg, IsDelivered, State) -> publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, State = #vqstate { next_seq_id = SeqId, len = Len }) -> - publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, - PersistentMsgsAlreadyOnDisk, - State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 }). + {SeqId, publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, + PersistentMsgsAlreadyOnDisk, + State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 })}. set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, @@ -312,21 +312,28 @@ tx_rollback(Pubs, State) -> ok = rabbit_msg_store:remove(persistent_msg_ids(Pubs)), State. -%% tx_commit(Pubs, AckTags, State) -> -%% case persistent_msg_ids(Pubs) of -%% [] -> -%% do_tx_commit(Pubs, AckTags, State); -%% PersistentMsgIds -> -%% ok = rabbit_msg_store:sync( -%% PersistentMsgIds, -%% fun () -> ok = rabbit_amqqueue:tx_commit_callback( -%% self(), Pubs, AckTags) -%% end), -%% State -%% end. - -%% do_tx_commit(Pubs, AckTags, State) -> -%% lists:foldl(fun (Msg, StateN) -> publish(Msg, false, StateN) end, State, Pubs). +tx_commit(Pubs, AckTags, State) -> + case persistent_msg_ids(Pubs) of + [] -> + do_tx_commit(Pubs, AckTags, State); + PersistentMsgIds -> + ok = rabbit_msg_store:sync( + PersistentMsgIds, + fun () -> ok = rabbit_amqqueue:tx_commit_callback( + self(), Pubs, AckTags) + end), + State + end. + +do_tx_commit(Pubs, AckTags, State) -> + {_PubSeqIds, State1} = + lists:foldl( + fun (Msg, {SeqIdsAcc, StateN}) -> + {SeqId, StateN1} = publish(Msg, false, true, StateN), + {[SeqId | SeqIdsAcc], StateN1} + end, {[], State}, Pubs), + %% TODO need to do something here about syncing the queue index, PubSeqIds + ack(AckTags, State1). %%---------------------------------------------------------------------------- @@ -695,9 +702,9 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State = #alpha { msg = Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + index_on_disk = IndexOnDisk }}, Qa} -> - true = maybe_write_msg_to_disk(true, MsgOnDisk, Msg), + true = maybe_write_msg_to_disk(true, true, Msg), Beta = #beta { msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, -- cgit v1.2.1 From 532c5be75fdd446eb606614d17ba031e1ee8e407 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 13:39:04 +0100 Subject: Added ability to seed the msg_store. It's a bit hairy. 
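The hairy part is the generator protocol: rather than handing the msg_store a flat list of message ids, the queue index supplies a fun plus an initial state; the store applies the fun repeatedly, each step yielding a message id, a reference-count delta, and the next generator state, until the fun returns finished. This keeps memory bounded regardless of how many messages the indices hold. A sketch of a driver for such a generator, assuming only the protocol just described (count_refs is an illustrative name, not an API in this patch):

    count_refs(Gen, GenState, Counts) ->
        case Gen(GenState) of
            finished ->
                Counts;
            {MsgId, RefDelta, GenState1} ->
                count_refs(Gen, GenState1,
                           dict:update_counter(MsgId, RefDelta, Counts))
        end.

    %% e.g. count_refs(fun queue_index_walker/1, Queues, dict:new())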
--- src/rabbit_queue_index.erl | 62 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 1ce4ab21..d34744a3 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,9 +31,10 @@ -module(rabbit_queue_index). --export([init/1, write_published/4, write_delivered/2, write_acks/2, - flush_journal/1, read_segment_entries/2, next_segment_boundary/1, - segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1]). +-export([init/1, terminate/1, write_published/4, write_delivered/2, + write_acks/2, flush_journal/1, read_segment_entries/2, + next_segment_boundary/1, segment_size/0, + find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/0]). %%---------------------------------------------------------------------------- %% The queue disk index @@ -134,6 +135,7 @@ }). -spec(init/1 :: (string()) -> {non_neg_integer(), qistate()}). +-spec(terminate/1 :: (qistate()) -> qistate()). -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). @@ -168,6 +170,18 @@ init(Name) -> seg_ack_counts = AckCounts }}. +terminate(State) -> + case flush_journal(State) of + {true, State1} -> + terminate(State1); + {false, State1 = #qistate { cur_seg_num = SegNum }} -> + State2 = #qistate { journal_handle = JournalHdl } = + close_file_handle_for_seg(SegNum, State1), + ok = file:sync(JournalHdl), + ok = file:close(JournalHdl), + State2 #qistate { journal_handle = undefined } + end. + write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), @@ -266,7 +280,7 @@ find_lowest_seq_id_seg_and_next_seq_id( _ -> {SegNum1, _SegPath1} = lists:min(SegNumsPaths), reconstruct_seq_id(SegNum1, 0) end, - HighestSeqId = + NextSeqId = case SegNumsPaths of [] -> 0; _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), @@ -274,7 +288,19 @@ find_lowest_seq_id_seg_and_next_seq_id( load_segment(SegNum2, SegPath2, JAckDict), 1 + reconstruct_seq_id(SegNum2, HighRelSeq) end, - {LowSeqIdSeg, HighestSeqId}. + {LowSeqIdSeg, NextSeqId}. + +start_msg_store() -> + Queues = case file:list_dir(queues_dir()) of + {ok, Entries} -> + [ Entry || Entry <- Entries, filelib:is_dir(Entry) ]; + {error, enoent} -> + [] + end, + MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), + {ok, _Pid} = rabbit_msg_store:start_link(MsgStoreDir, + fun queue_index_walker/1, + Queues). %%---------------------------------------------------------------------------- %% Minor Helpers @@ -322,6 +348,32 @@ seg_num_to_path(Dir, SegNum) -> filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). 
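terminate/1 above has to loop because a single flush_journal pass may leave further dirty segments behind; it recurses until flush_journal reports that nothing remains, and only then syncs and closes the journal handle. The drain-until-quiescent idiom in isolation (flush_all is an illustrative name):

    flush_all(State) ->
        case flush_journal(State) of
            {true, State1}  -> flush_all(State1); %% more journal entries remain
            {false, State1} -> State1             %% quiescent; safe to close
        end.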
+%%---------------------------------------------------------------------------- +%% Msg Store Startup Delta Function +%%---------------------------------------------------------------------------- + +queue_index_walker([]) -> + finished; +queue_index_walker([QueueName|QueueNames]) -> + {TotalMsgCount, State} = init(QueueName), + {LowSeqIdSeg, _NextSeqId} = find_lowest_seq_id_seg_and_next_seq_id(State), + queue_index_walker({TotalMsgCount, LowSeqIdSeg, State, QueueNames}); + +queue_index_walker({0, _LowSeqIdSeg, State, QueueNames}) -> + terminate(State), + queue_index_walker(QueueNames); +queue_index_walker({N, LowSeqIdSeg, State, QueueNames}) -> + {Entries, State1} = read_segment_entries(LowSeqIdSeg, State), + LowSeqIdSeg1 = LowSeqIdSeg + segment_size(), + queue_index_walker({Entries, N, LowSeqIdSeg1, State1, QueueNames}); + +queue_index_walker({[], N, LowSeqIdSeg, State, QueueNames}) -> + queue_index_walker({N, LowSeqIdSeg, State, QueueNames}); +queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Entries], + N, LowSeqIdSeg, State, QueueNames}) -> + {MsgId, bool_to_int(IsPersistent), + {Entries, N - 1, LowSeqIdSeg, State, QueueNames}}. + %%---------------------------------------------------------------------------- %% Startup Functions %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 3c173cf013b95c1231687007a3cb437b72d0f02c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 13:47:30 +0100 Subject: it starts up again --- src/rabbit.erl | 9 +++++---- src/rabbit_msg_store.erl | 2 ++ src/rabbit_queue_index.erl | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 892e3c8b..4e027ca8 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -150,15 +150,16 @@ start(normal, []) -> ok = start_child(rabbit_router), ok = start_child(rabbit_node_monitor), ok = start_child(rabbit_guid), - ok = start_child(rabbit_disk_queue) + ok = rabbit_queue_index:start_msg_store() end}, {"recovery", fun () -> ok = maybe_insert_default_data(), ok = rabbit_exchange:recover(), - {ok, DurableQueues} = rabbit_amqqueue:recover(), - ok = rabbit_disk_queue:delete_non_durable_queues( - [ Q #amqqueue.name || Q <- DurableQueues ]) + {ok, _DurableQueues} = rabbit_amqqueue:recover() + %% TODO - don't use disk_queue any more! + %% ok = rabbit_disk_queue:delete_non_durable_queues( + %% [ Q #amqqueue.name || Q <- DurableQueues ]) end}, {"builtin applications", fun () -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2b4bb1f2..3f2f4cfe 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -249,6 +249,8 @@ sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + MsgLocations = ets:new(?MSG_LOC_NAME, [set, private, {keypos, #msg_location.msg_id}]), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d34744a3..c0a559e9 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -300,7 +300,8 @@ start_msg_store() -> MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), {ok, _Pid} = rabbit_msg_store:start_link(MsgStoreDir, fun queue_index_walker/1, - Queues). + Queues), + ok. 
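Note the shape of the ensure_dir call added to the msg_store's init: filelib:ensure_dir/1 only guarantees the existence of the parent directories of its argument, so joining a throwaway leaf name onto Dir is the idiomatic way to create Dir itself. An illustration with a hypothetical path:

    %% creates /tmp/msg_store/ (and its parents); no file named "nothing" is made
    ok = filelib:ensure_dir(filename:join("/tmp/msg_store", "nothing")).

The old disk queue's init relied on the same trick via form_filename("nothing").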
%%---------------------------------------------------------------------------- %% Minor Helpers -- cgit v1.2.1 From 55be6c42cf2fc549d82e0a5ff0f20a1c81bf4b3f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 15:15:13 +0100 Subject: publish/3 -> publish/2 because the IsDelivered bit is always false when called externally. Also rework requeue, because with the ability to indicate that persistent msgs will already be in msg_store, we don't need to call msg_store:write for persistent msgs, which means that we can also avoid the call to msg_store:remove that would have happened in the call to ack --- src/rabbit_variable_queue.erl | 46 ++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 2b024669..0ffc2adc 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_variable_queue). --export([init/1, publish/3, set_queue_ram_duration_target/2, +-export([init/1, publish/2, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/3, do_tx_commit/3]). @@ -140,14 +140,8 @@ init(QueueName) -> }, maybe_load_next_segment(State). -publish(Msg, IsDelivered, State) -> - publish(Msg, IsDelivered, false, State). - -publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { next_seq_id = SeqId, len = Len }) -> - {SeqId, publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, - PersistentMsgsAlreadyOnDisk, - State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 })}. +publish(Msg, State) -> + publish(Msg, false, false, State). set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, @@ -293,14 +287,32 @@ delete(State) -> end. %% [{Msg, AckTag}] +%% We guarantee that after fetch, only persistent msgs are left on +%% disk. This means that in a requeue, we set +%% PersistentMsgsAlreadyOnDisk to true, thus avoiding calls to +%% msg_store:write for persistent msgs. It also means that we don't +%% need to worry about calling msg_store:remove (as ack would do) +%% because transient msgs won't be on disk anyway, thus they won't +%% need to be removed. requeue(MsgsWithAckTags, State) -> - {AckTags, State1} = + {SeqIds, State1 = #vqstate { index_state = IndexState }} = lists:foldl( - fun ({Msg, AckTag}, {AckTagsAcc, StateN}) -> - StateN1 = publish(Msg, true, StateN), - {[AckTag | AckTagsAcc], StateN1} + fun ({Msg = #basic_message { guid = MsgId }, AckTag}, + {SeqIdsAcc, StateN}) -> + {_SeqId, StateN1} = publish(Msg, true, true, StateN), + SeqIdsAcc1 = case AckTag of + ack_not_on_disk -> + SeqIdsAcc; + {ack_index_and_store, MsgId, SeqId} -> + [SeqId | SeqIdsAcc] + end, + {SeqIdsAcc1, StateN1} end, {[], State}, MsgsWithAckTags), - ack(AckTags, State1). + IndexState1 = case SeqIds of + [] -> IndexState; + _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) + end, + State1 #vqstate { index_state = IndexState1 }. tx_publish(Msg = #basic_message { is_persistent = true }, State) -> true = maybe_write_msg_to_disk(true, false, Msg), @@ -414,6 +426,12 @@ entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, index_on_disk = IndexOnDisk }) -> {MsgId, SeqId, IsDelivered, true, IndexOnDisk}. 
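The reworked requeue exploits the two ack-tag shapes directly: ack_not_on_disk tags need no disk work at all, while {ack_index_and_store, MsgId, SeqId} tags only need their sequence ids acked in the queue index, since the message body must stay in the msg_store for the requeued copy. The partition the fold performs amounts to (a sketch over the tag forms above):

    SeqIds = [SeqId || {ack_index_and_store, _MsgId, SeqId} <- AckTags],
    %% ack_not_on_disk tags simply fall out of the comprehension

which is what requeue/2 accumulates before making the single write_acks call.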
+publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { next_seq_id = SeqId, len = Len }) -> + {SeqId, publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, + PersistentMsgsAlreadyOnDisk, + State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 })}. + publish(msg, Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, -- cgit v1.2.1 From e4c4153ff7700b4d588a2c00b561b979fd15eb32 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 15:32:15 +0100 Subject: added publish_delivered/2 which deals with adding a message, when the queue is empty which we already know has been sent out to a consumer, so it's really just a case of writing to disk the message, and index pub and deliver entries iff the message is persistent --- src/rabbit_variable_queue.erl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0ffc2adc..e7c546e5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_variable_queue). --export([init/1, publish/2, set_queue_ram_duration_target/2, +-export([init/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/3, do_tx_commit/3]). @@ -143,6 +143,22 @@ init(QueueName) -> publish(Msg, State) -> publish(Msg, false, false, State). +publish_delivered(Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + State = #vqstate { len = 0, index_state = IndexState, + next_seq_id = SeqId }) -> + case maybe_write_msg_to_disk(false, false, Msg) of + true -> + {true, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + true, IndexState), + {{ack_index_and_store, MsgId, SeqId}, + State #vqstate { index_state = IndexState1, + next_seq_id = SeqId + 1 }}; + false -> + {ack_not_on_disk, State} + end. + set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, target_ram_msg_count = TargetRamMsgCount -- cgit v1.2.1 From 627df11f0fbf9424fc21bf1f1996be7510d4cbff Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 16:15:29 +0100 Subject: most of the rewiring is done. Need to sort out how to delete non durable queues on start up, which is a bit cyclical, as I'd like to not start the msg_store until we know which queues are durable and which aren't, but we also can't start the queues until the msg_store is running. Fun. 
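Spelled out, the cycle is: start_msg_store/0 seeds reference counts from every queue index it finds on disk, but only recovery knows which of those queues are durable, and recovery in turn needs a running msg_store before any queue process can init. The ordering as it stands, with the unresolved step marked (a sketch of the boot sequence, not a finished design):

    boot() ->
        ok = rabbit_queue_index:start_msg_store(),       %% seeds from all on-disk queues
        {ok, DurableQueues} = rabbit_amqqueue:recover(), %% requires the store to be up
        %% still TODO: delete indices/messages of queues not in DurableQueues,
        %% without having needed that list before the seeding step above
        ok.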
--- src/rabbit.erl | 1 + src/rabbit_amqqueue.erl | 5 - src/rabbit_amqqueue_process.erl | 188 ++++------ src/rabbit_disk_queue.erl | 743 ---------------------------------------- src/rabbit_mixed_queue.erl | 673 ------------------------------------ src/rabbit_queue_index.erl | 1 - 6 files changed, 73 insertions(+), 1538 deletions(-) delete mode 100644 src/rabbit_disk_queue.erl delete mode 100644 src/rabbit_mixed_queue.erl diff --git a/src/rabbit.erl b/src/rabbit.erl index 4e027ca8..b859c4af 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -150,6 +150,7 @@ start(normal, []) -> ok = start_child(rabbit_router), ok = start_child(rabbit_node_monitor), ok = start_child(rabbit_guid), + %% TODO - this should probably use start_child somehow too ok = rabbit_queue_index:start_msg_store() end}, {"recovery", diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 32286552..840c2c4d 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -42,7 +42,6 @@ -export([notify_sent/2, unblock/2, tx_commit_callback/3]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). --export([set_storage_mode/2]). -import(mnesia). -import(gen_server2). @@ -107,7 +106,6 @@ -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). -spec(tx_commit_callback/3 :: (pid(), [message()], [acktag()]) -> 'ok'). --spec(set_storage_mode/2 :: (pid(), ('oppressed' | 'liberated')) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -228,9 +226,6 @@ list(VHostPath) -> map(VHostPath, F) -> rabbit_misc:filter_exit_map(F, list(VHostPath)). -set_storage_mode(QPid, Mode) -> - gen_server2:pcast(QPid, 10, {set_storage_mode, Mode}). - info(#amqqueue{ pid = QPid }) -> gen_server2:pcall(QPid, 9, info, infinity). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 99fd6987..152205ed 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -38,7 +38,6 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). --define(MINIMUM_MEMORY_REPORT_TIME_INTERVAL, 10000). %% 10 seconds in milliseconds -export([start_link/1]). @@ -54,11 +53,10 @@ owner, exclusive_consumer, has_had_consumers, - mixed_state, + variable_queue_state, next_msg_id, active_consumers, - blocked_consumers, - memory_report_timer + blocked_consumers }). -record(consumer, {tag, ack_required}). @@ -88,8 +86,7 @@ acks_uncommitted, consumers, transactions, - memory, - storage_mode + memory ]). 
%%---------------------------------------------------------------------------- @@ -99,43 +96,41 @@ start_link(Q) -> %%---------------------------------------------------------------------------- -init(Q = #amqqueue { name = QName, durable = Durable }) -> +init(Q = #amqqueue { name = QName }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), ok = rabbit_memory_manager:register (self(), false, rabbit_amqqueue, set_storage_mode, [self()]), - {ok, MS} = rabbit_mixed_queue:init(QName, Durable), + VQS = rabbit_variable_queue:init(QName), State = #q{q = Q, owner = none, exclusive_consumer = none, has_had_consumers = false, - mixed_state = MS, + variable_queue_state = VQS, next_msg_id = 1, active_consumers = queue:new(), - blocked_consumers = queue:new(), - memory_report_timer = undefined + blocked_consumers = queue:new() }, - %% first thing we must do is report_memory. - {ok, start_memory_timer(State), hibernate, + {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -terminate(_Reason, State = #q{mixed_state = MS}) -> +terminate(_Reason, State = #q{variable_queue_state = VQS}) -> %% FIXME: How do we cancel active subscriptions? - State1 = stop_memory_timer(State), %% Ensure that any persisted tx messages are removed; %% mixed_queue:delete_queue cannot do that for us since neither %% mixed_queue nor disk_queue keep a record of uncommitted tx %% messages. - {ok, MS1} = rabbit_mixed_queue:tx_rollback( - lists:concat([PM || #tx { pending_messages = PM } <- - all_tx_record()]), MS), - %% Delete from disk queue first. If we crash at this point, when a + %% TODO: wait for all in flight tx_commits to complete + VQS1 = rabbit_variable_queue:tx_rollback( + lists:concat([PM || #tx { pending_messages = PM } <- + all_tx_record()]), VQS), + %% Delete from disk first. If we crash at this point, when a %% durable queue, we will be recreated at startup, possibly with %% partial content. The alternative is much worse however - if we %% called internal_delete first, we would then have a race between - %% the disk_queue delete and a new queue with the same name being + %% the disk delete and a new queue with the same name being %% created and published to. - {ok, _MS} = rabbit_mixed_queue:delete_queue(MS1), - ok = rabbit_amqqueue:internal_delete(qname(State1)). + _VQS = rabbit_variable_queue:delete(VQS1), + ok = rabbit_amqqueue:internal_delete(qname(State)). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -144,27 +139,14 @@ code_change(_OldVsn, State, _Extra) -> reply(Reply, NewState) -> assert_invariant(NewState), - {reply, Reply, start_memory_timer(NewState), hibernate}. + {reply, Reply, NewState, hibernate}. noreply(NewState) -> assert_invariant(NewState), - {noreply, start_memory_timer(NewState), hibernate}. + {noreply, NewState, hibernate}. -assert_invariant(#q { active_consumers = AC, mixed_state = MS }) -> - true = (queue:is_empty(AC) orelse rabbit_mixed_queue:is_empty(MS)). - -start_memory_timer(State = #q { memory_report_timer = undefined }) -> - {ok, TRef} = timer:send_after(?MINIMUM_MEMORY_REPORT_TIME_INTERVAL, - report_memory), - report_memory(false, State #q { memory_report_timer = TRef }); -start_memory_timer(State) -> - State. - -stop_memory_timer(State = #q { memory_report_timer = undefined }) -> - State; -stop_memory_timer(State = #q { memory_report_timer = TRef }) -> - {ok, cancel} = timer:cancel(TRef), - State #q { memory_report_timer = undefined }. 
+assert_invariant(#q { active_consumers = AC, variable_queue_state = VQS }) -> + true = (queue:is_empty(AC) orelse rabbit_variable_queue:is_empty(VQS)). lookup_ch(ChPid) -> case get({ch, ChPid}) of @@ -282,25 +264,24 @@ deliver_msgs_to_consumers( deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> not IsEmpty. deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, - State = #q { mixed_state = MS }) -> - {{Msg, IsDelivered, AckTag, Remaining}, MS1} = - rabbit_mixed_queue:fetch(MS), + State = #q { variable_queue_state = VQS }) -> + {{Msg, IsDelivered, AckTag, Remaining}, VQS1} = + rabbit_variable_queue:fetch(VQS), AutoAcks1 = case AckRequired of true -> AutoAcks; false -> [AckTag | AutoAcks] end, {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, - State #q { mixed_state = MS1 }}. + State #q { variable_queue_state = VQS1 }}. -run_message_queue(State = #q { mixed_state = MS }) -> +run_message_queue(State = #q { variable_queue_state = VQS }) -> Funs = { fun deliver_from_queue_pred/2, fun deliver_from_queue_deliver/3 }, - IsEmpty = rabbit_mixed_queue:is_empty(MS), + IsEmpty = rabbit_variable_queue:is_empty(VQS), {{_IsEmpty1, AutoAcks}, State1} = deliver_msgs_to_consumers(Funs, {IsEmpty, []}, State), - {ok, MS1} = - rabbit_mixed_queue:ack(AutoAcks, State1 #q.mixed_state), - State1 #q { mixed_state = MS1 }. + VQS1 = rabbit_variable_queue:ack(AutoAcks, State1 #q.variable_queue_state), + State1 #q { variable_queue_state = VQS1 }. attempt_immediate_delivery(none, _ChPid, Msg, State) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, @@ -309,10 +290,10 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> {AckTag, State2} = case AckRequired of true -> - {ok, AckTag1, MS} = - rabbit_mixed_queue:publish_delivered( - Msg, State1 #q.mixed_state), - {AckTag1, State1 #q { mixed_state = MS }}; + {AckTag1, VQS} = + rabbit_variable_queue:publish_delivered( + Msg, State1 #q.variable_queue_state), + {AckTag1, State1 #q { variable_queue_state = VQS }}; false -> {noack, State1} end, @@ -320,9 +301,9 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> end, deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); attempt_immediate_delivery(Txn, ChPid, Msg, State) -> - {ok, MS} = rabbit_mixed_queue:tx_publish(Msg, State #q.mixed_state), + VQS = rabbit_variable_queue:tx_publish(Msg, State #q.variable_queue_state), record_pending_message(Txn, ChPid, Msg), - {true, State #q { mixed_state = MS }}. + {true, State #q { variable_queue_state = VQS }}. deliver_or_enqueue(Txn, ChPid, Msg, State) -> case attempt_immediate_delivery(Txn, ChPid, Msg, State) of @@ -330,8 +311,9 @@ deliver_or_enqueue(Txn, ChPid, Msg, State) -> {true, NewState}; {false, NewState} -> %% Txn is none and no unblocked channels with consumers - {ok, MS} = rabbit_mixed_queue:publish(Msg, State #q.mixed_state), - {false, NewState #q { mixed_state = MS }} + {_SeqId, VQS} = rabbit_variable_queue:publish( + Msg, State #q.variable_queue_state), + {false, NewState #q { variable_queue_state = VQS }} end. 
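fetch/1 returns the remaining queue length alongside every message, which is how deliver_from_queue_deliver detects emptiness without a second call. A sketch of draining a variable queue against the same contract (drain/2 is an illustrative helper, not part of the patch):

    drain(VQS, Acc) ->
        case rabbit_variable_queue:fetch(VQS) of
            {empty, VQS1} ->
                {lists:reverse(Acc), VQS1};
            {{Msg, _IsDelivered, AckTag, _Remaining}, VQS1} ->
                drain(VQS1, [{Msg, AckTag} | Acc])
        end.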
%% all these messages have already been delivered at least once and @@ -344,11 +326,11 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = deliver_msgs_to_consumers( Funs, {length(MsgsWithAcks), [], MsgsWithAcks}, State), - {ok, MS} = rabbit_mixed_queue:ack(AutoAcks, NewState #q.mixed_state), + VQS = rabbit_variable_queue:ack(AutoAcks, NewState #q.variable_queue_state), case OutstandingMsgs of - [] -> NewState #q { mixed_state = MS }; - _ -> {ok, MS1} = rabbit_mixed_queue:requeue(OutstandingMsgs, MS), - NewState #q { mixed_state = MS1 } + [] -> NewState #q { variable_queue_state = VQS }; + _ -> VQS1 = rabbit_variable_queue:requeue(OutstandingMsgs, VQS), + NewState #q { variable_queue_state = VQS1 } end. deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> @@ -504,17 +486,17 @@ commit_transaction(Txn, State) -> store_ch_record(C#cr{unacked_messages = Remaining}), MsgWithAcks end, - {ok, MS} = rabbit_mixed_queue:tx_commit( - PendingMessagesOrdered, Acks, State #q.mixed_state), - State #q { mixed_state = MS }. + VQS = rabbit_variable_queue:tx_commit( + PendingMessagesOrdered, Acks, State #q.variable_queue_state), + State #q { variable_queue_state = VQS }. rollback_transaction(Txn, State) -> #tx { pending_messages = PendingMessages } = lookup_tx(Txn), - {ok, MS} = rabbit_mixed_queue:tx_rollback(PendingMessages, - State #q.mixed_state), + VQS = rabbit_variable_queue:tx_rollback(PendingMessages, + State #q.variable_queue_state), erase_tx(Txn), - State #q { mixed_state = MS }. + State #q { variable_queue_state = VQS }. %% {A, B} = collect_messages(C, D) %% A = C `intersect` D; B = D \\ C %% err, A = C `intersect` D , via projection through the dict that is C @@ -529,12 +511,10 @@ i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete; i(arguments, #q{q = #amqqueue{arguments = Arguments}}) -> Arguments; -i(storage_mode, #q{ mixed_state = MS }) -> - rabbit_mixed_queue:storage_mode(MS); i(pid, _) -> self(); -i(messages_ready, #q { mixed_state = MS }) -> - rabbit_mixed_queue:len(MS); +i(messages_ready, #q { variable_queue_state = VQS }) -> + rabbit_variable_queue:len(VQS); i(messages_unacknowledged, _) -> lists:sum([dict:size(UAM) || #cr{unacked_messages = UAM} <- all_ch_record()]); @@ -558,11 +538,6 @@ i(memory, _) -> i(Item, _) -> throw({bad_argument, Item}). -report_memory(Hib, State = #q { mixed_state = MS }) -> - {MS1, MSize} = rabbit_mixed_queue:estimate_queue_memory(MS), - rabbit_memory_manager:report_memory(self(), MSize, Hib), - State #q { mixed_state = MS1 }. 
- %--------------------------------------------------------------------------- handle_call(info, _From, State) -> @@ -612,25 +587,25 @@ handle_call({notify_down, ChPid}, From, State) -> handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, next_msg_id = NextId, - mixed_state = MS + variable_queue_state = VQS }) -> - case rabbit_mixed_queue:fetch(MS) of - {empty, MS1} -> reply(empty, State #q { mixed_state = MS1 }); - {{Msg, IsDelivered, AckTag, Remaining}, MS1} -> + case rabbit_variable_queue:fetch(VQS) of + {empty, VQS1} -> reply(empty, State #q { variable_queue_state = VQS1 }); + {{Msg, IsDelivered, AckTag, Remaining}, VQS1} -> AckRequired = not(NoAck), - {ok, MS2} = + {ok, VQS2} = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - {ok, MS1}; + {ok, VQS1}; false -> - rabbit_mixed_queue:ack([AckTag], MS1) + rabbit_variable_queue:ack([AckTag], VQS1) end, Message = {QName, self(), NextId, IsDelivered, Msg}, reply({ok, Remaining, Message}, - State #q { next_msg_id = NextId + 1, mixed_state = MS2 }) + State #q { next_msg_id = NextId + 1, variable_queue_state = VQS2 }) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -710,14 +685,14 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, end; handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - mixed_state = MS, + variable_queue_state = VQS, active_consumers = ActiveConsumers}) -> - Length = rabbit_mixed_queue:len(MS), + Length = rabbit_variable_queue:len(VQS), reply({ok, Name, Length, queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, - State = #q { mixed_state = MS }) -> - Length = rabbit_mixed_queue:len(MS), + State = #q { variable_queue_state = VQS }) -> + Length = rabbit_variable_queue:len(VQS), IsEmpty = Length == 0, IsUnused = is_unused(State), if @@ -730,8 +705,8 @@ handle_call({delete, IfUnused, IfEmpty}, _From, end; handle_call(purge, _From, State) -> - {Count, MS} = rabbit_mixed_queue:purge(State #q.mixed_state), - reply({ok, Count}, State #q { mixed_state = MS }); + {Count, VQS} = rabbit_variable_queue:purge(State #q.variable_queue_state), + reply({ok, Count}, State #q { variable_queue_state = VQS }); handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, exclusive_consumer = Holder}) -> @@ -770,11 +745,11 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> case Txn of none -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), - {ok, MS} = rabbit_mixed_queue:ack( - [AckTag || {_Msg, AckTag} <- MsgWithAcks], - State #q.mixed_state), + VQS = rabbit_variable_queue:ack( + [AckTag || {_Msg, AckTag} <- MsgWithAcks], + State #q.variable_queue_state), store_ch_record(C#cr{unacked_messages = Remaining}), - noreply(State #q { mixed_state = MS }); + noreply(State #q { variable_queue_state = VQS }); _ -> record_pending_acks(Txn, ChPid, MsgIds), noreply(State) @@ -822,23 +797,7 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end, NewLimited = Limited andalso LimiterPid =/= undefined, C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} - end)); - -handle_cast({set_storage_mode, Mode}, State = #q { mixed_state = MS }) -> - PendingMessages = - lists:flatten([Pending || #tx { pending_messages = Pending} - <- all_tx_record()]), - Mode1 = case Mode of - liberated -> mixed; - oppressed -> disk - end, - {ok, MS1} = rabbit_mixed_queue:set_storage_mode(Mode1, PendingMessages, MS), 
- noreply(State #q { mixed_state = MS1 }). - -handle_info(report_memory, State) -> - %% deliberately don't call noreply/1 as we don't want to start the timer. - %% By unsetting the timer, we force a report on the next normal message. - {noreply, State #q { memory_report_timer = undefined }, hibernate}; + end)). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -860,9 +819,6 @@ handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. -handle_pre_hibernate(State = #q { mixed_state = MS }) -> - MS1 = rabbit_mixed_queue:maybe_prefetch(MS), - State1 = - stop_memory_timer(report_memory(true, State #q { mixed_state = MS1 })), - %% don't call noreply/1 as that'll restart the memory_report_timer - {hibernate, State1}. +handle_pre_hibernate(State = #q { variable_queue_state = VQS }) -> + VQS1 = rabbit_variable_queue:maybe_start_prefetcher(VQS), + {hibernate, State #q { variable_queue_state = VQS1 }}. diff --git a/src/rabbit_disk_queue.erl b/src/rabbit_disk_queue.erl deleted file mode 100644 index 7d44dd9d..00000000 --- a/src/rabbit_disk_queue.erl +++ /dev/null @@ -1,743 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_disk_queue). - --behaviour(gen_server2). - --export([start_link/0]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([publish/3, fetch/1, phantom_fetch/1, ack/2, tx_publish/1, tx_commit/3, - tx_rollback/1, requeue/2, purge/1, delete_queue/1, - delete_non_durable_queues/1, requeue_next_n/2, len/1, foldl/3, - prefetch/1 - ]). - --export([stop/0, stop_and_obliterate/0]). - -%%---------------------------------------------------------------------------- - --include("rabbit.hrl"). - --define(SEQUENCE_ETS_NAME, rabbit_disk_queue_sequences). --define(BATCH_SIZE, 10000). - --define(SHUTDOWN_MESSAGE_KEY, {internal_token, shutdown}). --define(SHUTDOWN_MESSAGE, - #dq_msg_loc { queue_and_seq_id = ?SHUTDOWN_MESSAGE_KEY, - msg_id = infinity_and_beyond, - is_delivered = never, - is_persistent = true - }). - --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). - --define(SERVER, ?MODULE). - --record(dqstate, { sequences }). 
%% next read and write for each q - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(msg_id() :: guid()). --type(seq_id() :: non_neg_integer()). --type(ack_tag() :: {msg_id(), seq_id()}). - --spec(start_link/0 :: () -> - ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(publish/3 :: (queue_name(), message(), boolean()) -> 'ok'). --spec(fetch/1 :: (queue_name()) -> - ('empty' | - {message(), boolean(), ack_tag(), non_neg_integer()})). --spec(phantom_fetch/1 :: (queue_name()) -> - ('empty' | - {msg_id(), boolean(), ack_tag(), non_neg_integer()})). --spec(prefetch/1 :: (queue_name()) -> 'ok'). --spec(ack/2 :: (queue_name(), [ack_tag()]) -> 'ok'). --spec(tx_publish/1 :: (message()) -> 'ok'). --spec(tx_commit/3 :: (queue_name(), [{msg_id(), boolean(), boolean()}], - [ack_tag()]) -> - 'ok'). --spec(tx_rollback/1 :: ([msg_id()]) -> 'ok'). --spec(requeue/2 :: (queue_name(), [{ack_tag(), boolean()}]) -> 'ok'). --spec(requeue_next_n/2 :: (queue_name(), non_neg_integer()) -> 'ok'). --spec(purge/1 :: (queue_name()) -> non_neg_integer()). --spec(delete_queue/1 :: (queue_name()) -> 'ok'). --spec(delete_non_durable_queues/1 :: ([queue_name()]) -> 'ok'). --spec(len/1 :: (queue_name()) -> non_neg_integer()). --spec(foldl/3 :: (fun ((message(), ack_tag(), boolean(), A) -> A), - A, queue_name()) -> A). --spec(stop/0 :: () -> 'ok'). --spec(stop_and_obliterate/0 :: () -> 'ok'). - --endif. - -%%---------------------------------------------------------------------------- -%% public API -%%---------------------------------------------------------------------------- - -start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). - -publish(Q, Message = #basic_message {}, IsDelivered) -> - gen_server2:cast(?SERVER, {publish, Q, Message, IsDelivered}). - -fetch(Q) -> - gen_server2:call(?SERVER, {fetch, Q}, infinity). - -phantom_fetch(Q) -> - gen_server2:call(?SERVER, {phantom_fetch, Q}, infinity). - -prefetch(Q) -> - gen_server2:pcast(?SERVER, -1, {prefetch, Q, self()}). - -ack(Q, MsgSeqIds) when is_list(MsgSeqIds) -> - gen_server2:cast(?SERVER, {ack, Q, MsgSeqIds}). - -tx_publish(Message = #basic_message {}) -> - gen_server2:cast(?SERVER, {tx_publish, Message}). - -tx_commit(Q, PubMsgIds, AckSeqIds) - when is_list(PubMsgIds) andalso is_list(AckSeqIds) -> - gen_server2:call(?SERVER, {tx_commit, Q, PubMsgIds, AckSeqIds}, infinity). - -tx_rollback(MsgIds) when is_list(MsgIds) -> - gen_server2:cast(?SERVER, {tx_rollback, MsgIds}). - -requeue(Q, MsgSeqIds) when is_list(MsgSeqIds) -> - gen_server2:cast(?SERVER, {requeue, Q, MsgSeqIds}). - -requeue_next_n(Q, N) when is_integer(N) -> - gen_server2:cast(?SERVER, {requeue_next_n, Q, N}). - -purge(Q) -> - gen_server2:call(?SERVER, {purge, Q}, infinity). - -delete_queue(Q) -> - gen_server2:call(?SERVER, {delete_queue, Q}, infinity). - -delete_non_durable_queues(DurableQueues) -> - gen_server2:call(?SERVER, {delete_non_durable_queues, DurableQueues}, - infinity). - -len(Q) -> - gen_server2:call(?SERVER, {len, Q}, infinity). - -foldl(Fun, Init, Acc) -> - gen_server2:call(?SERVER, {foldl, Fun, Init, Acc}, infinity). - -stop() -> - gen_server2:call(?SERVER, stop, infinity). - -stop_and_obliterate() -> - gen_server2:call(?SERVER, stop_vaporise, infinity). - -%% private - -finalise_commit(TxDetails) -> - gen_server2:cast(?SERVER, {finalise_commit, TxDetails}). 
- -%%---------------------------------------------------------------------------- -%% gen_server behaviour -%%---------------------------------------------------------------------------- - -init([]) -> - %% If the gen_server is part of a supervision tree and is ordered - %% by its supervisor to terminate, terminate will be called with - %% Reason=shutdown if the following conditions apply: - %% * the gen_server has been set to trap exit signals, and - %% * the shutdown strategy as defined in the supervisor's - %% child specification is an integer timeout value, not - %% brutal_kill. - %% Otherwise, the gen_server will be immediately terminated. - process_flag(trap_exit, true), - - ok = filelib:ensure_dir(form_filename("nothing")), - - ok = detect_shutdown_state_and_adjust_delivered_flags(), - - {ok, _Pid} = rabbit_msg_store:start_link(base_directory(), - fun msg_ref_gen/1, - msg_ref_gen_init()), - ok = prune(), - - Sequences = ets:new(?SEQUENCE_ETS_NAME, [set, private]), - ok = extract_sequence_numbers(Sequences), - - State = #dqstate { sequences = Sequences }, - {ok, State, hibernate, - {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. - -handle_call({fetch, Q}, _From, State) -> - {Result, State1} = internal_fetch_body(Q, pop_queue, State), - reply(Result, State1); -handle_call({phantom_fetch, Q}, _From, State) -> - Result = internal_fetch_attributes(Q, record_delivery, State), - reply(Result, State); -handle_call({tx_commit, Q, PubMsgIds, AckSeqIds}, From, State) -> - State1 = - internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State), - noreply(State1); -handle_call({purge, Q}, _From, State) -> - {ok, Count, State1} = internal_purge(Q, State), - reply(Count, State1); -handle_call({delete_queue, Q}, From, State) -> - gen_server2:reply(From, ok), - {ok, State1} = internal_delete_queue(Q, State), - noreply(State1); -handle_call({len, Q}, _From, State = #dqstate { sequences = Sequences }) -> - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - reply(WriteSeqId - ReadSeqId, State); -handle_call({foldl, Fun, Init, Q}, _From, State) -> - {ok, Result, State1} = internal_foldl(Q, Fun, Init, State), - reply(Result, State1); -handle_call(stop, _From, State) -> - {stop, normal, ok, State}; %% gen_server now calls terminate -handle_call(stop_vaporise, _From, State) -> - State1 = shutdown(State), - {atomic, ok} = mnesia:clear_table(rabbit_disk_queue), - lists:foreach(fun file:delete/1, filelib:wildcard(form_filename("*"))), - {stop, normal, ok, State1}; %% gen_server now calls terminate -handle_call({delete_non_durable_queues, DurableQueues}, _From, State) -> - {ok, State1} = internal_delete_non_durable_queues(DurableQueues, State), - reply(ok, State1). 
- -handle_cast({publish, Q, Message, IsDelivered}, State) -> - {ok, _MsgSeqId, State1} = internal_publish(Q, Message, IsDelivered, State), - noreply(State1); -handle_cast({ack, Q, MsgSeqIds}, State) -> - {ok, State1} = internal_ack(Q, MsgSeqIds, State), - noreply(State1); -handle_cast({tx_publish, Message}, State) -> - {ok, State1} = internal_tx_publish(Message, State), - noreply(State1); -handle_cast({tx_rollback, MsgIds}, State) -> - {ok, State1} = internal_tx_rollback(MsgIds, State), - noreply(State1); -handle_cast({requeue, Q, MsgSeqIds}, State) -> - {ok, State1} = internal_requeue(Q, MsgSeqIds, State), - noreply(State1); -handle_cast({requeue_next_n, Q, N}, State) -> - {ok, State1} = internal_requeue_next_n(Q, N, State), - noreply(State1); -handle_cast({prefetch, Q, From}, State) -> - {Result, State1} = internal_fetch_body(Q, peek_queue, State), - case rabbit_misc:with_exit_handler( - fun () -> false end, - fun () -> - ok = rabbit_queue_prefetcher:publish(From, Result), - true - end) of - true -> - internal_fetch_attributes(Q, ignore_delivery, State1); - false -> ok - end, - noreply(State1); -handle_cast({finalise_commit, TxDetails}, State) -> - noreply(finalise_commit(TxDetails, State)). - -handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}. - -terminate(_Reason, State) -> - State1 = shutdown(State), - store_safe_shutdown(), - State1. - -shutdown(State = #dqstate { sequences = undefined }) -> - State; -shutdown(State = #dqstate { sequences = Sequences }) -> - ok = rabbit_msg_store:stop(), - ets:delete(Sequences), - State #dqstate { sequences = undefined }. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%---------------------------------------------------------------------------- -%% general helper functions -%%---------------------------------------------------------------------------- - -noreply(State) -> - {noreply, State, hibernate}. - -reply(Reply, State) -> - {reply, Reply, State, hibernate}. - -form_filename(Name) -> - filename:join(base_directory(), Name). - -base_directory() -> - filename:join(rabbit_mnesia:dir(), "rabbit_disk_queue/"). - -sequence_lookup(Sequences, Q) -> - case ets:lookup(Sequences, Q) of - [] -> {0, 0}; - [{_, ReadSeqId, WriteSeqId}] -> {ReadSeqId, WriteSeqId} - end. - -%%---------------------------------------------------------------------------- -%% internal functions -%%---------------------------------------------------------------------------- - -internal_fetch_body(Q, Advance, State) -> - case next(Q, record_delivery, Advance, State) of - empty -> {empty, State}; - {MsgId, IsDelivered, AckTag, Remaining} -> - {ok, Message} = rabbit_msg_store:read(MsgId), - {{Message, IsDelivered, AckTag, Remaining}, State} - end. - -internal_fetch_attributes(Q, MarkDelivered, State) -> - next(Q, MarkDelivered, pop_queue, State). - -next(Q, MarkDelivered, Advance, #dqstate { sequences = Sequences }) -> - case sequence_lookup(Sequences, Q) of - {SeqId, SeqId} -> empty; - {ReadSeqId, WriteSeqId} when WriteSeqId > ReadSeqId -> - Remaining = WriteSeqId - ReadSeqId - 1, - {MsgId, IsDelivered} = - update_message_attributes(Q, ReadSeqId, MarkDelivered), - ok = maybe_advance(Advance, Sequences, Q, ReadSeqId, WriteSeqId), - AckTag = {MsgId, ReadSeqId}, - {MsgId, IsDelivered, AckTag, Remaining} - end. 
- -update_message_attributes(Q, SeqId, MarkDelivered) -> - [Obj = - #dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = - mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), - ok = case {IsDelivered, MarkDelivered} of - {true, _} -> ok; - {false, ignore_delivery} -> ok; - {false, record_delivery} -> - mnesia:dirty_write(rabbit_disk_queue, - Obj #dq_msg_loc {is_delivered = true}) - end, - {MsgId, IsDelivered}. - -maybe_advance(peek_queue, _, _, _, _) -> - ok; -maybe_advance(pop_queue, Sequences, Q, ReadSeqId, WriteSeqId) -> - true = ets:insert(Sequences, {Q, ReadSeqId + 1, WriteSeqId}), - ok. - -internal_foldl(Q, Fun, Init, State = #dqstate { sequences = Sequences }) -> - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - internal_foldl(Q, WriteSeqId, Fun, State, Init, ReadSeqId). - -internal_foldl(_Q, SeqId, _Fun, State, Acc, SeqId) -> - {ok, Acc, State}; -internal_foldl(Q, WriteSeqId, Fun, State, Acc, ReadSeqId) -> - [#dq_msg_loc {is_delivered = IsDelivered, msg_id = MsgId}] = - mnesia:dirty_read(rabbit_disk_queue, {Q, ReadSeqId}), - {ok, Message} = rabbit_msg_store:read(MsgId), - Acc1 = Fun(Message, {MsgId, ReadSeqId}, IsDelivered, Acc), - internal_foldl(Q, WriteSeqId, Fun, State, Acc1, ReadSeqId + 1). - -internal_ack(Q, MsgSeqIds, State) -> - remove_messages(Q, MsgSeqIds, State). - -remove_messages(Q, MsgSeqIds, State) -> - MsgIds = lists:foldl( - fun ({MsgId, SeqId}, MsgIdAcc) -> - ok = mnesia:dirty_delete(rabbit_disk_queue, {Q, SeqId}), - [MsgId | MsgIdAcc] - end, [], MsgSeqIds), - ok = rabbit_msg_store:remove(MsgIds), - {ok, State}. - -internal_tx_publish(Message = #basic_message { guid = MsgId, - content = Content }, State) -> - ClearedContent = rabbit_binary_parser:clear_decoded_content(Content), - ok = rabbit_msg_store:write( - MsgId, Message #basic_message { content = ClearedContent }), - {ok, State}. - -internal_tx_commit(Q, PubMsgIds, AckSeqIds, From, State) -> - TxDetails = {Q, PubMsgIds, AckSeqIds, From}, - ok = rabbit_msg_store:sync([MsgId || {MsgId, _, _} <- PubMsgIds], - fun () -> finalise_commit(TxDetails) end), - State. - -finalise_commit({Q, PubMsgIds, AckSeqIds, From}, - State = #dqstate { sequences = Sequences }) -> - {InitReadSeqId, InitWriteSeqId} = sequence_lookup(Sequences, Q), - WriteSeqId = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun ({MsgId, IsDelivered, IsPersistent}, SeqId) -> - ok = mnesia:write( - rabbit_disk_queue, - #dq_msg_loc { - queue_and_seq_id = {Q, SeqId}, - msg_id = MsgId, - is_delivered = IsDelivered, - is_persistent = IsPersistent - }, write), - SeqId + 1 - end, InitWriteSeqId, PubMsgIds) - end), - {ok, State1} = remove_messages(Q, AckSeqIds, State), - true = case PubMsgIds of - [] -> true; - _ -> ets:insert(Sequences, - {Q, InitReadSeqId, WriteSeqId}) - end, - gen_server2:reply(From, ok), - State1. - -internal_publish(Q, Message = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - IsDelivered, State) -> - {ok, State1 = #dqstate { sequences = Sequences }} = - internal_tx_publish(Message, State), - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - ok = mnesia:dirty_write(rabbit_disk_queue, - #dq_msg_loc { queue_and_seq_id = {Q, WriteSeqId}, - msg_id = MsgId, - is_delivered = IsDelivered, - is_persistent = IsPersistent }), - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId + 1}), - {ok, {MsgId, WriteSeqId}, State1}. - -internal_tx_rollback(MsgIds, State) -> - ok = rabbit_msg_store:remove(MsgIds), - {ok, State}. 
- -internal_requeue(_Q, [], State) -> - {ok, State}; -internal_requeue(Q, MsgSeqIds, State = #dqstate { sequences = Sequences }) -> - %% We know that every seq_id in here is less than the ReadSeqId - %% you'll get if you look up this queue in Sequences (i.e. they've - %% already been delivered). We also know that the rows for these - %% messages are still in rabbit_disk_queue (i.e. they've not been - %% ack'd). - %% - %% Now, it would be nice if we could adjust the sequence ids in - %% rabbit_disk_queue (mnesia) to create a contiguous block and - %% then drop the ReadSeqId for the queue by the corresponding - %% amount. However, this is not safe because there may be other - %% sequence ids which have been sent out as part of deliveries - %% which are not being requeued. As such, moving things about in - %% rabbit_disk_queue _under_ the current ReadSeqId would result in - %% such sequence ids referring to the wrong messages. - %% - %% Therefore, the only solution is to take these messages, and to - %% reenqueue them at the top of the queue. Usefully, this only - %% affects the Sequences and rabbit_disk_queue structures - there - %% is no need to physically move the messages about on disk, so - %% the message store remains unaffected, except we need to tell it - %% about the ids of the requeued messages so it can remove them - %% from its message cache if necessary. - - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - {WriteSeqId1, Q, MsgIds} = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl(fun requeue_message/2, {WriteSeqId, Q, []}, - MsgSeqIds) - end), - true = ets:insert(Sequences, {Q, ReadSeqId, WriteSeqId1}), - ok = rabbit_msg_store:release(MsgIds), - {ok, State}. - -requeue_message({{MsgId, SeqId}, IsDelivered}, {WriteSeqId, Q, Acc}) -> - [Obj = #dq_msg_loc { is_delivered = true, msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, SeqId}, write), - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeqId}, - is_delivered = IsDelivered - }, - write), - ok = mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write), - {WriteSeqId + 1, Q, [MsgId | Acc]}. - -%% move the next N messages from the front of the queue to the back. -internal_requeue_next_n(Q, N, State = #dqstate { sequences = Sequences }) -> - {ReadSeqId, WriteSeqId} = sequence_lookup(Sequences, Q), - if N >= (WriteSeqId - ReadSeqId) -> {ok, State}; - true -> - {ReadSeqIdN, WriteSeqIdN, MsgIds} = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - requeue_next_messages(Q, N, ReadSeqId, WriteSeqId, []) - end - ), - true = ets:insert(Sequences, {Q, ReadSeqIdN, WriteSeqIdN}), - ok = rabbit_msg_store:release(MsgIds), - {ok, State} - end. - -requeue_next_messages(_Q, 0, ReadSeq, WriteSeq, Acc) -> - {ReadSeq, WriteSeq, Acc}; -requeue_next_messages(Q, N, ReadSeq, WriteSeq, Acc) -> - [Obj = #dq_msg_loc { msg_id = MsgId }] = - mnesia:read(rabbit_disk_queue, {Q, ReadSeq}, write), - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc {queue_and_seq_id = {Q, WriteSeq}}, - write), - ok = mnesia:delete(rabbit_disk_queue, {Q, ReadSeq}, write), - requeue_next_messages(Q, N - 1, ReadSeq + 1, WriteSeq + 1, [MsgId | Acc]). 
- -internal_purge(Q, State = #dqstate { sequences = Sequences }) -> - case sequence_lookup(Sequences, Q) of - {SeqId, SeqId} -> {ok, 0, State}; - {ReadSeqId, WriteSeqId} -> - {MsgSeqIds, WriteSeqId} = - rabbit_misc:unfold( - fun (SeqId) when SeqId == WriteSeqId -> false; - (SeqId) -> - [#dq_msg_loc { msg_id = MsgId }] = - mnesia:dirty_read(rabbit_disk_queue, {Q, SeqId}), - {true, {MsgId, SeqId}, SeqId + 1} - end, ReadSeqId), - true = ets:insert(Sequences, {Q, WriteSeqId, WriteSeqId}), - {ok, State1} = remove_messages(Q, MsgSeqIds, State), - {ok, WriteSeqId - ReadSeqId, State1} - end. - -internal_delete_queue(Q, State) -> - %% remove everything undelivered - {ok, _Count, State1 = #dqstate { sequences = Sequences }} = - internal_purge(Q, State), - true = ets:delete(Sequences, Q), - %% remove everything already delivered - remove_messages( - Q, [{MsgId, SeqId} || #dq_msg_loc { queue_and_seq_id = {_Q, SeqId}, - msg_id = MsgId } <- - mnesia:dirty_match_object( - rabbit_disk_queue, - #dq_msg_loc { - queue_and_seq_id = {Q, '_'}, - _ = '_' })], State1). - -internal_delete_non_durable_queues( - DurableQueues, State = #dqstate { sequences = Sequences }) -> - DurableQueueSet = sets:from_list(DurableQueues), - ets:foldl( - fun ({Q, _Read, _Write}, {ok, State1}) -> - case sets:is_element(Q, DurableQueueSet) of - true -> {ok, State1}; - false -> internal_delete_queue(Q, State1) - end - end, {ok, State}, Sequences). - -%%---------------------------------------------------------------------------- -%% recovery -%%---------------------------------------------------------------------------- - -store_safe_shutdown() -> - ok = rabbit_misc:execute_mnesia_transaction( - fun() -> - mnesia:write(rabbit_disk_queue, - ?SHUTDOWN_MESSAGE, write) - end). - -detect_shutdown_state_and_adjust_delivered_flags() -> - MarkDelivered = - rabbit_misc:execute_mnesia_transaction( - fun() -> - case mnesia:read(rabbit_disk_queue, - ?SHUTDOWN_MESSAGE_KEY, read) of - [?SHUTDOWN_MESSAGE] -> - mnesia:delete(rabbit_disk_queue, - ?SHUTDOWN_MESSAGE_KEY, write), - false; - [] -> - true - end - end), - %% if we crash here, then on startup we'll not find the - %% SHUTDOWN_MESSAGE so will mark everything delivered, which is - %% the safe thing to do. - case MarkDelivered of - true -> mark_messages_delivered(); - false -> ok - end. - -mark_messages_delivered() -> - mark_message_delivered('$start_of_table'). - -%% A single huge transaction is a bad idea because of memory -%% use. Equally, using dirty operations is a bad idea because you -%% shouldn't do writes when doing mnesia:dirty_next, because the -%% ordering can change. So we use transactions of bounded -%% size. However, even this does necessitate restarting between -%% transactions. -mark_message_delivered('$end_of_table') -> - ok; -mark_message_delivered(_Key) -> - mark_message_delivered( - rabbit_misc:execute_mnesia_transaction( - fun () -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - mark_message_delivered(mnesia:first(rabbit_disk_queue), - ?BATCH_SIZE) - end)). - -mark_message_delivered(Key, 0) -> - Key; -mark_message_delivered(Key = '$end_of_table', _N) -> - Key; -mark_message_delivered(Key, N) -> - [Obj] = mnesia:read(rabbit_disk_queue, Key, write), - M = case Obj #dq_msg_loc.is_delivered of - true -> N; - false -> - ok = mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { is_delivered = true }, - write), - N - 1 - end, - mark_message_delivered(mnesia:next(rabbit_disk_queue, Key), M). - -msg_ref_gen_init() -> mnesia:dirty_first(rabbit_disk_queue). 
- -msg_ref_gen('$end_of_table') -> finished; -msg_ref_gen(Key) -> - [#dq_msg_loc { msg_id = MsgId, is_persistent = IsPersistent }] = - mnesia:dirty_read(rabbit_disk_queue, Key), - NextKey = mnesia:dirty_next(rabbit_disk_queue, Key), - {MsgId, case IsPersistent of true -> 1; false -> 0 end, NextKey}. - -prune_flush_batch(DeleteAcc) -> - lists:foldl(fun (Key, ok) -> - mnesia:dirty_delete(rabbit_disk_queue, Key) - end, ok, DeleteAcc). - -prune() -> - prune(mnesia:dirty_first(rabbit_disk_queue), [], 0). - -prune('$end_of_table', DeleteAcc, _Len) -> - prune_flush_batch(DeleteAcc); -prune(Key, DeleteAcc, Len) -> - [#dq_msg_loc { msg_id = MsgId, queue_and_seq_id = {Q, SeqId} }] = - mnesia:dirty_read(rabbit_disk_queue, Key), - {DeleteAcc1, Len1} = - case rabbit_msg_store:contains(MsgId) of - true -> {DeleteAcc, Len}; - false -> {[{Q, SeqId} | DeleteAcc], Len + 1} - end, - if Len1 >= ?BATCH_SIZE -> - %% We have no way of knowing how flushing the batch will - %% affect ordering of records within the table, so have no - %% choice but to start again. Although this will make - %% recovery slower for large queues, we guarantee we can - %% start up in constant memory - ok = prune_flush_batch(DeleteAcc1), - NextKey = mnesia:dirty_first(rabbit_disk_queue), - prune(NextKey, [], 0); - true -> - NextKey = mnesia:dirty_next(rabbit_disk_queue, Key), - prune(NextKey, DeleteAcc1, Len1) - end. - -extract_sequence_numbers(Sequences) -> - true = - rabbit_misc:execute_mnesia_transaction( - %% the ets manipulation within this transaction is - %% idempotent, in particular we're only reading from mnesia, - %% and combining what we read with what we find in - %% ets. Should the transaction restart, the non-rolledback - %% data in ets can still be successfully combined with what - %% we find in mnesia - fun() -> - ok = mnesia:read_lock_table(rabbit_disk_queue), - mnesia:foldl( - fun (#dq_msg_loc { queue_and_seq_id = {Q, SeqId} }, true) -> - NextWrite = SeqId + 1, - case ets:lookup(Sequences, Q) of - [] -> ets:insert_new(Sequences, - {Q, SeqId, NextWrite}); - [Orig = {_, Read, Write}] -> - Repl = {Q, lists:min([Read, SeqId]), - lists:max([Write, NextWrite])}, - case Orig == Repl of - true -> true; - false -> ets:insert(Sequences, Repl) - end - end - end, true, rabbit_disk_queue) - end), - ok = remove_gaps_in_sequences(Sequences). - -remove_gaps_in_sequences(Sequences) -> - %% read the comments at internal_requeue. - - %% Because we are at startup, we know that no sequence ids have - %% been issued (or at least, they were, but have been - %% forgotten). Therefore, we can nicely shuffle up and not - %% worry. Note that I'm choosing to shuffle up, but alternatively - %% we could shuffle downwards. However, I think there's greater - %% likelihood of gaps being at the bottom rather than the top of - %% the queue, so shuffling up should be the better bet. - QueueBoundaries = - rabbit_misc:execute_mnesia_transaction( - fun() -> - ok = mnesia:write_lock_table(rabbit_disk_queue), - lists:foldl( - fun ({Q, ReadSeqId, WriteSeqId}, Acc) -> - Gap = shuffle_up(Q, ReadSeqId-1, WriteSeqId-1, 0), - [{Q, ReadSeqId + Gap, WriteSeqId} | Acc] - end, [], ets:match_object(Sequences, '_')) - end), - true = lists:foldl(fun (Obj, true) -> ets:insert(Sequences, Obj) end, - true, QueueBoundaries), - ok. 
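The boundary reconstruction in extract_sequence_numbers, pulled out as a standalone sketch over plain {Q, SeqId} rows: Read is the smallest seq id seen for a queue and Write is one past the largest, the same min/max combine as the mnesia:foldl above.

    -module(boundaries_sketch).
    -export([boundaries/1]).

    boundaries(Rows) ->
        lists:foldl(
          fun ({Q, SeqId}, Dict) ->
                  dict:update(Q,
                              fun ({Read, Write}) ->
                                      {lists:min([Read, SeqId]),
                                       lists:max([Write, SeqId + 1])}
                              end, {SeqId, SeqId + 1}, Dict)
          end, dict:new(), Rows).

    %% boundaries([{q, 3}, {q, 1}, {q, 2}]) yields a dict mapping q to
    %% {1, 4}: one {Read, Write} pair per queue, gaps to be shuffled out
    %% afterwards by remove_gaps_in_sequences.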
- -shuffle_up(_Q, SeqId, SeqId, Gap) -> - Gap; -shuffle_up(Q, BaseSeqId, SeqId, Gap) -> - GapInc = - case mnesia:read(rabbit_disk_queue, {Q, SeqId}, write) of - [] -> 1; - [Obj] -> - case Gap of - 0 -> ok; - _ -> mnesia:write(rabbit_disk_queue, - Obj #dq_msg_loc { - queue_and_seq_id = {Q, SeqId + Gap }}, - write), - mnesia:delete(rabbit_disk_queue, {Q, SeqId}, write) - end, - 0 - end, - shuffle_up(Q, BaseSeqId, SeqId - 1, Gap + GapInc). diff --git a/src/rabbit_mixed_queue.erl b/src/rabbit_mixed_queue.erl deleted file mode 100644 index c278bac8..00000000 --- a/src/rabbit_mixed_queue.erl +++ /dev/null @@ -1,673 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_mixed_queue). - --include("rabbit.hrl"). - --export([init/2]). - --export([publish/2, publish_delivered/2, fetch/1, ack/2, - tx_publish/2, tx_commit/3, tx_rollback/2, requeue/2, purge/1, - len/1, is_empty/1, delete_queue/1, maybe_prefetch/1]). - --export([set_storage_mode/3, storage_mode/1, - estimate_queue_memory/1]). - --record(mqstate, { mode, - msg_buf, - queue, - is_durable, - length, - memory_size, - prefetcher - } - ). - --define(TO_DISK_MAX_FLUSH_SIZE, 100000). --define(MAGIC_MARKER, <<"$magic_marker">>). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(mode() :: ( 'disk' | 'mixed' )). --type(mqstate() :: #mqstate { mode :: mode(), - msg_buf :: queue(), - queue :: queue_name(), - is_durable :: boolean(), - length :: non_neg_integer(), - memory_size :: (non_neg_integer() | 'undefined'), - prefetcher :: (pid() | 'undefined') - }). --type(msg_id() :: guid()). --type(seq_id() :: non_neg_integer()). --type(ack_tag() :: ( 'not_on_disk' | {msg_id(), seq_id()} )). --type(okmqs() :: {'ok', mqstate()}). - --spec(init/2 :: (queue_name(), boolean()) -> okmqs()). --spec(publish/2 :: (message(), mqstate()) -> okmqs()). --spec(publish_delivered/2 :: (message(), mqstate()) -> - {'ok', ack_tag(), mqstate()}). --spec(fetch/1 :: (mqstate()) -> - {('empty' | {message(), boolean(), ack_tag(), non_neg_integer()}), - mqstate()}). --spec(ack/2 :: ([{message(), ack_tag()}], mqstate()) -> okmqs()). --spec(tx_publish/2 :: (message(), mqstate()) -> okmqs()). 
--spec(tx_commit/3 :: ([message()], [ack_tag()], mqstate()) -> okmqs()). --spec(tx_rollback/2 :: ([message()], mqstate()) -> okmqs()). --spec(requeue/2 :: ([{message(), ack_tag()}], mqstate()) -> okmqs()). --spec(purge/1 :: (mqstate()) -> okmqs()). --spec(delete_queue/1 :: (mqstate()) -> {'ok', mqstate()}). --spec(len/1 :: (mqstate()) -> non_neg_integer()). --spec(is_empty/1 :: (mqstate()) -> boolean()). - --spec(set_storage_mode/3 :: (mode(), [message()], mqstate()) -> okmqs()). --spec(estimate_queue_memory/1 :: (mqstate()) -> - {mqstate(), non_neg_integer()}). --spec(storage_mode/1 :: (mqstate()) -> mode()). - --endif. - -%%---------------------------------------------------------------------------- - -init(Queue, IsDurable) -> - Len = rabbit_disk_queue:len(Queue), - {Size, MarkerFound, MarkerPreludeCount} = - rabbit_disk_queue:foldl( - fun (Msg = #basic_message { is_persistent = true }, - _AckTag, _IsDelivered, {SizeAcc, MFound, MPCount}) -> - SizeAcc1 = SizeAcc + size_of_message(Msg), - case {MFound, is_magic_marker_message(Msg)} of - {false, false} -> {SizeAcc1, false, MPCount + 1}; - {false, true} -> {SizeAcc, true, MPCount}; - {true, false} -> {SizeAcc1, true, MPCount} - end - end, {0, false, 0}, Queue), - Len1 = case MarkerFound of - false -> Len; - true -> - ok = rabbit_disk_queue:requeue_next_n(Queue, - MarkerPreludeCount), - Len2 = Len - 1, - {ok, Len2} = fetch_ack_magic_marker_message(Queue), - Len2 - end, - MsgBuf = inc_queue_length(queue:new(), Len1), - {ok, #mqstate { mode = disk, msg_buf = MsgBuf, queue = Queue, - is_durable = IsDurable, length = Len1, - memory_size = Size, prefetcher = undefined }}. - -publish(Msg = #basic_message { is_persistent = IsPersistent }, State = - #mqstate { queue = Q, mode = Mode, is_durable = IsDurable, - msg_buf = MsgBuf, length = Length }) -> - Msg1 = ensure_binary_properties(Msg), - ok = case on_disk(Mode, IsDurable, IsPersistent) of - true -> rabbit_disk_queue:publish(Q, Msg1, false); - false -> ok - end, - MsgBuf1 = case Mode of - disk -> inc_queue_length(MsgBuf, 1); - mixed -> queue:in({Msg1, false}, MsgBuf) - end, - {ok, gain_memory(size_of_message(Msg1), - State #mqstate { msg_buf = MsgBuf1, - length = Length + 1 })}. - -%% Assumption here is that the queue is empty already (only called via -%% attempt_immediate_delivery). -publish_delivered(Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent}, - State = #mqstate { is_durable = IsDurable, queue = Q, - length = 0 }) - when IsDurable andalso IsPersistent -> - Msg1 = ensure_binary_properties(Msg), - ok = rabbit_disk_queue:publish(Q, Msg1, true), - State1 = gain_memory(size_of_message(Msg1), State), - %% must call phantom_fetch otherwise the msg remains at the head - %% of the queue. This is synchronous, but unavoidable as we need - %% the AckTag - {MsgId, true, AckTag, 0} = rabbit_disk_queue:phantom_fetch(Q), - {ok, AckTag, State1}; -publish_delivered(Msg, State = #mqstate { length = 0 }) -> - Msg1 = ensure_binary_properties(Msg), - {ok, not_on_disk, gain_memory(size_of_message(Msg1), State)}. 
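The marker scan in init/2 above, as a toy fold over atoms (a sketch, not the real #basic_message fold): everything seen before the marker counts as prelude, i.e. the messages that must be requeued to the back.

    -module(marker_scan_sketch).
    -export([scan/1]).

    %% Returns {MarkerFound, MarkerPreludeCount}.
    scan(Msgs) ->
        lists:foldl(fun (marker, {_Found, Count})  -> {true, Count};
                        (_Msg,   {false, Count})   -> {false, Count + 1};
                        (_Msg,   Acc = {true, _})  -> Acc
                    end, {false, 0}, Msgs).

    %% scan([a, b, marker, c]) = {true, 2}: two msgs precede the marker
    %% and would be requeued behind it on recovery.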
- -fetch(State = #mqstate { length = 0 }) -> - {empty, State}; -fetch(State = #mqstate { msg_buf = MsgBuf, queue = Q, - is_durable = IsDurable, length = Length, - prefetcher = Prefetcher }) -> - {{value, Value}, MsgBuf1} = queue:out(MsgBuf), - Rem = Length - 1, - State1 = State #mqstate { length = Rem }, - case Value of - {Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - IsDelivered} -> - AckTag = - case IsDurable andalso IsPersistent of - true -> - {MsgId, IsDelivered, AckTag1, _PRem} - = rabbit_disk_queue:phantom_fetch(Q), - AckTag1; - false -> - not_on_disk - end, - {{Msg, IsDelivered, AckTag, Rem}, - State1 #mqstate { msg_buf = MsgBuf1 }}; - {Msg = #basic_message { is_persistent = IsPersistent }, - IsDelivered, AckTag} -> - %% message has come via the prefetcher, thus it's been - %% marked delivered. If it's not persistent+durable, we - %% should ack it now - AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), - {{Msg, IsDelivered, AckTag1, Rem}, - State1 #mqstate { msg_buf = MsgBuf1 }}; - _ when Prefetcher == undefined -> - MsgBuf2 = dec_queue_length(MsgBuf, 1), - {Msg = #basic_message { is_persistent = IsPersistent }, - IsDelivered, AckTag, _PersistRem} - = rabbit_disk_queue:fetch(Q), - AckTag1 = maybe_ack(Q, IsDurable, IsPersistent, AckTag), - {{Msg, IsDelivered, AckTag1, Rem}, - State1 #mqstate { msg_buf = MsgBuf2 }}; - _ -> - %% use State, not State1 as we've not dec'd length - fetch(case rabbit_queue_prefetcher:drain(Prefetcher) of - empty -> State #mqstate { prefetcher = undefined }; - {Fetched, Status} -> - MsgBuf2 = dec_queue_length(MsgBuf, queue:len(Fetched)), - State #mqstate - { msg_buf = queue:join(Fetched, MsgBuf2), - prefetcher = case Status of - finished -> undefined; - continuing -> Prefetcher - end } - end) - end. - -ack(MsgsWithAcks, State = #mqstate { queue = Q }) -> - {AckTags, ASize} = remove_diskless(MsgsWithAcks), - ok = case AckTags of - [] -> ok; - _ -> rabbit_disk_queue:ack(Q, AckTags) - end, - {ok, lose_memory(ASize, State)}. - -tx_publish(Msg = #basic_message { is_persistent = IsPersistent }, - State = #mqstate { mode = Mode, is_durable = IsDurable }) -> - Msg1 = ensure_binary_properties(Msg), - ok = case on_disk(Mode, IsDurable, IsPersistent) of - true -> rabbit_disk_queue:tx_publish(Msg1); - false -> ok - end, - {ok, gain_memory(size_of_message(Msg1), State)}. - -tx_commit(Publishes, MsgsWithAcks, - State = #mqstate { mode = Mode, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable, length = Length }) -> - PersistentPubs = - [{MsgId, false, IsPersistent} || - #basic_message { guid = MsgId, - is_persistent = IsPersistent } <- Publishes, - on_disk(Mode, IsDurable, IsPersistent)], - {RealAcks, ASize} = remove_diskless(MsgsWithAcks), - ok = case {PersistentPubs, RealAcks} of - {[], []} -> ok; - _ -> rabbit_disk_queue:tx_commit( - Q, PersistentPubs, RealAcks) - end, - Len = length(Publishes), - MsgBuf1 = case Mode of - disk -> inc_queue_length(MsgBuf, Len); - mixed -> ToAdd = [{Msg, false} || Msg <- Publishes], - queue:join(MsgBuf, queue:from_list(ToAdd)) - end, - {ok, lose_memory(ASize, State #mqstate { msg_buf = MsgBuf1, - length = Length + Len })}. 
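The recurring IsDurable andalso IsPersistent test in fetch/1 and ack/2 deserves spelling out on its own (a sketch of the decision, not a new API): only a durable queue holding a persistent message keeps a disk ack tag around; every other combination is acked on the spot and reported as not_on_disk.

    ack_action(IsDurable, IsPersistent) ->
        case IsDurable andalso IsPersistent of
            true  -> keep_ack_tag;   %% channel acks later via rabbit_disk_queue
            false -> ack_now         %% maybe_ack/4 calls ack/2 immediately
        end.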
- -tx_rollback(Publishes, - State = #mqstate { mode = Mode, is_durable = IsDurable }) -> - {PersistentPubs, CSize} = - lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent, - guid = MsgId }, {Acc, CSizeAcc}) -> - Msg1 = ensure_binary_properties(Msg), - CSizeAcc1 = CSizeAcc + size_of_message(Msg1), - {case on_disk(Mode, IsDurable, IsPersistent) of - true -> [MsgId | Acc]; - _ -> Acc - end, CSizeAcc1} - end, {[], 0}, Publishes), - ok = case PersistentPubs of - [] -> ok; - _ -> rabbit_disk_queue:tx_rollback(PersistentPubs) - end, - {ok, lose_memory(CSize, State)}. - -%% [{Msg, AckTag}] -requeue(MsgsWithAckTags, - State = #mqstate { mode = Mode, queue = Q, msg_buf = MsgBuf, - is_durable = IsDurable, length = Length }) -> - RQ = lists:foldl( - fun ({Msg = #basic_message { is_persistent = IsPersistent }, AckTag}, - RQAcc) -> - case IsDurable andalso IsPersistent of - true -> - [{AckTag, true} | RQAcc]; - false -> - case Mode of - mixed -> - RQAcc; - disk when not_on_disk =:= AckTag -> - ok = case RQAcc of - [] -> ok; - _ -> rabbit_disk_queue:requeue - (Q, lists:reverse(RQAcc)) - end, - ok = rabbit_disk_queue:publish(Q, Msg, true), - [] - end - end - end, [], MsgsWithAckTags), - ok = case RQ of - [] -> ok; - _ -> rabbit_disk_queue:requeue(Q, lists:reverse(RQ)) - end, - Len = length(MsgsWithAckTags), - MsgBuf1 = case Mode of - mixed -> ToAdd = [{Msg, true} || {Msg, _} <- MsgsWithAckTags], - queue:join(MsgBuf, queue:from_list(ToAdd)); - disk -> inc_queue_length(MsgBuf, Len) - end, - {ok, State #mqstate { msg_buf = MsgBuf1, length = Length + Len }}. - -purge(State = #mqstate { queue = Q, mode = Mode, length = Count, - prefetcher = Prefetcher, memory_size = QSize }) -> - PurgedFromDisk = rabbit_disk_queue:purge(Q), - Count = case Mode of - disk -> - PurgedFromDisk; - mixed -> - ok = case Prefetcher of - undefined -> ok; - _ -> rabbit_queue_prefetcher:stop(Prefetcher) - end, - Count - end, - {Count, lose_memory(QSize, State #mqstate { msg_buf = queue:new(), - length = 0, - prefetcher = undefined })}. - -delete_queue(State = #mqstate { queue = Q, memory_size = QSize, - prefetcher = Prefetcher - }) -> - ok = case Prefetcher of - undefined -> ok; - _ -> rabbit_queue_prefetcher:stop(Prefetcher) - end, - ok = rabbit_disk_queue:delete_queue(Q), - {ok, lose_memory(QSize, State #mqstate { length = 0, msg_buf = queue:new(), - prefetcher = undefined })}. - -len(#mqstate { length = Length }) -> - Length. - -is_empty(#mqstate { length = Length }) -> - 0 == Length. - -%%---------------------------------------------------------------------------- -%% storage mode management -%%---------------------------------------------------------------------------- - -set_storage_mode(Mode, _TxnMessages, State = #mqstate { mode = Mode }) -> - {ok, State}; -set_storage_mode(disk, TxnMessages, State = - #mqstate { mode = mixed, queue = Q, msg_buf = MsgBuf, length = Length, - is_durable = IsDurable, prefetcher = Prefetcher }) -> - State1 = State #mqstate { mode = disk }, - MsgBuf1 = - case Prefetcher of - undefined -> MsgBuf; - _ -> - case rabbit_queue_prefetcher:drain_and_stop(Prefetcher) of - empty -> MsgBuf; - Fetched -> - MsgBuf2 = dec_queue_length(MsgBuf, queue:len(Fetched)), - queue:join(Fetched, MsgBuf2) - end - end, - {ok, MsgBuf3} = - send_messages_to_disk(IsDurable, Q, MsgBuf1, Length), - %% tx_publish txn messages. 
Some of these will already have been - %% published if they really are durable and persistent, which is - %% why we can't just use our own tx_publish/2 function (would end - %% up publishing twice, so refcount would go wrong in disk_queue). - %% The order of msgs within a txn is determined only at tx_commit - %% time, so it doesn't matter if we're publishing msgs to the disk - %% queue in a different order from that which we received them in. - lists:foreach( - fun (Msg = #basic_message { is_persistent = IsPersistent }) -> - ok = case IsDurable andalso IsPersistent of - true -> ok; - _ -> rabbit_disk_queue:tx_publish(Msg) - end - end, TxnMessages), - garbage_collect(), - {ok, State1 #mqstate { msg_buf = MsgBuf3, prefetcher = undefined }}; -set_storage_mode(mixed, TxnMessages, State = - #mqstate { mode = disk, is_durable = IsDurable }) -> - %% The queue has a token just saying how many msgs are on disk - %% (this is already built for us when in disk mode). - %% Don't actually do anything to the disk - %% Don't start prefetcher just yet because the queue may be busy - - %% wait for hibernate timeout in the amqqueue_process. - - %% Remove txn messages from disk which are not (persistent and - %% durable). This is necessary to avoid leaks. This is also pretty - %% much the inverse behaviour of our own tx_rollback/2, which is - %% why we're not using that. - Cancel = [ MsgId || #basic_message { is_persistent = IsPersistent, - guid = MsgId } <- TxnMessages, - not (IsDurable andalso IsPersistent) ], - ok = case Cancel of - [] -> ok; - _ -> rabbit_disk_queue:tx_rollback(Cancel) - end, - garbage_collect(), - {ok, State #mqstate { mode = mixed }}. - -send_messages_to_disk(_IsDurable, _Q, MsgBuf, 0) -> - {ok, MsgBuf}; -send_messages_to_disk(IsDurable, Q, MsgBuf, Length) -> - case scan_for_disk_after_ram(IsDurable, MsgBuf) of - disk_only -> - %% Everything on disk already, we don't need to do - %% anything - {ok, inc_queue_length(queue:new(), Length)}; - {not_found, PrefixLen, MsgBufRAMSuffix} -> - %% No disk msgs follow RAM msgs and the queue has a RAM - %% suffix, so we can just publish those. If we crash at - %% this point, we may lose some messages, but everything - %% will remain in the right order, so no need for the - %% marker messages. - MsgBuf1 = inc_queue_length(queue:new(), PrefixLen), - send_messages_to_disk(IsDurable, Q, MsgBufRAMSuffix, 0, 0, [], [], - MsgBuf1); - found -> - %% There are disk msgs *after* ram msgs in the queue. We - %% need to reenqueue everything. Note that due to batching - %% going on (see comments above send_messages_to_disk/8), - %% if we crash during this transition, we could have - %% messages in the wrong order on disk. Thus we publish a - %% magic_marker_message which, when this transition is - %% complete, will be back at the head of the queue. Should - %% we die, then on startup, during the foldl over the queue, - %% we detect the marker message and requeue all the messages - %% in front of it to the back of the queue, thus - %% correcting the order. The result is that everything - %% ends up back in the same order, but will have new - %% sequence IDs. - ok = publish_magic_marker_message(Q), - {ok, MsgBuf1} = - send_messages_to_disk(IsDurable, Q, MsgBuf, 0, 0, [], [], - queue:new()), - {ok, Length} = fetch_ack_magic_marker_message(Q), - {ok, MsgBuf1} - end. - -scan_for_disk_after_ram(IsDurable, MsgBuf) -> - scan_for_disk_after_ram(IsDurable, MsgBuf, {disk, 0}).
- -%% We return 'disk_only' if everything is already on disk; 'found' if -%% we find a disk message after finding RAM messages; and -%% {'not_found', Count, MsgBuf} otherwise, where Count is the length -%% of the disk prefix, and MsgBuf is the RAM suffix of the MsgBuf -%% argument. Note msgs via the prefetcher are counted as RAM msgs on -%% the grounds that they have to be republished. -scan_for_disk_after_ram(IsDurable, MsgBuf, Mode) -> - case queue:out(MsgBuf) of - {empty, _MsgBuf} -> - case Mode of - {ram, N, MsgBuf1} -> {not_found, N, MsgBuf1}; - {disk, _N} -> disk_only - end; - {{value, {on_disk, Count}}, MsgBuf1} -> - case Mode of - {ram, _, _} -> found; %% found disk after RAM, bad - {disk, N} -> scan_for_disk_after_ram(IsDurable, MsgBuf1, - {disk, N + Count}) - end; - {{value, {_Msg, _IsDelivered, _AckTag}}, MsgBuf1} -> - %% found a msg from the prefetcher. Ensure RAM mode - scan_for_disk_after_ram(IsDurable, MsgBuf1, - ensure_ram(Mode, MsgBuf)); - {{value, - {#basic_message { is_persistent = IsPersistent }, _IsDelivered}}, - MsgBuf1} -> - %% normal message - case IsDurable andalso IsPersistent of - true -> - case Mode of - {ram, _, _} -> found; %% found disk after RAM, bad - {disk, N} -> scan_for_disk_after_ram(IsDurable, MsgBuf1, - {disk, N + 1}) - end; - false -> scan_for_disk_after_ram(IsDurable, MsgBuf1, - ensure_ram(Mode, MsgBuf)) - end - end. - -ensure_ram(Obj = {ram, _N, _MsgBuf}, _MsgBuf1) -> Obj; -ensure_ram({disk, N}, MsgBuf) -> {ram, N, MsgBuf}. - -%% (Re)enqueue _everything_ here. Messages which are not on disk will -%% be tx_published, messages that are on disk will be requeued to the -%% end of the queue. This is done in batches, where a batch consists -%% of a number of tx_publishes, a tx_commit and then a call to -%% requeue_next_n. We do not want to fetch messages off disk only to -%% republish them later. Note in the tx_commit, we ack messages which -%% are being _re_published. These are messages that have been fetched -%% by the prefetcher. -%% Batches are limited in size to make sure that the resultant mnesia -%% transaction on tx_commit does not get too big, memory-wise. -send_messages_to_disk(IsDurable, Q, Queue, PublishCount, RequeueCount, - Commit, Ack, MsgBuf) -> - case queue:out(Queue) of - {empty, _Queue} -> - ok = flush_messages_to_disk_queue(Q, Commit, Ack), - {[], []} = flush_requeue_to_disk_queue(Q, RequeueCount, [], []), - {ok, MsgBuf}; - {{value, {Msg = #basic_message { is_persistent = IsPersistent }, - IsDelivered}}, Queue1} -> - case IsDurable andalso IsPersistent of - true -> %% it's already in the Q - send_messages_to_disk( - IsDurable, Q, Queue1, PublishCount, RequeueCount + 1, - Commit, Ack, inc_queue_length(MsgBuf, 1)); - false -> - republish_message_to_disk_queue( - IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, - Ack, MsgBuf, Msg, IsDelivered) - end; - {{value, {Msg, IsDelivered, AckTag}}, Queue1} -> - %% These have come via the prefetcher, so are no longer in - %% the disk queue (yes, they've not been ack'd yet, but - %% the head of the queue has passed these messages). We - %% need to requeue them, which we sneakily achieve by - %% tx_publishing them, and then in the tx_commit, ack the - %% old copy. - republish_message_to_disk_queue( - IsDurable, Q, Queue1, PublishCount, RequeueCount, Commit, - [AckTag | Ack], MsgBuf, Msg, IsDelivered); - {{value, {on_disk, Count}}, Queue1} -> - send_messages_to_disk( - IsDurable, Q, Queue1, PublishCount, RequeueCount + Count, - Commit, Ack, inc_queue_length(MsgBuf, Count)) - end.
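The same scan over a simplified buffer, as a runnable sketch ('d' marks a message already on disk, 'r' a RAM message; prefetched messages count as 'r' since they must be republished):

    -module(scan_sketch).
    -export([scan/1]).

    scan(Buf) -> scan(Buf, {disk, 0}).

    scan([], {disk, _N})              -> disk_only;
    scan([], {ram, N, Suffix})        -> {not_found, N, Suffix};
    scan([d | T], {disk, N})          -> scan(T, {disk, N + 1});
    scan([d | _], {ram, _, _})        -> found;
    scan([r | T], {disk, N})          -> scan(T, {ram, N, [r | T]});
    scan([r | T], Mode = {ram, _, _}) -> scan(T, Mode).

    %% scan([d, d]) = disk_only
    %% scan([d, r]) = {not_found, 1, [r]}  %% disk prefix, RAM suffix
    %% scan([r, d]) = found                %% disk after RAM: reenqueue all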
- -republish_message_to_disk_queue( - IsDurable, Q, Queue, PublishCount, RequeueCount, Commit, Ack, MsgBuf, - Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, - IsDelivered) -> - {Commit1, Ack1} = flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack), - ok = rabbit_disk_queue:tx_publish(Msg), - Commit2 = [{MsgId, IsDelivered, IsPersistent} | Commit1], - {PublishCount1, Commit3, Ack2} = - case PublishCount == ?TO_DISK_MAX_FLUSH_SIZE of - true -> ok = flush_messages_to_disk_queue(Q, Commit2, Ack1), - {0, [], []}; - false -> {PublishCount + 1, Commit2, Ack1} - end, - send_messages_to_disk(IsDurable, Q, Queue, PublishCount1, 0, - Commit3, Ack2, inc_queue_length(MsgBuf, 1)). - -flush_messages_to_disk_queue(_Q, [], []) -> - ok; -flush_messages_to_disk_queue(Q, Commit, Ack) -> - rabbit_disk_queue:tx_commit(Q, lists:reverse(Commit), Ack). - -flush_requeue_to_disk_queue(_Q, 0, Commit, Ack) -> - {Commit, Ack}; -flush_requeue_to_disk_queue(Q, RequeueCount, Commit, Ack) -> - ok = flush_messages_to_disk_queue(Q, Commit, Ack), - ok = rabbit_disk_queue:requeue_next_n(Q, RequeueCount), - {[], []}. - -%% Scaling this by 4 is a magic number, found by trial and error to -%% work ok. We deliberately over-report so that we run out of -%% memory sooner rather than later, because the transition to a -%% disk-only mode can transiently take quite a lot of memory. -estimate_queue_memory(State = #mqstate { memory_size = Size }) -> - {State, 4 * Size}. - -storage_mode(#mqstate { mode = Mode }) -> - Mode. - -%%---------------------------------------------------------------------------- -%% helpers -%%---------------------------------------------------------------------------- - -size_of_message( - #basic_message { content = #content { payload_fragments_rev = Payload, - properties_bin = PropsBin }}) - when is_binary(PropsBin) -> - size(PropsBin) + lists:foldl(fun (Frag, SumAcc) -> - SumAcc + size(Frag) - end, 0, Payload). - -ensure_binary_properties(Msg = #basic_message { content = Content }) -> - Msg #basic_message { - content = rabbit_binary_generator:ensure_content_encoded(Content) }. - -gain_memory(Inc, State = #mqstate { memory_size = QSize }) -> - State #mqstate { memory_size = QSize + Inc }. - -lose_memory(Dec, State = #mqstate { memory_size = QSize }) -> - State #mqstate { memory_size = QSize - Dec }. - -inc_queue_length(MsgBuf, 0) -> - MsgBuf; -inc_queue_length(MsgBuf, Count) -> - {NewCount, MsgBufTail} = - case queue:out_r(MsgBuf) of - {empty, MsgBuf1} -> {Count, MsgBuf1}; - {{value, {on_disk, Len}}, MsgBuf1} -> {Len + Count, MsgBuf1}; - {{value, _}, _MsgBuf1} -> {Count, MsgBuf} - end, - queue:in({on_disk, NewCount}, MsgBufTail). - -dec_queue_length(MsgBuf, Count) -> - case queue:out(MsgBuf) of - {{value, {on_disk, Len}}, MsgBuf1} -> - case Len of - Count -> - MsgBuf1; - _ when Len > Count -> - queue:in_r({on_disk, Len-Count}, MsgBuf1) - end; - _ -> MsgBuf - end. - -maybe_prefetch(State = #mqstate { prefetcher = undefined, - mode = mixed, - msg_buf = MsgBuf, - queue = Q }) -> - case queue:peek(MsgBuf) of - {value, {on_disk, Count}} -> - %% only prefetch for the next contiguous block on - %% disk. Beyond there, we either hit the end of the queue, - %% or the next msg is already in RAM, held by us, the - %% mixed queue - {ok, Prefetcher} = rabbit_queue_prefetcher:start_link(Q, Count), - State #mqstate { prefetcher = Prefetcher }; - _ -> State - end; -maybe_prefetch(State) -> - State.
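For reference, how the {on_disk, Count} tokens above behave (a sketch asserting against the two functions just defined):

    token_demo() ->
        MB1 = inc_queue_length(inc_queue_length(queue:new(), 3), 2),
        {{value, {on_disk, 5}}, _} = queue:out(MB1),   %% adjacent counts coalesce
        {{value, {on_disk, 4}}, _} = queue:out(dec_queue_length(MB1, 1)),
        ok.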
- -maybe_ack(_Q, true, true, AckTag) -> - AckTag; -maybe_ack(Q, _, _, AckTag) -> - ok = rabbit_disk_queue:ack(Q, [AckTag]), - not_on_disk. - -remove_diskless(MsgsWithAcks) -> - lists:foldl( - fun ({Msg, AckTag}, {AccAckTags, AccSize}) -> - Msg1 = ensure_binary_properties(Msg), - {case AckTag of - not_on_disk -> AccAckTags; - _ -> [AckTag | AccAckTags] - end, size_of_message(Msg1) + AccSize} - end, {[], 0}, MsgsWithAcks). - -on_disk(disk, _IsDurable, _IsPersistent) -> true; -on_disk(mixed, true, true) -> true; -on_disk(mixed, _IsDurable, _IsPersistent) -> false. - -publish_magic_marker_message(Q) -> - Msg = rabbit_basic:message( - rabbit_misc:r(<<"/">>, exchange, <<>>), ?MAGIC_MARKER, - [], <<>>, <<>>, true), - ok = rabbit_disk_queue:publish(Q, ensure_binary_properties(Msg), false). - -fetch_ack_magic_marker_message(Q) -> - {Msg, false, AckTag, Length} = rabbit_disk_queue:fetch(Q), - true = is_magic_marker_message(Msg), - ok = rabbit_disk_queue:ack(Q, [AckTag]), - {ok, Length}. - -is_magic_marker_message(#basic_message { routing_key = ?MAGIC_MARKER, - is_persistent = true, guid = <<>> }) -> - true; -is_magic_marker_message(_) -> - false. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index c0a559e9..9dae268f 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -348,7 +348,6 @@ seg_num_to_path(Dir, SegNum) -> SegName = integer_to_list(SegNum), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). - %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 17a192ada496a5671ecef2395a1fb816ae622dd5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Oct 2009 18:07:23 +0100 Subject: Be a bit more clever about starting the message store and looking up durable queues. However, it's still not quite right because queues can be deleted by other nodes in the cluster between extracting the list of durable queues in msg_store startup and the startup of queues themselves. This means that we can end up seeding the msg_store with msgs from queues that won't actually start up. You might think that we'd be saved by the fact that _process:terminate deletes the queue, but no, because, from what I can see, _process isn't trapping exits, meaning that the terminate won't be called (and most likely rightly so, so that it doesn't upset mnesia's state) by the exit(Pid, shutdown) in amqqueue:recover.
So there still needs to be some sort of fix somewhere and somehow --- src/rabbit.erl | 8 +++--- src/rabbit_amqqueue.erl | 37 +++++++++++++----------- src/rabbit_queue_index.erl | 66 +++++++++++++++++++++++++++++++++---------- src/rabbit_variable_queue.erl | 2 +- 4 files changed, 76 insertions(+), 37 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index b859c4af..1c0f0f91 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -149,15 +149,15 @@ start(normal, []) -> ok = start_child(rabbit_router), ok = start_child(rabbit_node_monitor), - ok = start_child(rabbit_guid), - %% TODO - this should probably use start_child somehow too - ok = rabbit_queue_index:start_msg_store() + ok = start_child(rabbit_guid) end}, {"recovery", fun () -> ok = maybe_insert_default_data(), ok = rabbit_exchange:recover(), - {ok, _DurableQueues} = rabbit_amqqueue:recover() + %% TODO - this should probably use start_child somehow too + {ok, DurableQueues} = rabbit_queue_index:start_msg_store(), + {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues) %% TODO - don't use disk_queue any more! %% ok = rabbit_disk_queue:delete_non_durable_queues( %% [ Q #amqqueue.name || Q <- DurableQueues ]) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 840c2c4d..3367c754 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -31,7 +31,8 @@ -module(rabbit_amqqueue). --export([start/0, recover/0, declare/4, delete/3, purge/1]). +-export([start/0, recover/1, find_durable_queues/0, declare/4, delete/3, + purge/1]). -export([internal_declare/2, internal_delete/1]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, @@ -66,7 +67,8 @@ -type(acktag() :: ('ack_not_on_disk' | {'ack_index_and_store', msg_id(), seq_id()})). -spec(start/0 :: () -> 'ok'). --spec(recover/0 :: () -> {'ok', [amqqueue()]}). +-spec(recover/1 :: ([amqqueue()]) -> {'ok', [amqqueue()]}). +-spec(find_durable_queues/0 :: () -> [amqqueue()]). -spec(declare/4 :: (queue_name(), boolean(), boolean(), amqp_table()) -> amqqueue()). -spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()). @@ -123,13 +125,11 @@ start() -> transient, infinity, supervisor, [rabbit_amqqueue_sup]}), ok. -recover() -> - {ok, DurableQueues} = recover_durable_queues(), - {ok, DurableQueues}. +recover(DurableQueues) -> + {ok, _RealDurableQueues} = recover_durable_queues(DurableQueues). -recover_durable_queues() -> - Node = node(), - DurableQueues = +recover_durable_queues(DurableQueues) -> + RealDurableQueues = lists:foldl( fun (RecoveredQ, Acc) -> Q = start_queue_process(RecoveredQ), @@ -151,15 +151,18 @@ recover_durable_queues() -> false -> exit(Q#amqqueue.pid, shutdown), Acc end - end, [], - %% TODO: use dirty ops instead - rabbit_misc:execute_mnesia_transaction( - fun () -> - qlc:e(qlc:q([Q || Q = #amqqueue{pid = Pid} - <- mnesia:table(rabbit_durable_queue), - node(Pid) == Node])) - end)), - {ok, DurableQueues}. + end, [], DurableQueues), + {ok, RealDurableQueues}. + +find_durable_queues() -> + Node = node(), + %% TODO: use dirty ops instead + rabbit_misc:execute_mnesia_transaction( + fun () -> + qlc:e(qlc:q([Q || Q = #amqqueue{pid = Pid} + <- mnesia:table(rabbit_durable_queue), + node(Pid) == Node])) + end). declare(QueueName, Durable, AutoDelete, Args) -> Q = start_queue_process(#amqqueue{name = QueueName, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 9dae268f..2ca5b8b8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,9 +31,9 @@ -module(rabbit_queue_index). 
--export([init/1, terminate/1, write_published/4, write_delivered/2, - write_acks/2, flush_journal/1, read_segment_entries/2, - next_segment_boundary/1, segment_size/0, +-export([init/1, terminate/1, terminate_and_erase/1, write_published/4, + write_delivered/2, write_acks/2, flush_journal/1, + read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/0]). %%---------------------------------------------------------------------------- @@ -115,14 +115,14 @@ seg_ack_counts }). +-include("rabbit.hrl"). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(io_device() :: any()). -type(msg_id() :: binary()). -type(seq_id() :: integer()). --type(file_path() :: any()). -type(int_or_undef() :: integer() | 'undefined'). -type(io_dev_or_undef() :: io_device() | 'undefined'). -type(qistate() :: #qistate { dir :: file_path(), @@ -134,8 +134,9 @@ seg_ack_counts :: dict() }). --spec(init/1 :: (string()) -> {non_neg_integer(), qistate()}). +-spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). -spec(terminate/1 :: (qistate()) -> qistate()). +-spec(terminate_and_erase/1 :: (qistate()) -> qistate()). -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). @@ -156,7 +157,8 @@ %%---------------------------------------------------------------------------- init(Name) -> - Dir = filename:join(queues_dir(), Name), + StrName = queue_name_to_dir_name(Name), + Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), {AckCounts, TotalMsgCount} = scatter_journal(Dir, find_ack_counts(Dir)), {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), @@ -170,6 +172,8 @@ init(Name) -> seg_ack_counts = AckCounts }}. +terminate(State = #qistate { journal_handle = undefined }) -> + State; terminate(State) -> case flush_journal(State) of {true, State1} -> @@ -182,6 +186,11 @@ terminate(State) -> State2 #qistate { journal_handle = undefined } end. +terminate_and_erase(State) -> + State1 = terminate(State), + ok = delete_queue_directory(State1 #qistate.dir), + State1. + write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), @@ -291,22 +300,43 @@ find_lowest_seq_id_seg_and_next_seq_id( {LowSeqIdSeg, NextSeqId}. start_msg_store() -> - Queues = case file:list_dir(queues_dir()) of - {ok, Entries} -> - [ Entry || Entry <- Entries, filelib:is_dir(Entry) ]; - {error, enoent} -> - [] - end, + DurableQueues = rabbit_amqqueue:find_durable_queues(), + DurableQueueNames = + sets:from_list([ queue_name_to_dir_name(Queue #amqqueue.name) + || Queue <- DurableQueues ]), + Directories = case file:list_dir(queues_dir()) of + {ok, Entries} -> + [ Entry || Entry <- Entries, filelib:is_dir(Entry) ]; + {error, enoent} -> + [] + end, + {Durable, Transient} = + lists:foldl(fun (Queue, {DurableAcc, TransientAcc}) -> + case sets:is_element(Queue, DurableQueueNames) of + true -> {[Queue | DurableAcc], TransientAcc}; + false -> {DurableAcc, [Queue | TransientAcc]} + end + end, {[], []}, Directories), MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), {ok, _Pid} = rabbit_msg_store:start_link(MsgStoreDir, fun queue_index_walker/1, - Queues), - ok. + Durable), + lists:foreach(fun (DirName) -> + Dir = filename:join(queues_dir(), DirName), + ok = delete_queue_directory(Dir) + end, Transient), + {ok, DurableQueues}. 
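The durable/transient split at the heart of start_msg_store/0, as a self-contained sketch with directory names standing in for queues: the durable directories seed the msg store, the transient ones are deleted.

    partition_queues(Directories, DurableNames) ->
        DurableSet = sets:from_list(DurableNames),
        lists:partition(fun (Dir) -> sets:is_element(Dir, DurableSet) end,
                        Directories).

    %% partition_queues(["a", "b", "c"], ["b"]) = {["b"], ["a", "c"]}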
%%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- +queue_name_to_dir_name(Name = #resource { kind = queue }) -> + lists:map(fun ($/) -> $_; + ($+) -> $-; + (C) -> C + end, ssl_base64:encode(term_to_binary(Name))). + queues_dir() -> filename:join(rabbit_mnesia:dir(), "queues"). @@ -348,6 +378,12 @@ seg_num_to_path(Dir, SegNum) -> SegName = integer_to_list(SegNum), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). +delete_queue_directory(Dir) -> + {ok, Entries} = file:list_dir(Dir), + lists:foreach(fun file:delete/1, + [ filename:join(Dir, Entry) || Entry <- Entries ]), + ok = file:del_dir(Dir). + %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function %%---------------------------------------------------------------------------- diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index e7c546e5..9ca06a1c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -371,7 +371,7 @@ persistent_msg_ids(Pubs) -> delete1(NextSeqId, Count, GammaSeqId, IndexState) when GammaSeqId >= NextSeqId -> - {Count, IndexState}; + {Count, rabbit_queue_index:terminate_and_erase(IndexState)}; delete1(NextSeqId, Count, GammaSeqId, IndexState) -> Gamma1SeqId = GammaSeqId + rabbit_queue_index:segment_size(), case rabbit_queue_index:read_segment_entries(GammaSeqId, IndexState) of -- cgit v1.2.1 From a50fd6c161892b7a33399df95aec855c5e61a147 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 11:18:56 +0100 Subject: Wire up the tx_commit callback. Note no coalescing because no sync on queue_index because no fhc. Also added notes about the deletion of queues on startup. --- src/rabbit.erl | 8 +++++--- src/rabbit_amqqueue_process.erl | 5 +++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 1c0f0f91..a5e59ce2 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -158,9 +158,11 @@ start(normal, []) -> %% TODO - this should probably use start_child somehow too {ok, DurableQueues} = rabbit_queue_index:start_msg_store(), {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues) - %% TODO - don't use disk_queue any more! - %% ok = rabbit_disk_queue:delete_non_durable_queues( - %% [ Q #amqqueue.name || Q <- DurableQueues ]) + %% TODO - RealDurableQueues is a subset of + %% DurableQueues. It may have queues removed which + %% have since been recreated on another node in our + %% cluster. We need to remove DurableQueues -- + %% RealDurableQueues somehow. See also bug 20916 end}, {"builtin applications", fun () -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 152205ed..bb4ac0b9 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -783,6 +783,11 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); +handle_cast({tx_commit_callback, Pubs, AckTags}, + State = #q{variable_queue_state = VQS}) -> + noreply(State#q{variable_queue_state = + rabbit_variable_queue:do_tx_commit(Pubs, AckTags, VQS)}); + handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( possibly_unblock( -- cgit v1.2.1 From fa98888cec64865f7eefc873ba815cc77a0f2d59 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 12:25:09 +0100 Subject: the queue index deletes transient msgs on initialisation. 
This is rather elegant because it means that the delta gen fun used to seed the msg store does not generate any deltas for transient msgs, which means that the msg_store will take care of deleting transient msgs without any further interaction. --- src/rabbit_queue_index.erl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 2ca5b8b8..ae16eb2c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -384,6 +384,10 @@ delete_queue_directory(Dir) -> [ filename:join(Dir, Entry) || Entry <- Entries ]), ok = file:del_dir(Dir). +add_ack_to_ack_dict(SeqId, ADict) -> + {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). + %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function %%---------------------------------------------------------------------------- @@ -454,20 +458,27 @@ load_journal(Hdl, ADict) -> _ErrOrEoF -> ADict end. -add_ack_to_ack_dict(SeqId, ADict) -> - {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). - replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, TotalMsgCount, Dir}) -> SegPath = seg_num_to_path(Dir, SegNum), {SDict, _AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, dict:new()), ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks)), - {append_acks_to_segment(SegPath, SegNum, AckCounts, - sets:to_list(ValidAcks)), + AcksToAppend = deliver_and_ack_transient(SDict, sets:to_list(ValidAcks)), + {append_acks_to_segment(SegPath, SegNum, AckCounts, AcksToAppend), TotalMsgCount - sets:size(ValidAcks), Dir}. +deliver_and_ack_transient(SDict, Acks) -> + %% because an Ack entry and a Delivered entry are identical, we + %% simply add the RelSeq twice to the accumulator for transient + %% msgs that have not yet been delivered. + dict:fold(fun (_RelSeq, {_MsgId, _IsDelivered, true}, Acc) -> + Acc; + (RelSeq, {_MsgId, true, false}, Acc) -> + [RelSeq | Acc]; + (RelSeq, {_MsgId, false, false}, Acc) -> + [RelSeq, RelSeq | Acc] + end, Acks, SDict). %%---------------------------------------------------------------------------- %% Loading Segments -- cgit v1.2.1 From b3830be9781915ceed1806b93d742965229c1260 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 13:34:45 +0100 Subject: cosmetics and also removal of bitstring syntax as it seems far too modern, and actually isn't necessary because at no time do we need to deal with non byte-aligned binary data --- src/rabbit_queue_index.erl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index ae16eb2c..4307399f 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -472,11 +472,11 @@ deliver_and_ack_transient(SDict, Acks) -> %% because an Ack entry and a Delivered entry are identical, we %% simply add the RelSeq twice to the accumulator for transient %% msgs that have not yet been delivered. 
- dict:fold(fun (_RelSeq, {_MsgId, _IsDelivered, true}, Acc) -> + dict:fold(fun (_RelSeq, {_MsgId, _IsDelivered, true }, Acc) -> Acc; - (RelSeq, {_MsgId, true, false}, Acc) -> + (RelSeq, {_MsgId, true, false}, Acc) -> [RelSeq | Acc]; - (RelSeq, {_MsgId, false, false}, Acc) -> + (RelSeq, {_MsgId, false, false}, Acc) -> [RelSeq, RelSeq | Acc] end, Acks, SDict). @@ -505,18 +505,18 @@ load_segment(SegNum, SegPath, JAckDict) -> load_segment_entries(SegNum, Hdl, {SDict, AckCount, HighRelSeq}) -> case file:read(Hdl, 1) of {ok, <>} -> + MSB>>} -> {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), - <> = <>, + <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), load_segment_entries(SegNum, Hdl, {SDict1, AckCount1, HighRelSeq}); {ok, <>} -> + IsPersistentNum:1, MSB>>} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. {ok, <>} = file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), - <> = <>, + <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( SegNum, Hdl, {dict:store(RelSeq, {MsgId, false, -- cgit v1.2.1 From b1e85bb217669ece75b23941b0fd8084f1995c30 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 14:19:58 +0100 Subject: slightly more likely to work --- src/rabbit_queue_index.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4307399f..10773c0c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -80,6 +80,7 @@ -define(SEGMENT_EXTENSION, ".idx"). -define(REL_SEQ_BITS, 14). +-define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + (?REL_SEQ_BITS rem 8))). -define(SEGMENT_ENTRIES_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). %% seq only is binary 00 followed by 14 bits of rel seq id @@ -505,18 +506,18 @@ load_segment(SegNum, SegPath, JAckDict) -> load_segment_entries(SegNum, Hdl, {SDict, AckCount, HighRelSeq}) -> case file:read(Hdl, 1) of {ok, <>} -> + MSB:(8-?REL_SEQ_ONLY_PREFIX_BITS)>>} -> {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), - <> = <>, + <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), load_segment_entries(SegNum, Hdl, {SDict1, AckCount1, HighRelSeq}); {ok, <>} -> + IsPersistentNum:1, MSB:(7-?PUBLISH_PREFIX_BITS)>>} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. {ok, <>} = file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), - <> = <>, + <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( SegNum, Hdl, {dict:store(RelSeq, {MsgId, false, -- cgit v1.2.1 From 1dc5ddde5e86a4115999925a94ea840a84ec3093 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 15:51:50 +0100 Subject: the deletion of transient msgs via the scattering of the journal at startup was wrong because there is no guarantee that the journal will touch all the segments. Thus now in the formation of the ack counts, add deliveries at that point where necessary for transient msgs. Acks for these msgs are not added at this point because they need to go via the journal scattering mechanism so that full segments can be removed. 
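In one line, what this commit arranges (a sketch; in the patch itself load_journal is simply seeded with TransientADict, which amounts to the same thing): the transient acks collected per segment are folded into the same per-segment dict as the ack journal, so both replay — and hence full-segment removal — go through the one scattering mechanism.

    merge_acks(TransientADict, JournalADict) ->
        dict:merge(fun (_SegNum, TransientAcks, JournalAcks) ->
                           TransientAcks ++ JournalAcks
                   end, TransientADict, JournalADict).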
--- src/rabbit_queue_index.erl | 121 +++++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 10773c0c..a38732bd 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -161,17 +161,20 @@ init(Name) -> StrName = queue_name_to_dir_name(Name), Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - {AckCounts, TotalMsgCount} = scatter_journal(Dir, find_ack_counts(Dir)), + {TotalMsgCount, AckCounts, TransientADict} = + find_ack_counts_and_deliver_transient_msgs(Dir), + {TotalMsgCount1, AckCounts1} = + scatter_journal(Dir, TotalMsgCount, AckCounts, TransientADict), {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), [raw, binary, delayed_write, write, read]), - {TotalMsgCount, #qistate { dir = Dir, - cur_seg_num = undefined, - cur_seg_hdl = undefined, - journal_ack_count = 0, - journal_ack_dict = dict:new(), - journal_handle = JournalHdl, - seg_ack_counts = AckCounts - }}. + {TotalMsgCount1, #qistate { dir = Dir, + cur_seg_num = undefined, + cur_seg_hdl = undefined, + journal_ack_count = 0, + journal_ack_dict = dict:new(), + journal_handle = JournalHdl, + seg_ack_counts = AckCounts1 + }}. terminate(State = #qistate { journal_handle = undefined }) -> State; @@ -425,31 +428,45 @@ all_segment_nums_paths(Dir) -> SegName)), filename:join(Dir, SegName)} || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. -find_ack_counts(Dir) -> +find_ack_counts_and_deliver_transient_msgs(Dir) -> SegNumsPaths = all_segment_nums_paths(Dir), lists:foldl( - fun ({SegNum, SegPath}, {AccCount, AccDict}) -> + fun ({SegNum, SegPath}, {TotalMsgCount, AckCounts, TransientADict}) -> {SDict, AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, dict:new()), - {dict:size(SDict) + AccCount, - case AckCount of - 0 -> AccDict; - _ -> dict:store(SegNum, AckCount, AccDict) - end} - end, {0, dict:new()}, SegNumsPaths). - -scatter_journal(Dir, {TotalMsgCount, AckCounts}) -> + TransientMsgsAcks = deliver_transient(SegPath, SDict), + %% ignore TransientMsgsAcks in AckCounts1 and + %% TotalMsgCount1 because the TransientMsgsAcks fall + %% through into scatter_journal at which point the + %% AckCounts and TotalMsgCount will be correctly + %% adjusted. + TotalMsgCount1 = TotalMsgCount + dict:size(SDict), + AckCounts1 = case AckCount of + 0 -> AckCounts; + N -> dict:store(SegNum, N, AckCounts) + end, + TransientADict1 = + case TransientMsgsAcks of + [] -> TransientADict; + _ -> dict:store(SegNum, TransientMsgsAcks, TransientADict) + end, + {TotalMsgCount1, AckCounts1, TransientADict1} + end, {0, dict:new(), dict:new()}, SegNumsPaths). + +scatter_journal(Dir, TotalMsgCount, AckCounts, TransientADict) -> JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), case file:open(JournalPath, [read, read_ahead, raw, binary]) of {error, enoent} -> AckCounts; {ok, Hdl} -> - ADict = load_journal(Hdl, dict:new()), + %% ADict may well contain duplicates. However, this is ok, + %% due to the use of sets in replay_journal_acks_to_segment + ADict = load_journal(Hdl, TransientADict), ok = file:close(Hdl), - {AckCounts1, TotalMsgCount1, _Dir} = + {TotalMsgCount1, AckCounts1, _Dir} = dict:fold(fun replay_journal_acks_to_segment/3, - {AckCounts, TotalMsgCount, Dir}, ADict), + {TotalMsgCount, AckCounts, Dir}, ADict), ok = file:delete(JournalPath), - {AckCounts1, TotalMsgCount1} + {TotalMsgCount1, AckCounts1} end. 
load_journal(Hdl, ADict) -> @@ -459,27 +476,37 @@ load_journal(Hdl, ADict) -> _ErrOrEoF -> ADict end. -replay_journal_acks_to_segment(SegNum, Acks, {AckCounts, TotalMsgCount, Dir}) -> +replay_journal_acks_to_segment(_, [], Acc) -> + Acc; +replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, AckCounts, Dir}) -> SegPath = seg_num_to_path(Dir, SegNum), + %% supply empty dict so that we get all msgs in SDict that have + %% not been acked in the segment file itself {SDict, _AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, dict:new()), ValidRelSeqIds = dict:fetch_keys(SDict), - ValidAcks = sets:intersection(sets:from_list(ValidRelSeqIds), - sets:from_list(Acks)), - AcksToAppend = deliver_and_ack_transient(SDict, sets:to_list(ValidAcks)), - {append_acks_to_segment(SegPath, SegNum, AckCounts, AcksToAppend), - TotalMsgCount - sets:size(ValidAcks), Dir}. - -deliver_and_ack_transient(SDict, Acks) -> - %% because an Ack entry and a Delivered entry are identical, we - %% simply add the RelSeq twice to the accumulator for transient - %% msgs that have not yet been delivered. - dict:fold(fun (_RelSeq, {_MsgId, _IsDelivered, true }, Acc) -> - Acc; - (RelSeq, {_MsgId, true, false}, Acc) -> - [RelSeq | Acc]; - (RelSeq, {_MsgId, false, false}, Acc) -> - [RelSeq, RelSeq | Acc] - end, Acks, SDict). + ValidAcks = sets:to_list(sets:intersection(sets:from_list(ValidRelSeqIds), + sets:from_list(Acks))), + %% ValidAcks will not contain any duplicates at this point. + {TotalMsgCount - length(ValidAcks), + append_acks_to_segment(SegPath, SegNum, AckCounts, ValidAcks), Dir}. + +deliver_transient(SegPath, SDict) -> + {AckMe, DeliverMe} = + dict:fold( + fun (_RelSeq, {_MsgId, _IsDelivered, true}, Acc) -> + Acc; + (RelSeq, {_MsgId, false, false}, {AckMeAcc, DeliverMeAcc}) -> + {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]}; + (RelSeq, {_MsgId, true, false}, {AckMeAcc, DeliverMeAcc}) -> + {[RelSeq | AckMeAcc], DeliverMeAcc} + end, {[], []}, SDict), + {ok, Hdl} = file:open(SegPath, [binary, raw, write, delayed_write, read]), + {ok, _} = file:position(Hdl, {eof, 0}), + ok = file:write(Hdl, [ <> || RelSeq <- DeliverMe ]), + ok = file:sync(Hdl), + ok = file:close(Hdl), + AckMe. %%---------------------------------------------------------------------------- %% Loading Segments @@ -490,7 +517,7 @@ load_segment(SegNum, SegPath, JAckDict) -> {error, enoent} -> {dict:new(), 0, 0}; {ok, Hdl} -> {SDict, AckCount, HighRelSeq} = - load_segment_entries(SegNum, Hdl, {dict:new(), 0, 0}), + load_segment_entries(Hdl, dict:new(), 0, 0), ok = file:close(Hdl), RelSeqs = case dict:find(SegNum, JAckDict) of {ok, RelSeqs1} -> RelSeqs1; @@ -503,14 +530,14 @@ load_segment(SegNum, SegPath, JAckDict) -> {SDict1, AckCount1, HighRelSeq} end. 
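The classification inside deliver_transient/2, extracted as a sketch over {RelSeq, IsDelivered, IsPersistent} triples: persistent entries pass through untouched, every transient entry gets acked, and only the not-yet-delivered transients also get a delivery record written.

    classify(Msgs) ->
        lists:foldl(
          fun ({_RelSeq, _IsDelivered, true}, Acc)          -> Acc;
              ({RelSeq, true, false}, {AckMe, DeliverMe})   ->
                  {[RelSeq | AckMe], DeliverMe};
              ({RelSeq, false, false}, {AckMe, DeliverMe})  ->
                  {[RelSeq | AckMe], [RelSeq | DeliverMe]}
          end, {[], []}, Msgs).

    %% classify([{1, false, true}, {2, true, false}, {3, false, false}])
    %%     = {[3, 2], [3]}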
-load_segment_entries(SegNum, Hdl, {SDict, AckCount, HighRelSeq}) -> +load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> case file:read(Hdl, 1) of {ok, <>} -> {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), - load_segment_entries(SegNum, Hdl, {SDict1, AckCount1, HighRelSeq}); + load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq); {ok, <>} -> %% because we specify /binary, and binaries are complete @@ -520,9 +547,9 @@ load_segment_entries(SegNum, Hdl, {SDict, AckCount, HighRelSeq}) -> <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( - SegNum, Hdl, {dict:store(RelSeq, {MsgId, false, - 1 == IsPersistentNum}, - SDict), AckCount, HighRelSeq1}); + Hdl, dict:store(RelSeq, {MsgId, false, + 1 == IsPersistentNum}, + SDict), AckCount, HighRelSeq1); _ErrOrEoF -> {SDict, AckCount, HighRelSeq} end. -- cgit v1.2.1 From 8f2c3fae2516026e99c53fa291237501bdcd73a4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 16:31:01 +0100 Subject: some initial fixes --- src/rabbit_queue_index.erl | 2 +- src/rabbit_variable_queue.erl | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a38732bd..b58c5a7f 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -456,7 +456,7 @@ find_ack_counts_and_deliver_transient_msgs(Dir) -> scatter_journal(Dir, TotalMsgCount, AckCounts, TransientADict) -> JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), case file:open(JournalPath, [read, read_ahead, raw, binary]) of - {error, enoent} -> AckCounts; + {error, enoent} -> {TotalMsgCount, AckCounts}; {ok, Hdl} -> %% ADict may well contain duplicates. 
However, this is ok, %% due to the use of sets in replay_journal_acks_to_segment diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9ca06a1c..a7a07556 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -167,6 +167,7 @@ set_queue_ram_duration_target( State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1 }, if TargetRamMsgCount == TargetRamMsgCount1 -> State1; + TargetRamMsgCount == undefined orelse TargetRamMsgCount < TargetRamMsgCount1 -> maybe_start_prefetcher(State1); true -> @@ -252,7 +253,10 @@ maybe_start_prefetcher(State = #vqstate { q1 = Q1, q3 = Q3, prefetcher = undefined }) -> %% prefetched content takes priority over q1 - AvailableSpace = (TargetRamMsgCount - RamMsgCount) + queue:len(Q1), + AvailableSpace = case TargetRamMsgCount of + undefined -> queue:len(Q3); + _ -> (TargetRamMsgCount - RamMsgCount) + queue:len(Q1) + end, PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), if PrefetchCount =< 0 -> State; true -> @@ -602,7 +606,7 @@ drain_prefetcher(DrainOrStop, reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount >= RamMsgCount -> + when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> @@ -726,7 +730,7 @@ maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount >= RamMsgCount -> + when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; maybe_push_alphas_to_betas(Generator, Consumer, Q, State = #vqstate { ram_msg_count = RamMsgCount }) -> -- cgit v1.2.1 From 61782428c42348c5a78c2bd9ee4950858f4eb203 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 17:25:48 +0100 Subject: some more fixes. durable queue recovery seems to work --- src/rabbit_queue_index.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index b58c5a7f..4f283e42 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -80,7 +80,7 @@ -define(SEGMENT_EXTENSION, ".idx"). -define(REL_SEQ_BITS, 14). --define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + (?REL_SEQ_BITS rem 8))). +-define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + 8 - (?REL_SEQ_BITS rem 8))). -define(SEGMENT_ENTRIES_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). %% seq only is binary 00 followed by 14 bits of rel seq id @@ -336,10 +336,10 @@ start_msg_store() -> %%---------------------------------------------------------------------------- queue_name_to_dir_name(Name = #resource { kind = queue }) -> - lists:map(fun ($/) -> $_; - ($+) -> $-; - (C) -> C - end, ssl_base64:encode(term_to_binary(Name))). + Bin = term_to_binary(Name), + Size = 8*size(Bin), + <> = Bin, + hd(io_lib:format("~.36B", [Num])). queues_dir() -> filename:join(rabbit_mnesia:dir(), "queues"). 
-- cgit v1.2.1 From d904184fb9dae31514f50436b4461020549e66f4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 17:36:36 +0100 Subject: use lists:flatten, not hd --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4f283e42..cf0258b9 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -339,7 +339,7 @@ queue_name_to_dir_name(Name = #resource { kind = queue }) -> Bin = term_to_binary(Name), Size = 8*size(Bin), <> = Bin, - hd(io_lib:format("~.36B", [Num])). + lists:flatten(io_lib:format("~.36B", [Num])). queues_dir() -> filename:join(rabbit_mnesia:dir(), "queues"). -- cgit v1.2.1 From 1121e3de53beef26fe4891f6dd1b37439bf5aa2d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 14 Oct 2009 18:15:01 +0100 Subject: A load more fixes. Seem to have uncovered the possibility of finding many acks for the same msg when reading in a segment file... --- src/rabbit_amqqueue_process.erl | 8 ++++---- src/rabbit_queue_index.erl | 3 +++ src/rabbit_tests.erl | 2 +- src/rabbit_variable_queue.erl | 14 +++++++------- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index bb4ac0b9..e9711b54 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -481,10 +481,10 @@ commit_transaction(Txn, State) -> case lookup_ch(ChPid) of not_found -> []; C = #cr { unacked_messages = UAM } -> - {MsgWithAcks, Remaining} = + {MsgsWithAcks, Remaining} = collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), - MsgWithAcks + [AckTag || {_Msg, AckTag} <- MsgsWithAcks] end, VQS = rabbit_variable_queue:tx_commit( PendingMessagesOrdered, Acks, State #q.variable_queue_state), @@ -593,13 +593,13 @@ handle_call({basic_get, ChPid, NoAck}, _From, {empty, VQS1} -> reply(empty, State #q { variable_queue_state = VQS1 }); {{Msg, IsDelivered, AckTag, Remaining}, VQS1} -> AckRequired = not(NoAck), - {ok, VQS2} = + VQS2 = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - {ok, VQS1}; + VQS1; false -> rabbit_variable_queue:ack([AckTag], VQS1) end, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index cf0258b9..9933eb4c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -516,6 +516,7 @@ load_segment(SegNum, SegPath, JAckDict) -> case file:open(SegPath, [raw, binary, read_ahead, read]) of {error, enoent} -> {dict:new(), 0, 0}; {ok, Hdl} -> + rabbit_log:info("SegNum: ~p~n", [SegNum]), {SDict, AckCount, HighRelSeq} = load_segment_entries(Hdl, dict:new(), 0, 0), ok = file:close(Hdl), @@ -536,6 +537,7 @@ load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> MSB:(8-?REL_SEQ_ONLY_PREFIX_BITS)>>} -> {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, + rabbit_log:info("D/A: ~p: ~p~n", [self(), RelSeq]), {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq); {ok, < {ok, <>} = file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, + rabbit_log:info("Pub: ~p: ~p~n", [self(), RelSeq]), HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( Hdl, dict:store(RelSeq, {MsgId, false, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 259f120a..3a435e79 100644 --- 
a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -50,7 +50,7 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> - passed = test_disk_queue(), + %% passed = test_disk_queue(), passed = test_priority_queue(), passed = test_unfold(), passed = test_parsing(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a7a07556..831aa044 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -274,7 +274,7 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> lists:foldl( fun (ack_not_on_disk, Acc) -> Acc; ({ack_index_and_store, MsgId, SeqId}, {MsgIds, SeqIds}) -> - {[MsgId | MsgIds], [SeqId, SeqIds]} + {[MsgId | MsgIds], [SeqId | SeqIds]} end, {[], []}, AckTags), IndexState1 = case SeqIds of [] -> IndexState; @@ -294,16 +294,15 @@ purge(State) -> %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. delete(State) -> - {PurgeCount, State1 = #vqstate { index_state = IndexState }} = purge(State), + {_PurgeCount, State1 = #vqstate { index_state = IndexState }} = purge(State), case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState) of {N, N} -> - {PurgeCount, State1}; + State1; {GammaSeqId, NextSeqId} -> - {DeleteCount, IndexState1} = + {_DeleteCount, IndexState1} = delete1(NextSeqId, 0, GammaSeqId, IndexState), - {PurgeCount + DeleteCount, - State1 #vqstate { index_state = IndexState1 }} + State1 #vqstate { index_state = IndexState1 } end. %% [{Msg, AckTag}] @@ -349,10 +348,11 @@ tx_commit(Pubs, AckTags, State) -> [] -> do_tx_commit(Pubs, AckTags, State); PersistentMsgIds -> + Self = self(), ok = rabbit_msg_store:sync( PersistentMsgIds, fun () -> ok = rabbit_amqqueue:tx_commit_callback( - self(), Pubs, AckTags) + Self, Pubs, AckTags) end), State end. -- cgit v1.2.1 From 307c7feb5f1929d19bd647726f50fe45231b4548 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 11:27:45 +0100 Subject: bug fix. --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 831aa044..446042e0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -398,7 +398,8 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> {Q3Count, IndexState1} = remove_queue_entries(Q3, IndexState), purge1(Count + Q3Count, maybe_load_next_segment( - State #vqstate { index_state = IndexState1 })) + State #vqstate { index_state = IndexState1, + q3 = queue:new() })) end. remove_queue_entries(Q, IndexState) -> -- cgit v1.2.1 From 316991697b66e562134d78a3256f08a5f95b6d0c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 12:12:28 +0100 Subject: bug fix. --- src/rabbit_variable_queue.erl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 446042e0..745d59ea 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -280,7 +280,10 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> [] -> IndexState; _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) end, - ok = rabbit_msg_store:remove(MsgIds), + ok = case MsgIds of + [] -> ok; + _ -> rabbit_msg_store:remove(MsgIds) + end, State #vqstate { index_state = IndexState1 }. 
purge(State = #vqstate { prefetcher = undefined, q4 = Q4, @@ -340,7 +343,10 @@ tx_publish(_Msg, State) -> State. tx_rollback(Pubs, State) -> - ok = rabbit_msg_store:remove(persistent_msg_ids(Pubs)), + ok = case persistent_msg_ids(Pubs) of + [] -> ok; + PP -> rabbit_msg_store:remove(PP) + end, State. tx_commit(Pubs, AckTags, State) -> @@ -642,6 +648,8 @@ maybe_write_msg_to_disk(Bool, PersistentMsgsAlreadyOnDisk, orelse (IsPersistent andalso not PersistentMsgsAlreadyOnDisk) -> ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), true; +maybe_write_msg_to_disk(_Bool, true, #basic_message { is_persistent = true }) -> + true; maybe_write_msg_to_disk(_Bool, _PersistentMsgsAlreadyOnDisk, _Msg) -> false. -- cgit v1.2.1 From f945eac334d61333ad2feb04b14804d1504d9534 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 14:35:15 +0100 Subject: Bug fix. --- src/rabbit_amqqueue.erl | 9 +++++---- src/rabbit_amqqueue_process.erl | 13 ++++++------- src/rabbit_variable_queue.erl | 14 ++++++++------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 3367c754..561e9e69 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -40,7 +40,7 @@ -export([list/1, info/1, info/2, info_all/1, info_all/2]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, tx_commit_callback/3]). +-export([notify_sent/2, unblock/2, tx_commit_callback/4]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -107,7 +107,8 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(tx_commit_callback/3 :: (pid(), [message()], [acktag()]) -> 'ok'). +-spec(tx_commit_callback/4 :: (pid(), [message()], [acktag()], {pid(), any()}) + -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -321,8 +322,8 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 8, {unblock, ChPid}). -tx_commit_callback(QPid, Pubs, AckTags) -> - gen_server2:pcast(QPid, 8, {tx_commit_callback, Pubs, AckTags}). +tx_commit_callback(QPid, Pubs, AckTags, From) -> + gen_server2:pcast(QPid, 8, {tx_commit_callback, Pubs, AckTags, From}). internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index e9711b54..66fc45ea 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -470,7 +470,7 @@ record_pending_acks(Txn, ChPid, MsgIds) -> store_tx(Txn, Tx#tx{pending_acks = [MsgIds | Pending], ch_pid = ChPid}). -commit_transaction(Txn, State) -> +commit_transaction(Txn, From, State) -> #tx { ch_pid = ChPid, pending_messages = PendingMessages, pending_acks = PendingAcks @@ -487,7 +487,7 @@ commit_transaction(Txn, State) -> [AckTag || {_Msg, AckTag} <- MsgsWithAcks] end, VQS = rabbit_variable_queue:tx_commit( - PendingMessagesOrdered, Acks, State #q.variable_queue_state), + PendingMessagesOrdered, Acks, From, State #q.variable_queue_state), State #q { variable_queue_state = VQS }. 
rollback_transaction(Txn, State) -> @@ -573,9 +573,7 @@ handle_call({deliver, Txn, Message, ChPid}, _From, State) -> reply(Delivered, NewState); handle_call({commit, Txn}, From, State) -> - NewState = commit_transaction(Txn, State), - %% optimisation: we reply straight away so the sender can continue - gen_server2:reply(From, ok), + NewState = commit_transaction(Txn, From, State), erase_tx(Txn), noreply(run_message_queue(NewState)); @@ -783,10 +781,11 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({tx_commit_callback, Pubs, AckTags}, +handle_cast({tx_commit_callback, Pubs, AckTags, From}, State = #q{variable_queue_state = VQS}) -> noreply(State#q{variable_queue_state = - rabbit_variable_queue:do_tx_commit(Pubs, AckTags, VQS)}); + rabbit_variable_queue:do_tx_commit( + Pubs, AckTags, From, VQS)}); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 745d59ea..dddfb4a8 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -34,7 +34,7 @@ -export([init/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, - tx_publish/2, tx_rollback/2, tx_commit/3, do_tx_commit/3]). + tx_publish/2, tx_rollback/2, tx_commit/4, do_tx_commit/4]). %%---------------------------------------------------------------------------- @@ -349,21 +349,21 @@ tx_rollback(Pubs, State) -> end, State. -tx_commit(Pubs, AckTags, State) -> +tx_commit(Pubs, AckTags, From, State) -> case persistent_msg_ids(Pubs) of [] -> - do_tx_commit(Pubs, AckTags, State); + do_tx_commit(Pubs, AckTags, From, State); PersistentMsgIds -> Self = self(), ok = rabbit_msg_store:sync( PersistentMsgIds, fun () -> ok = rabbit_amqqueue:tx_commit_callback( - Self, Pubs, AckTags) + Self, Pubs, AckTags, From) end), State end. -do_tx_commit(Pubs, AckTags, State) -> +do_tx_commit(Pubs, AckTags, From, State) -> {_PubSeqIds, State1} = lists:foldl( fun (Msg, {SeqIdsAcc, StateN}) -> @@ -371,7 +371,9 @@ do_tx_commit(Pubs, AckTags, State) -> {[SeqId | SeqIdsAcc], StateN1} end, {[], State}, Pubs), %% TODO need to do something here about syncing the queue index, PubSeqIds - ack(AckTags, State1). + State2 = ack(AckTags, State1), + gen_server2:reply(From, ok), + State2. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From b4d6e157bf465ad0210b9881d6da62f2a2d90993 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 14:55:41 +0100 Subject: Bug fix. 
--- src/rabbit_queue_index.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 9933eb4c..e74ef295 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -308,9 +308,12 @@ start_msg_store() -> DurableQueueNames = sets:from_list([ queue_name_to_dir_name(Queue #amqqueue.name) || Queue <- DurableQueues ]), - Directories = case file:list_dir(queues_dir()) of + QueuesDir = queues_dir(), + Directories = case file:list_dir(QueuesDir) of {ok, Entries} -> - [ Entry || Entry <- Entries, filelib:is_dir(Entry) ]; + [ Entry || Entry <- Entries, + filelib:is_dir( + filename:join(QueuesDir, Entry)) ]; {error, enoent} -> [] end, -- cgit v1.2.1 From f84d140f6048f8b424d3c79abccc373c1eb6cb1e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 15:15:27 +0100 Subject: Bug fix. --- src/rabbit_queue_index.erl | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e74ef295..708891d9 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -305,9 +305,9 @@ find_lowest_seq_id_seg_and_next_seq_id( start_msg_store() -> DurableQueues = rabbit_amqqueue:find_durable_queues(), - DurableQueueNames = - sets:from_list([ queue_name_to_dir_name(Queue #amqqueue.name) - || Queue <- DurableQueues ]), + DurableDict = + dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), + Queue #amqqueue.name} || Queue <- DurableQueues ]), QueuesDir = queues_dir(), Directories = case file:list_dir(QueuesDir) of {ok, Entries} -> @@ -317,21 +317,26 @@ start_msg_store() -> {error, enoent} -> [] end, - {Durable, Transient} = - lists:foldl(fun (Queue, {DurableAcc, TransientAcc}) -> - case sets:is_element(Queue, DurableQueueNames) of - true -> {[Queue | DurableAcc], TransientAcc}; - false -> {DurableAcc, [Queue | TransientAcc]} - end - end, {[], []}, Directories), + DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), + {DurableQueueNames, TransientDirs} = + lists:foldl( + fun (QueueDir, {DurableAcc, TransientAcc}) -> + case sets:is_element(QueueDir, DurableDirectories) of + true -> + {[dict:fetch(QueueDir, DurableDict) | DurableAcc], + TransientAcc}; + false -> + {DurableAcc, [QueueDir | TransientAcc]} + end + end, {[], []}, Directories), MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), {ok, _Pid} = rabbit_msg_store:start_link(MsgStoreDir, fun queue_index_walker/1, - Durable), + DurableQueueNames), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), ok = delete_queue_directory(Dir) - end, Transient), + end, TransientDirs), {ok, DurableQueues}. 
%%---------------------------------------------------------------------------- @@ -519,7 +524,6 @@ load_segment(SegNum, SegPath, JAckDict) -> case file:open(SegPath, [raw, binary, read_ahead, read]) of {error, enoent} -> {dict:new(), 0, 0}; {ok, Hdl} -> - rabbit_log:info("SegNum: ~p~n", [SegNum]), {SDict, AckCount, HighRelSeq} = load_segment_entries(Hdl, dict:new(), 0, 0), ok = file:close(Hdl), @@ -540,7 +544,6 @@ load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> MSB:(8-?REL_SEQ_ONLY_PREFIX_BITS)>>} -> {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, - rabbit_log:info("D/A: ~p: ~p~n", [self(), RelSeq]), {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq); {ok, < {ok, <>} = file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, - rabbit_log:info("Pub: ~p: ~p~n", [self(), RelSeq]), HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( Hdl, dict:store(RelSeq, {MsgId, false, -- cgit v1.2.1 From 86d7e0037db5c601262555253964e47bea9e0a06 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 15:50:44 +0100 Subject: Bug fix. --- src/rabbit_amqqueue_process.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 66fc45ea..7190953d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -575,7 +575,7 @@ handle_call({deliver, Txn, Message, ChPid}, _From, State) -> handle_call({commit, Txn}, From, State) -> NewState = commit_transaction(Txn, From, State), erase_tx(Txn), - noreply(run_message_queue(NewState)); + noreply(NewState); handle_call({notify_down, ChPid}, From, State) -> %% optimisation: we reply straight away so the sender can continue @@ -783,9 +783,10 @@ handle_cast({notify_sent, ChPid}, State) -> handle_cast({tx_commit_callback, Pubs, AckTags, From}, State = #q{variable_queue_state = VQS}) -> - noreply(State#q{variable_queue_state = - rabbit_variable_queue:do_tx_commit( - Pubs, AckTags, From, VQS)}); + noreply( + run_message_queue( + State#q{variable_queue_state = + rabbit_variable_queue:do_tx_commit(Pubs, AckTags, From, VQS)})); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( -- cgit v1.2.1 From dd7e242c501799ec06202ce4a7437b5a9cbdd0da Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 16:43:15 +0100 Subject: Bug fix. --- src/rabbit_amqqueue_process.erl | 14 +++++++++----- src/rabbit_variable_queue.erl | 14 ++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7190953d..546d8fbe 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -486,9 +486,10 @@ commit_transaction(Txn, From, State) -> store_ch_record(C#cr{unacked_messages = Remaining}), [AckTag || {_Msg, AckTag} <- MsgsWithAcks] end, - VQS = rabbit_variable_queue:tx_commit( - PendingMessagesOrdered, Acks, From, State #q.variable_queue_state), - State #q { variable_queue_state = VQS }. + {RunQueue, VQS} = + rabbit_variable_queue:tx_commit( + PendingMessagesOrdered, Acks, From, State #q.variable_queue_state), + {RunQueue, State #q { variable_queue_state = VQS }}. 
rollback_transaction(Txn, State) -> #tx { pending_messages = PendingMessages @@ -573,9 +574,12 @@ handle_call({deliver, Txn, Message, ChPid}, _From, State) -> reply(Delivered, NewState); handle_call({commit, Txn}, From, State) -> - NewState = commit_transaction(Txn, From, State), + {RunQueue, NewState} = commit_transaction(Txn, From, State), erase_tx(Txn), - noreply(NewState); + noreply(case RunQueue of + true -> run_message_queue(NewState); + false -> NewState + end); handle_call({notify_down, ChPid}, From, State) -> %% optimisation: we reply straight away so the sender can continue diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index dddfb4a8..ac2bab0f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -287,10 +287,12 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = IndexState1 }. purge(State = #vqstate { prefetcher = undefined, q4 = Q4, - index_state = IndexState }) -> + index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = remove_queue_entries(Q4, IndexState), - purge1(Q4Count, State #vqstate { index_state = IndexState1, - q4 = queue:new() }); + {TotalCount, State1} = + purge1(Q4Count, State #vqstate { index_state = IndexState1, + q4 = queue:new() }), + {TotalCount, State1 #vqstate { len = 0 }}; purge(State) -> purge(drain_prefetcher(stop, State)). @@ -305,7 +307,7 @@ delete(State) -> {GammaSeqId, NextSeqId} -> {_DeleteCount, IndexState1} = delete1(NextSeqId, 0, GammaSeqId, IndexState), - State1 #vqstate { index_state = IndexState1 } + State1 #vqstate { index_state = IndexState1, len = 0 } end. %% [{Msg, AckTag}] @@ -352,7 +354,7 @@ tx_rollback(Pubs, State) -> tx_commit(Pubs, AckTags, From, State) -> case persistent_msg_ids(Pubs) of [] -> - do_tx_commit(Pubs, AckTags, From, State); + {true, do_tx_commit(Pubs, AckTags, From, State)}; PersistentMsgIds -> Self = self(), ok = rabbit_msg_store:sync( @@ -360,7 +362,7 @@ tx_commit(Pubs, AckTags, From, State) -> fun () -> ok = rabbit_amqqueue:tx_commit_callback( Self, Pubs, AckTags, From) end), - State + {false, State} end. do_tx_commit(Pubs, AckTags, From, State) -> -- cgit v1.2.1 From 5214dd5ae57f5a7ca0aebfa897e641ca5201ad18 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Oct 2009 16:55:43 +0100 Subject: assertion that the purge count equals the queue length --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ac2bab0f..ffdb695e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -289,10 +289,10 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> purge(State = #vqstate { prefetcher = undefined, q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = remove_queue_entries(Q4, IndexState), - {TotalCount, State1} = + {Len, State1} = purge1(Q4Count, State #vqstate { index_state = IndexState1, q4 = queue:new() }), - {TotalCount, State1 #vqstate { len = 0 }}; + {Len, State1 #vqstate { len = 0 }}; purge(State) -> purge(drain_prefetcher(stop, State)). 
-- cgit v1.2.1


From 72b5042aa269789a5c0012c3fe3bac2de837a5c6 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 16 Oct 2009 17:48:40 +0100
Subject: empty queues that get deleted should, um, be deleted

---
 src/rabbit_variable_queue.erl | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index ffdb695e..1b4a0fd4 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -300,15 +300,18 @@ purge(State) ->
 %% needs to delete everything that's been delivered and not ack'd.
 delete(State) ->
     {_PurgeCount, State1 = #vqstate { index_state = IndexState }} = purge(State),
-    case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState)
-    of
-        {N, N} ->
-            State1;
-        {GammaSeqId, NextSeqId} ->
-            {_DeleteCount, IndexState1} =
-                delete1(NextSeqId, 0, GammaSeqId, IndexState),
-            State1 #vqstate { index_state = IndexState1, len = 0 }
-    end.
+    IndexState1 =
+        case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(
+               IndexState) of
+            {N, N} ->
+                IndexState;
+            {GammaSeqId, NextSeqId} ->
+                {_DeleteCount, IndexState2} =
+                    delete1(NextSeqId, 0, GammaSeqId, IndexState),
+                IndexState2
+        end,
+    IndexState3 = rabbit_queue_index:terminate_and_erase(IndexState1),
+    State1 #vqstate { index_state = IndexState3 }.

 %% [{Msg, AckTag}]
 %% We guarantee that after fetch, only persistent msgs are left on
@@ -385,7 +388,7 @@ persistent_msg_ids(Pubs) ->

 delete1(NextSeqId, Count, GammaSeqId, IndexState)
   when GammaSeqId >= NextSeqId ->
-    {Count, rabbit_queue_index:terminate_and_erase(IndexState)};
+    {Count, IndexState};
 delete1(NextSeqId, Count, GammaSeqId, IndexState) ->
     Gamma1SeqId = GammaSeqId + rabbit_queue_index:segment_size(),
     case rabbit_queue_index:read_segment_entries(GammaSeqId, IndexState) of
-- cgit v1.2.1


From 16ce647f12842badc7e20e21dddaaa4894277a86 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 16 Oct 2009 18:17:15 +0100
Subject: If we don't fully flush the journal when it becomes full then we
 negate the point of the journal: we very frequently fill it and then have to
 empty it to one file, whereas if we fully empty it, it takes much longer to
 fill, and we then empty it to several files

---
 src/rabbit_queue_index.erl | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 708891d9..9c18b784 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -179,16 +179,12 @@ init(Name) ->
 terminate(State = #qistate { journal_handle = undefined }) ->
     State;
 terminate(State) ->
-    case flush_journal(State) of
-        {true, State1} ->
-            terminate(State1);
-        {false, State1 = #qistate { cur_seg_num = SegNum }} ->
-            State2 = #qistate { journal_handle = JournalHdl } =
-                close_file_handle_for_seg(SegNum, State1),
-            ok = file:sync(JournalHdl),
-            ok = file:close(JournalHdl),
-            State2 #qistate { journal_handle = undefined }
-    end.
+    State1 = #qistate { cur_seg_num = SegNum } = full_flush_journal(State),
+    State2 = #qistate { journal_handle = JournalHdl } =
+        close_file_handle_for_seg(SegNum, State1),
+    ok = file:sync(JournalHdl),
+    ok = file:close(JournalHdl),
+    State2 #qistate { journal_handle = undefined }.
 terminate_and_erase(State) ->
     State1 = terminate(State),
@@ -226,11 +222,16 @@ write_acks(SeqIds, State = #qistate { journal_handle = JournalHdl,
     State1 = State #qistate { journal_ack_dict = JAckDict1,
                               journal_ack_count = JAckCount1 },
     case JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT of
-        true -> {_Cont, State2} = flush_journal(State1),
-                State2;
+        true -> full_flush_journal(State1);
         false -> State1
     end.

+full_flush_journal(State) ->
+    case flush_journal(State) of
+        {true, State1} -> full_flush_journal(State1);
+        {false, State1} -> State1
+    end.
+
 flush_journal(State = #qistate { journal_ack_count = 0 }) ->
     {false, State};
 flush_journal(State = #qistate { journal_handle = JournalHdl,
-- cgit v1.2.1


From d92b129e918c68d721e0a080f42d9331fa257115 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 19 Oct 2009 12:43:43 +0100
Subject: on requeue, make use of msg_store:release

---
 src/rabbit_variable_queue.erl | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 1b4a0fd4..00f3cce5 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -320,25 +320,32 @@ delete(State) ->
 %% msg_store:write for persistent msgs. It also means that we don't
 %% need to worry about calling msg_store:remove (as ack would do)
 %% because transient msgs won't be on disk anyway, thus they won't
-%% need to be removed.
+%% need to be removed. However, we do call msg_store:release so that
+%% the cache isn't held full of msgs which are now at the tail of the
+%% queue.
 requeue(MsgsWithAckTags, State) ->
-    {SeqIds, State1 = #vqstate { index_state = IndexState }} =
+    {SeqIds, MsgIds, State1 = #vqstate { index_state = IndexState }} =
         lists:foldl(
           fun ({Msg = #basic_message { guid = MsgId }, AckTag},
-               {SeqIdsAcc, StateN}) ->
+               {SeqIdsAcc, MsgIdsAcc, StateN}) ->
                   {_SeqId, StateN1} = publish(Msg, true, true, StateN),
-                  SeqIdsAcc1 = case AckTag of
-                                   ack_not_on_disk ->
-                                       SeqIdsAcc;
-                                   {ack_index_and_store, MsgId, SeqId} ->
-                                       [SeqId | SeqIdsAcc]
-                               end,
-                  {SeqIdsAcc1, StateN1}
-          end, {[], State}, MsgsWithAckTags),
+                  {SeqIdsAcc1, MsgIdsAcc1} =
+                      case AckTag of
+                          ack_not_on_disk ->
+                              {SeqIdsAcc, MsgIdsAcc};
+                          {ack_index_and_store, MsgId, SeqId} ->
+                              {[SeqId | SeqIdsAcc], [MsgId | MsgIdsAcc]}
+                      end,
+                  {SeqIdsAcc1, MsgIdsAcc1, StateN1}
+          end, {[], [], State}, MsgsWithAckTags),
     IndexState1 = case SeqIds of
                       [] -> IndexState;
                       _ -> rabbit_queue_index:write_acks(SeqIds, IndexState)
                   end,
+    ok = case MsgIds of
+             [] -> ok;
+             _ -> rabbit_msg_store:release(MsgIds)
+         end,
     State1 #vqstate { index_state = IndexState1 }.
-- cgit v1.2.1


From 395e64fa8638998a5692e9c7671d1ec68370c24a Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 19 Oct 2009 12:44:14 +0100
Subject: test suite for msg_store - hits 80% coverage - uncovered code is for
 recovery from crashed compactions

---
 src/rabbit_tests.erl | 613 ++++++++++++---------------------------------------
 1 file changed, 147 insertions(+), 466 deletions(-)

diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 3a435e79..607e7e7b 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -33,7 +33,7 @@
 -compile(export_all).

--export([all_tests/0, test_parsing/0, test_disk_queue/0]).
+-export([all_tests/0, test_parsing/0]).

 %% Exported so the hook mechanism can call back
 -export([handle_hook/3, bad_handle_hook/3, extra_arg_hook/5]).
@@ -50,7 +50,7 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> - %% passed = test_disk_queue(), + passed = test_msg_store(), passed = test_priority_queue(), passed = test_unfold(), passed = test_parsing(), @@ -822,472 +822,153 @@ bad_handle_hook(_, _, _) -> extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). -test_disk_queue() -> - rdq_stop(), - rdq_virgin(), - passed = rdq_stress_gc(5000), - passed = rdq_test_startup_with_queue_gaps(), - passed = rdq_test_redeliver(), - passed = rdq_test_purge(), - passed = rdq_test_mixed_queue_modes(), - passed = rdq_test_mode_conversion_mid_txn(), - rdq_virgin(), - passed. - -benchmark_disk_queue() -> - rdq_stop(), - % unicode chars are supported properly from r13 onwards - io:format("Msg Count\t| Msg Size\t| Queue Count\t| Startup mu s\t| Publish mu s\t| Pub mu s/msg\t| Pub mu s/byte\t| Deliver mu s\t| Del mu s/msg\t| Del mu s/byte~n", []), - [begin rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSize), - timer:sleep(1000) end || % 1000 milliseconds - MsgSize <- [512, 8192, 32768, 131072], - Qs <- [[1], lists:seq(1,10)], %, lists:seq(1,100), lists:seq(1,1000)], - MsgCount <- [1024, 4096, 16384] - ], - rdq_virgin(), - ok = control_action(stop_app, []), - ok = control_action(start_app, []), - passed. - -rdq_message(MsgId, MsgBody, IsPersistent) -> - rabbit_basic:message(x, <<>>, [], MsgBody, term_to_binary(MsgId), - IsPersistent). - -rdq_match_message(Msg, MsgId, MsgBody, Size) when is_number(MsgId) -> - rdq_match_message(Msg, term_to_binary(MsgId), MsgBody, Size); -rdq_match_message( - #basic_message { guid = MsgId, content = - #content { payload_fragments_rev = [MsgBody] }}, - MsgId, MsgBody, Size) when size(MsgBody) =:= Size -> - ok. - -rdq_match_messages(#basic_message { guid = MsgId, content = #content { payload_fragments_rev = MsgBody }}, - #basic_message { guid = MsgId, content = #content { payload_fragments_rev = MsgBody }}) -> - ok. - -commit_list(List, MsgCount, IsPersistent) -> - lists:zip3([term_to_binary(MsgId) || MsgId <- List], - lists:duplicate(MsgCount, false), - lists:duplicate(MsgCount, IsPersistent)). - -rdq_time_tx_publish_commit_deliver_ack(Qs, MsgCount, MsgSizeBytes) -> - Startup = rdq_virgin(), - rdq_start(), - QCount = length(Qs), - Msg = <<0:(8*MsgSizeBytes)>>, - List = lists:seq(1, MsgCount), - CommitList = commit_list(List, MsgCount, false), - {Publish, ok} = - timer:tc(?MODULE, rdq_time_commands, - [[fun() -> [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) - || N <- List, _ <- Qs] end, - fun() -> [ok = rabbit_disk_queue:tx_commit(Q, CommitList, []) - || Q <- Qs] end - ]]), - {Deliver, ok} = - timer:tc( - ?MODULE, rdq_time_commands, - [[fun() -> [begin SeqIds = - [begin - Remaining = MsgCount - N, - {Message, false, SeqId, Remaining} - = rabbit_disk_queue:fetch(Q), - ok = rdq_match_message(Message, N, Msg, MsgSizeBytes), - SeqId - end || N <- List], - ok = rabbit_disk_queue:tx_commit(Q, [], SeqIds) - end || Q <- Qs] - end]]), - io:format(" ~15.10B| ~14.10B| ~14.10B| ~14.1f| ~14.1f| ~14.6f| ~14.10f| ~14.1f| ~14.6f| ~14.10f~n", - [MsgCount, MsgSizeBytes, QCount, float(Startup), - float(Publish), (Publish / (MsgCount * QCount)), - (Publish / (MsgCount * QCount * MsgSizeBytes)), - float(Deliver), (Deliver / (MsgCount * QCount)), - (Deliver / (MsgCount * QCount * MsgSizeBytes))]), - rdq_stop(). 
- -% we know each file is going to be 1024*1024*10 bytes in size (10MB), -% so make sure we have several files, and then keep punching holes in -% a reasonably sensible way. -rdq_stress_gc(MsgCount) -> - rdq_virgin(), - rdq_start(), - MsgSizeBytes = 256*1024, - Msg = <<0:(8*MsgSizeBytes)>>, % 256KB - List = lists:seq(1, MsgCount), - CommitList = commit_list(List, MsgCount, false), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- List], - rabbit_disk_queue:tx_commit(q, CommitList, []), - StartChunk = round(MsgCount / 20), % 5% - AckList = +msg_store_dir() -> + filename:join(rabbit_mnesia:dir(), "msg_store"). + +start_msg_store_empty() -> + start_msg_store(fun (ok) -> finished end, ok). + +start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> + {ok, _Pid} = rabbit_msg_store:start_link(msg_store_dir(), MsgRefDeltaGen, + MsgRefDeltaGenInit). + +test_msg_store() -> + %% ignore return code just in case it's already stopped + rabbit_msg_store:stop(), + {ok, _Pid} = start_msg_store_empty(), + MsgIds = [term_to_binary(M) || M <- lists:seq(1,100)], + {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), + %% check we don't contain any of the msgs we're about to publish + false = lists:foldl( + fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, + false, MsgIds), + %% publish some msgs + ok = lists:foldl( + fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, + ok, MsgIds), + %% check they're all in there + true = lists:foldl( + fun (MsgId, true) -> rabbit_msg_store:contains(MsgId) end, + true, MsgIds), + %% publish the latter half twice so we hit the caching and ref count code + ok = lists:foldl( + fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, + ok, MsgIds2ndHalf), + %% check they're still all in there + true = lists:foldl( + fun (MsgId, true) -> rabbit_msg_store:contains(MsgId) end, + true, MsgIds), + %% sync on the 2nd half, but do lots of individual syncs to try + %% and cause coalescing to happen + Self = self(), + ok = lists:foldl( + fun (MsgId, ok) -> rabbit_msg_store:sync( + [MsgId], fun () -> Self ! {sync, MsgId} end) + end, ok, MsgIds2ndHalf), + lists:foreach( + fun(MsgId) -> + receive + {sync, MsgId} -> ok + after + 10000 -> + io:format("Sync from msg_store missing (msg_id: ~p)~n", + [MsgId]), + throw(timeout) + end + end, MsgIds2ndHalf), + %% read them all + ok = lists:foldl( - fun (E, Acc) -> - case lists:member(E, Acc) of - true -> Acc; - false -> [E|Acc] - end - end, [], lists:flatten( - lists:reverse( - [ lists:seq(N, MsgCount, N) - || N <- lists:seq(1, round(MsgCount / 2), 1) - ]))), - {Start, End} = lists:split(StartChunk, AckList), - AckList2 = End ++ Start, - MsgIdToSeqDict = + fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, + ok, MsgIds), + %% read them all again - this will hit the cache, not disk + ok = lists:foldl( - fun (MsgId, Acc) -> - Remaining = MsgCount - MsgId, - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, MsgId, Msg, MsgSizeBytes), - dict:store(MsgId, SeqId, Acc) - end, dict:new(), List), - %% we really do want to ack each of this individually - [begin {ok, SeqId} = dict:find(MsgId, MsgIdToSeqDict), - rabbit_disk_queue:ack(q, [SeqId]) - end || MsgId <- AckList2], - rabbit_disk_queue:tx_commit(q, [], []), - empty = rabbit_disk_queue:fetch(q), - rdq_stop(), - passed. 
- -rdq_test_startup_with_queue_gaps() -> - rdq_virgin(), - rdq_start(), - Msg = <<0:(8*256)>>, - Total = 1000, - Half = round(Total/2), - All = lists:seq(1,Total), - CommitAll = commit_list(All, Total, true), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, true)) || N <- All], - rabbit_disk_queue:tx_commit(q, CommitAll, []), - io:format("Publish done~n", []), - %% deliver first half - Seqs = [begin - Remaining = Total - N, - {Message, false, SeqId, Remaining} - = rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(1,Half)], - io:format("Deliver first half done~n", []), - %% ack every other message we have delivered (starting at the _first_) - lists:foldl(fun (SeqId2, true) -> - rabbit_disk_queue:ack(q, [SeqId2]), - false; - (_SeqId2, false) -> - true - end, true, Seqs), - rabbit_disk_queue:tx_commit(q, [], []), - io:format("Acked every other message delivered done~n", []), - rdq_stop(), - rdq_start(), - io:format("Startup (with shuffle) done~n", []), - %% should have shuffled up. So we should now get - %% lists:seq(2,500,2) already delivered - Seqs2 = [begin - Remaining = round(Total - ((Half + N)/2)), - {Message, true, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(2,Half,2)], - rabbit_disk_queue:tx_commit(q, [], Seqs2), - io:format("Reread non-acked messages done~n", []), - %% and now fetch the rest - Seqs3 = [begin - Remaining = Total - N, - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(1 + Half,Total)], - rabbit_disk_queue:tx_commit(q, [], Seqs3), - io:format("Read second half done~n", []), - empty = rabbit_disk_queue:fetch(q), - rdq_stop(), - passed. - -rdq_test_redeliver() -> - rdq_virgin(), - rdq_start(), - Msg = <<0:(8*256)>>, - Total = 1000, - Half = round(Total/2), - All = lists:seq(1,Total), - CommitAll = commit_list(All, Total, false), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], - rabbit_disk_queue:tx_commit(q, CommitAll, []), - io:format("Publish done~n", []), - %% deliver first half - Seqs = [begin - Remaining = Total - N, - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(1,Half)], - io:format("Deliver first half done~n", []), - %% now requeue every other message (starting at the _first_) - %% and ack the other ones - lists:foldl(fun (SeqId2, true) -> - rabbit_disk_queue:requeue(q, [{SeqId2, true}]), - false; - (SeqId2, false) -> - rabbit_disk_queue:ack(q, [SeqId2]), - true - end, true, Seqs), - rabbit_disk_queue:tx_commit(q, [], []), - io:format("Redeliver and acking done~n", []), - %% we should now get the 2nd half in order, followed by - %% every-other-from-the-first-half - Seqs2 = [begin - Remaining = round(Total - N + (Half/2)), - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(1+Half, Total)], - rabbit_disk_queue:tx_commit(q, [], Seqs2), - Seqs3 = [begin - Remaining = round((Half - N) / 2) - 1, - {Message, true, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(1, Half, 2)], - rabbit_disk_queue:tx_commit(q, [], Seqs3), - empty = rabbit_disk_queue:fetch(q), - rdq_stop(), - passed. 
- -rdq_test_purge() -> - rdq_virgin(), - rdq_start(), - Msg = <<0:(8*256)>>, - Total = 1000, - Half = round(Total/2), - All = lists:seq(1,Total), - CommitAll = commit_list(All, Total, false), - [rabbit_disk_queue:tx_publish(rdq_message(N, Msg, false)) || N <- All], - rabbit_disk_queue:tx_commit(q, CommitAll, []), - io:format("Publish done~n", []), - %% deliver first half - Seqs = [begin - Remaining = Total - N, - {Message, false, SeqId, Remaining} = - rabbit_disk_queue:fetch(q), - ok = rdq_match_message(Message, N, Msg, 256), - SeqId - end || N <- lists:seq(1,Half)], - io:format("Deliver first half done~n", []), - rabbit_disk_queue:purge(q), - io:format("Purge done~n", []), - rabbit_disk_queue:tx_commit(q, [], Seqs), - io:format("Ack first half done~n", []), - empty = rabbit_disk_queue:fetch(q), - rdq_stop(), - passed. - -rdq_new_mixed_queue(Q, Durable, Disk) -> - {ok, MS} = rabbit_mixed_queue:init(Q, Durable), - {MS1, _} = - rabbit_mixed_queue:estimate_queue_memory(MS), - case Disk of - true -> {ok, MS2} = rabbit_mixed_queue:set_storage_mode(disk, [], MS1), - MS2; - false -> MS1 - end. - -rdq_test_mixed_queue_modes() -> - rdq_virgin(), - rdq_start(), - Payload = <<0:(8*256)>>, - MS = rdq_new_mixed_queue(q, true, false), - MS2 = lists:foldl( - fun (_N, MS1) -> - Msg = rabbit_basic:message(x, <<>>, [], Payload), - {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1), - MS1a - end, MS, lists:seq(1,10)), - MS4 = lists:foldl( - fun (_N, MS3) -> - Msg = (rabbit_basic:message(x, <<>>, [], Payload)) - #basic_message { is_persistent = true }, - {ok, MS3a} = rabbit_mixed_queue:publish(Msg, MS3), - MS3a - end, MS2, lists:seq(1,10)), - MS6 = lists:foldl( - fun (_N, MS5) -> - Msg = rabbit_basic:message(x, <<>>, [], Payload), - {ok, MS5a} = rabbit_mixed_queue:publish(Msg, MS5), - MS5a - end, MS4, lists:seq(1,10)), - 30 = rabbit_mixed_queue:len(MS6), - io:format("Published a mixture of messages; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory(MS6)]), - {ok, MS7} = rabbit_mixed_queue:set_storage_mode(disk, [], MS6), - 30 = rabbit_mixed_queue:len(MS7), - io:format("Converted to disk only mode; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory(MS7)]), - {ok, MS8} = rabbit_mixed_queue:set_storage_mode(mixed, [], MS7), - 30 = rabbit_mixed_queue:len(MS8), - io:format("Converted to mixed mode; ~w~n", - [rabbit_mixed_queue:estimate_queue_memory(MS8)]), - MS10 = + fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, + ok, MsgIds), + %% remove them all + ok = rabbit_msg_store:remove(MsgIds), + %% check first half doesn't exist + false = lists:foldl( + fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, + false, MsgIds1stHalf), + %% check second half does exist + true = lists:foldl( + fun (MsgId, true) -> rabbit_msg_store:contains(MsgId) end, + true, MsgIds2ndHalf), + %% read the second half again + ok = lists:foldl( - fun (N, MS9) -> - Rem = 30 - N, - {{#basic_message { is_persistent = false }, - false, _AckTag, Rem}, - MS9a} = rabbit_mixed_queue:fetch(MS9), - MS9a - end, MS8, lists:seq(1,10)), - 20 = rabbit_mixed_queue:len(MS10), - io:format("Delivered initial non persistent messages~n"), - {ok, MS11} = rabbit_mixed_queue:set_storage_mode(disk, [], MS10), - 20 = rabbit_mixed_queue:len(MS11), - io:format("Converted to disk only mode~n"), - rdq_stop(), - rdq_start(), - MS12 = rdq_new_mixed_queue(q, true, false), - 10 = rabbit_mixed_queue:len(MS12), - io:format("Recovered queue~n"), - {MS14, AckTags} = + fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, + ok, 
MsgIds2ndHalf), + %% release the second half, just for fun + ok = rabbit_msg_store:release(MsgIds2ndHalf), + %% read the second half again, just for fun + ok = lists:foldl( - fun (N, {MS13, AcksAcc}) -> - Rem = 10 - N, - {{Msg = #basic_message { is_persistent = true }, - false, AckTag, Rem}, - MS13a} = rabbit_mixed_queue:fetch(MS13), - {MS13a, [{Msg, AckTag} | AcksAcc]} - end, {MS12, []}, lists:seq(1,10)), - 0 = rabbit_mixed_queue:len(MS14), - {ok, MS15} = rabbit_mixed_queue:ack(AckTags, MS14), - io:format("Delivered and acked all messages~n"), - {ok, MS16} = rabbit_mixed_queue:set_storage_mode(disk, [], MS15), - 0 = rabbit_mixed_queue:len(MS16), - io:format("Converted to disk only mode~n"), - rdq_stop(), - rdq_start(), - MS17 = rdq_new_mixed_queue(q, true, false), - 0 = rabbit_mixed_queue:len(MS17), - {MS17,0} = rabbit_mixed_queue:estimate_queue_memory(MS17), - io:format("Recovered queue~n"), - rdq_stop(), - passed. - -rdq_test_mode_conversion_mid_txn() -> - Payload = <<0:(8*256)>>, - MsgIdsA = lists:seq(0,9), - MsgsA = [ rdq_message(MsgId, Payload, (0 == MsgId rem 2)) - || MsgId <- MsgIdsA ], - MsgIdsB = lists:seq(10,20), - MsgsB = [ rdq_message(MsgId, Payload, (0 == MsgId rem 2)) - || MsgId <- MsgIdsB ], - - rdq_virgin(), - rdq_start(), - MS0 = rdq_new_mixed_queue(q, true, false), - passed = rdq_tx_publish_mixed_alter_commit_get( - MS0, MsgsA, MsgsB, disk, commit), - - rdq_stop_virgin_start(), - MS1 = rdq_new_mixed_queue(q, true, false), - passed = rdq_tx_publish_mixed_alter_commit_get( - MS1, MsgsA, MsgsB, disk, cancel), - - - rdq_stop_virgin_start(), - MS2 = rdq_new_mixed_queue(q, true, true), - passed = rdq_tx_publish_mixed_alter_commit_get( - MS2, MsgsA, MsgsB, mixed, commit), - - rdq_stop_virgin_start(), - MS3 = rdq_new_mixed_queue(q, true, true), - passed = rdq_tx_publish_mixed_alter_commit_get( - MS3, MsgsA, MsgsB, mixed, cancel), - - rdq_stop(), - passed. 
- -rdq_tx_publish_mixed_alter_commit_get(MS0, MsgsA, MsgsB, Mode, CommitOrCancel) -> - 0 = rabbit_mixed_queue:len(MS0), - MS2 = lists:foldl( - fun (Msg, MS1) -> - {ok, MS1a} = rabbit_mixed_queue:publish(Msg, MS1), - MS1a - end, MS0, MsgsA), - Len0 = length(MsgsA), - Len0 = rabbit_mixed_queue:len(MS2), - MS4 = lists:foldl( - fun (Msg, MS3) -> - {ok, MS3a} = rabbit_mixed_queue:tx_publish(Msg, MS3), - MS3a - end, MS2, MsgsB), - Len0 = rabbit_mixed_queue:len(MS4), - {ok, MS5} = rabbit_mixed_queue:set_storage_mode(Mode, MsgsB, MS4), - Len0 = rabbit_mixed_queue:len(MS5), - {ok, MS9} = - case CommitOrCancel of - commit -> - {ok, MS6} = rabbit_mixed_queue:tx_commit(MsgsB, [], MS5), - Len1 = Len0 + length(MsgsB), - Len1 = rabbit_mixed_queue:len(MS6), - {AckTags, MS8} = - lists:foldl( - fun (Msg, {Acc, MS7}) -> - MsgId = binary_to_term(Msg #basic_message.guid), - Rem = Len1 - MsgId - 1, - {{Msg1, false, AckTag, Rem}, MS7a} = - rabbit_mixed_queue:fetch(MS7), - ok = rdq_match_messages(Msg, Msg1), - {[{Msg1, AckTag} | Acc], MS7a} - end, {[], MS6}, MsgsA ++ MsgsB), - 0 = rabbit_mixed_queue:len(MS8), - rabbit_mixed_queue:ack(AckTags, MS8); - cancel -> - {ok, MS6} = rabbit_mixed_queue:tx_rollback(MsgsB, MS5), - Len0 = rabbit_mixed_queue:len(MS6), - {AckTags, MS8} = - lists:foldl( - fun (Msg, {Acc, MS7}) -> - MsgId = binary_to_term(Msg #basic_message.guid), - Rem = Len0 - MsgId - 1, - {{Msg1, false, AckTag, Rem}, MS7a} = - rabbit_mixed_queue:fetch(MS7), - ok = rdq_match_messages(Msg, Msg1), - {[{Msg1, AckTag} | Acc], MS7a} - end, {[], MS6}, MsgsA), - 0 = rabbit_mixed_queue:len(MS8), - rabbit_mixed_queue:ack(AckTags, MS8) - end, - 0 = rabbit_mixed_queue:len(MS9), - Msg = rdq_message(0, <<0:256>>, false), - {ok, AckTag, MS10} = rabbit_mixed_queue:publish_delivered(Msg, MS9), - {ok,MS11} = rabbit_mixed_queue:ack([{Msg, AckTag}], MS10), - 0 = rabbit_mixed_queue:len(MS11), + fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, + ok, MsgIds2ndHalf), + %% read the second half via peruse + lists:foldl( + fun (MsgId, ok) -> + rabbit_msg_store:peruse(MsgId, + fun ({ok, MsgId1}) when MsgId1 == MsgId -> + Self ! {peruse, MsgId1} + end), + receive + {peruse, MsgId} -> + ok + after + 10000 -> + io:format("Failed to receive response via perues~n"), + throw(timeout) + end + end, ok, MsgIds2ndHalf), + %% stop and restart, preserving every other msg in 2nd half + ok = rabbit_msg_store:stop(), + {ok, _Pid1} = + start_msg_store(fun ([]) -> finished; + ([MsgId|MsgIdsTail]) + when length(MsgIdsTail) rem 2 == 0 -> + {MsgId, 1, MsgIdsTail}; + ([MsgId|MsgIdsTail]) -> + {MsgId, 0, MsgIdsTail} + end, MsgIds2ndHalf), + %% check we have the right msgs left + lists:foldl( + fun (MsgId, Bool) -> + not(Bool = rabbit_msg_store:contains(MsgId)) + end, false, MsgIds2ndHalf), + %% restart empty + ok = rabbit_msg_store:stop(), + {ok, _Pid2} = start_msg_store_empty(), + %% check we don't contain any of the msgs + false = lists:foldl( + fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, + false, MsgIds), + %% push a lot of msgs in... + MsgIdsBig = lists:seq(1,100000), + Payload = << 0:65536 >>, + ok = lists:foldl( + fun (MsgId, ok) -> rabbit_msg_store:write(term_to_binary(MsgId), + Payload) end, + ok, MsgIdsBig), + %% .., then remove even numbers ascending, and odd numbers + %% descending. This hits the GC. 
+ ok = lists:foldl( + fun (MsgId, ok) -> + rabbit_msg_store:remove([term_to_binary( + case MsgId rem 2 of + 0 -> MsgId; + 1 -> 100000 - MsgId + end)]) + end, ok, MsgIdsBig), + %% ensure empty + false = lists:foldl( + fun (MsgId, false) -> rabbit_msg_store:contains( + term_to_binary(MsgId)) end, + false, MsgIdsBig), + %% restart empty + ok = rabbit_msg_store:stop(), + {ok, _Pid3} = start_msg_store_empty(), passed. - -rdq_time_commands(Funcs) -> - lists:foreach(fun (F) -> F() end, Funcs). - -rdq_virgin() -> - {Micros, {ok, _}} = - timer:tc(rabbit_disk_queue, start_link, []), - ok = rabbit_disk_queue:stop_and_obliterate(), - timer:sleep(1000), - Micros. - -rdq_start() -> - {ok, _} = rabbit_disk_queue:start_link(), - ok. - -rdq_stop() -> - rabbit_disk_queue:stop(), - timer:sleep(1000). - -rdq_stop_virgin_start() -> - rdq_stop(), - rdq_virgin(), - rdq_start(). -- cgit v1.2.1 From 1a8215c36c6ba5176ca352c224f79244c505fb71 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 12:54:30 +0100 Subject: stop the msg_store at the end of the tests so the rest of the test suite can run. Also, fractionally increase code coverage. --- src/rabbit_tests.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 607e7e7b..88aba9d3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -876,6 +876,17 @@ test_msg_store() -> throw(timeout) end end, MsgIds2ndHalf), + %% it's very likely we're totally sync'd here, so the 1st half + %% sync should not cause an fsync (hence different code path + ok = rabbit_msg_store:sync(MsgIds1stHalf, + fun () -> Self ! {sync, first_half} end), + receive + {sync, first_half} -> ok + after + 10000 -> + io:format("Sync from msg_store missing for first_half~n"), + throw(timeout) + end, %% read them all ok = lists:foldl( @@ -971,4 +982,5 @@ test_msg_store() -> %% restart empty ok = rabbit_msg_store:stop(), {ok, _Pid3} = start_msg_store_empty(), + ok = rabbit_msg_store:stop(), passed. -- cgit v1.2.1 From a68a2a40e3b409471a2a6e231e51dad76f8c8cec Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 12:56:20 +0100 Subject: factoring out of some magic numbers --- src/rabbit_tests.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 88aba9d3..bf32714a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -958,7 +958,8 @@ test_msg_store() -> fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, false, MsgIds), %% push a lot of msgs in... 
- MsgIdsBig = lists:seq(1,100000), + BigCount = 100000, + MsgIdsBig = lists:seq(1, BigCount), Payload = << 0:65536 >>, ok = lists:foldl( fun (MsgId, ok) -> rabbit_msg_store:write(term_to_binary(MsgId), @@ -971,7 +972,7 @@ test_msg_store() -> rabbit_msg_store:remove([term_to_binary( case MsgId rem 2 of 0 -> MsgId; - 1 -> 100000 - MsgId + 1 -> BigCount - MsgId end)]) end, ok, MsgIdsBig), %% ensure empty -- cgit v1.2.1 From 2019e6624feed2f01ab4fda4d67654a6bd3f2cae Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 13:14:42 +0100 Subject: preemptive refactoring --- src/rabbit_tests.erl | 117 +++++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bf32714a..db54b6d3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -831,36 +831,61 @@ start_msg_store_empty() -> start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> {ok, _Pid} = rabbit_msg_store:start_link(msg_store_dir(), MsgRefDeltaGen, MsgRefDeltaGenInit). + +msg_store_contains(Atom, MsgIds) -> + Atom = lists:foldl( + fun (MsgId, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(MsgId) end, Atom, MsgIds). + +msg_store_sync(MsgIds) -> + Now = now(), + Self = self(), + ok = rabbit_msg_store:sync(MsgIds, + fun () -> Self ! {sync, Now} end), + receive + {sync, Now} -> ok + after + 10000 -> + io:format("Sync from msg_store missing for msg_ids ~p~n", [MsgIds]), + throw(timeout) + end. + +msg_store_read(MsgIds) -> + ok = + lists:foldl( + fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, + ok, MsgIds). + +msg_store_write(MsgIds) -> + ok = lists:foldl( + fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, + ok, MsgIds). test_msg_store() -> - %% ignore return code just in case it's already stopped rabbit_msg_store:stop(), {ok, _Pid} = start_msg_store_empty(), + Self = self(), MsgIds = [term_to_binary(M) || M <- lists:seq(1,100)], {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), %% check we don't contain any of the msgs we're about to publish - false = lists:foldl( - fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, - false, MsgIds), - %% publish some msgs - ok = lists:foldl( - fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, - ok, MsgIds), + false = msg_store_contains(false, MsgIds), + %% publish the first half + ok = msg_store_write(MsgIds1stHalf), + %% sync on the first half + ok = msg_store_sync(MsgIds1stHalf), + %% publish the second half + ok = msg_store_write(MsgIds2ndHalf), + %% sync on the first half again - the msg_store will be dirty, but + %% we won't need the fsync + ok = msg_store_sync(MsgIds1stHalf), %% check they're all in there - true = lists:foldl( - fun (MsgId, true) -> rabbit_msg_store:contains(MsgId) end, - true, MsgIds), + true = msg_store_contains(true, MsgIds), %% publish the latter half twice so we hit the caching and ref count code - ok = lists:foldl( - fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, - ok, MsgIds2ndHalf), + ok = msg_store_write(MsgIds2ndHalf), %% check they're still all in there - true = lists:foldl( - fun (MsgId, true) -> rabbit_msg_store:contains(MsgId) end, - true, MsgIds), + true = msg_store_contains(true, MsgIds), %% sync on the 2nd half, but do lots of individual syncs to try %% and cause coalescing to happen - Self = self(), ok = lists:foldl( fun (MsgId, ok) -> rabbit_msg_store:sync( [MsgId], fun () -> Self ! 
{sync, MsgId} end) @@ -876,49 +901,25 @@ test_msg_store() -> throw(timeout) end end, MsgIds2ndHalf), - %% it's very likely we're totally sync'd here, so the 1st half - %% sync should not cause an fsync (hence different code path - ok = rabbit_msg_store:sync(MsgIds1stHalf, - fun () -> Self ! {sync, first_half} end), - receive - {sync, first_half} -> ok - after - 10000 -> - io:format("Sync from msg_store missing for first_half~n"), - throw(timeout) - end, + %% it's very likely we're not dirty here, so the 1st half sync + %% should hit a different code path + ok = msg_store_sync(MsgIds1stHalf), %% read them all - ok = - lists:foldl( - fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, - ok, MsgIds), + ok = msg_store_read(MsgIds), %% read them all again - this will hit the cache, not disk - ok = - lists:foldl( - fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, - ok, MsgIds), + ok = msg_store_read(MsgIds), %% remove them all ok = rabbit_msg_store:remove(MsgIds), %% check first half doesn't exist - false = lists:foldl( - fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, - false, MsgIds1stHalf), + false = msg_store_contains(false, MsgIds1stHalf), %% check second half does exist - true = lists:foldl( - fun (MsgId, true) -> rabbit_msg_store:contains(MsgId) end, - true, MsgIds2ndHalf), + true = msg_store_contains(true, MsgIds2ndHalf), %% read the second half again - ok = - lists:foldl( - fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, - ok, MsgIds2ndHalf), - %% release the second half, just for fun + ok = msg_store_read(MsgIds2ndHalf), + %% release the second half, just for fun (aka code coverage) ok = rabbit_msg_store:release(MsgIds2ndHalf), - %% read the second half again, just for fun - ok = - lists:foldl( - fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, - ok, MsgIds2ndHalf), + %% read the second half again, just for fun (aka code coverage) + ok = msg_store_read(MsgIds2ndHalf), %% read the second half via peruse lists:foldl( fun (MsgId, ok) -> @@ -954,9 +955,7 @@ test_msg_store() -> ok = rabbit_msg_store:stop(), {ok, _Pid2} = start_msg_store_empty(), %% check we don't contain any of the msgs - false = lists:foldl( - fun (MsgId, false) -> rabbit_msg_store:contains(MsgId) end, - false, MsgIds), + false = msg_store_contains(false, MsgIds), %% push a lot of msgs in... BigCount = 100000, MsgIdsBig = lists:seq(1, BigCount), @@ -976,10 +975,8 @@ test_msg_store() -> end)]) end, ok, MsgIdsBig), %% ensure empty - false = lists:foldl( - fun (MsgId, false) -> rabbit_msg_store:contains( - term_to_binary(MsgId)) end, - false, MsgIdsBig), + false = + msg_store_contains(false, lists:map(fun term_to_binary/1, MsgIdsBig)), %% restart empty ok = rabbit_msg_store:stop(), {ok, _Pid3} = start_msg_store_empty(), -- cgit v1.2.1 From e9457cbfc4b4c35c54a9a3d33a642b624da2bbf8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 13:20:30 +0100 Subject: now() -> make_ref() --- src/rabbit_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index db54b6d3..15ba5122 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -838,12 +838,12 @@ msg_store_contains(Atom, MsgIds) -> rabbit_msg_store:contains(MsgId) end, Atom, MsgIds). msg_store_sync(MsgIds) -> - Now = now(), + Ref = make_ref(), Self = self(), ok = rabbit_msg_store:sync(MsgIds, - fun () -> Self ! {sync, Now} end), + fun () -> Self ! 
{sync, Ref} end), receive - {sync, Now} -> ok + {sync, Ref} -> ok after 10000 -> io:format("Sync from msg_store missing for msg_ids ~p~n", [MsgIds]), -- cgit v1.2.1 From 9015ed6c6f6157e614da78ed2ca431bd6431a066 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 13:25:59 +0100 Subject: spelling --- src/rabbit_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 15ba5122..75c53f4f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -932,7 +932,7 @@ test_msg_store() -> ok after 10000 -> - io:format("Failed to receive response via perues~n"), + io:format("Failed to receive response via peruse~n"), throw(timeout) end end, ok, MsgIds2ndHalf), -- cgit v1.2.1 From dd3b6a9a1e9f54c5ab77fd246da5ab20c508b3a7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 14:42:55 +0100 Subject: added missing spec --- src/rabbit_queue_index.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 9c18b784..dac17402 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -150,6 +150,7 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer()}). +-spec(start_msg_store/0 :: () -> {'ok', [amqqueue()]}). -endif. -- cgit v1.2.1 From 6f5c208da38ffa9805239de4b0a73407558331a1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 15:57:03 +0100 Subject: Some minor API changes which are pretty sensible anyway, but also make writing tests much easier. Also, tests for queue_index which hit 90% code coverage for the module. Profiling in progress to try and figure out why it's not quite as blazingly fast as I expected. --- src/rabbit.erl | 3 +- src/rabbit_queue_index.erl | 39 +++++++++++++---------- src/rabbit_tests.erl | 72 +++++++++++++++++++++++++++++++++++++++++++ src/rabbit_variable_queue.erl | 20 ++++++------ 4 files changed, 106 insertions(+), 28 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index a5e59ce2..215c1bc4 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -156,7 +156,8 @@ start(normal, []) -> ok = maybe_insert_default_data(), ok = rabbit_exchange:recover(), %% TODO - this should probably use start_child somehow too - {ok, DurableQueues} = rabbit_queue_index:start_msg_store(), + DurableQueues = rabbit_amqqueue:find_durable_queues(), + ok = rabbit_queue_index:start_msg_store(DurableQueues), {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues) %% TODO - RealDurableQueues is a subset of %% DurableQueues. It may have queues removed which diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index dac17402..3471913f 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -34,7 +34,7 @@ -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/0]). + find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). %%---------------------------------------------------------------------------- %% The queue disk index @@ -149,8 +149,8 @@ -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> - {non_neg_integer(), non_neg_integer()}). 
--spec(start_msg_store/0 :: () -> {'ok', [amqqueue()]}). + {non_neg_integer(), non_neg_integer(), qistate()}). +-spec(start_msg_store/1 :: ([amqqueue()]) -> 'ok'). -endif. @@ -283,7 +283,7 @@ segment_size() -> ?SEGMENT_ENTRIES_COUNT. find_lowest_seq_id_seg_and_next_seq_id( - #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> + State = #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> SegNumsPaths = all_segment_nums_paths(Dir), %% We don't want the lowest seq_id, merely the seq_id of the start %% of the lowest segment. That seq_id may not actually exist, but @@ -295,18 +295,18 @@ find_lowest_seq_id_seg_and_next_seq_id( _ -> {SegNum1, _SegPath1} = lists:min(SegNumsPaths), reconstruct_seq_id(SegNum1, 0) end, - NextSeqId = + {NextSeqId, State1} = case SegNumsPaths of - [] -> 0; + [] -> {0, State}; _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), + State2 = close_file_handle_for_seg(SegNum2, State), {_SDict, _AckCount, HighRelSeq} = load_segment(SegNum2, SegPath2, JAckDict), - 1 + reconstruct_seq_id(SegNum2, HighRelSeq) + {1 + reconstruct_seq_id(SegNum2, HighRelSeq), State2} end, - {LowSeqIdSeg, NextSeqId}. + {LowSeqIdSeg, NextSeqId, State1}. -start_msg_store() -> - DurableQueues = rabbit_amqqueue:find_durable_queues(), +start_msg_store(DurableQueues) -> DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), Queue #amqqueue.name} || Queue <- DurableQueues ]), @@ -339,7 +339,7 @@ start_msg_store() -> Dir = filename:join(queues_dir(), DirName), ok = delete_queue_directory(Dir) end, TransientDirs), - {ok, DurableQueues}. + ok. %%---------------------------------------------------------------------------- %% Minor Helpers @@ -375,7 +375,8 @@ get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = CurSegNum }) -> State1 = #qistate { dir = Dir } = close_file_handle_for_seg(CurSegNum, State), {ok, Hdl} = file:open(seg_num_to_path(Dir, SegNum), - [binary, raw, write, delayed_write, read]), + [binary, raw, read, write, + {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}]), {ok, _} = file:position(Hdl, {eof, 0}), {Hdl, State1 #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl}}. 
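%% [Illustrative sketch, not part of the patch: this commit's hunks
%% switch the segment-file opens over to buffered modes -- above,
%% {delayed_write, Size, Delay} for writing, and further below,
%% {read_ahead, Size} for reading. A minimal standalone example of the
%% two options; the file name and the 64 KiB size are assumptions,
%% while the 1000 ms flush delay matches the value used in the patch.]
demo_buffered_segment_io() ->
    WriteOpts = [binary, raw, read, write, {delayed_write, 65536, 1000}],
    {ok, Hdl} = file:open("demo_segment.rdq", WriteOpts),
    %% held in the 64 KiB delayed-write buffer until it fills or 1000 ms pass
    ok = file:write(Hdl, <<255:8>>),
    %% sync flushes the delayed-write buffer and fsyncs the file
    ok = file:sync(Hdl),
    ok = file:close(Hdl),
    %% reads are served from the symmetric read-ahead buffer
    {ok, Hdl2} = file:open("demo_segment.rdq",
                           [binary, raw, read, {read_ahead, 65536}]),
    {ok, <<255:8>>} = file:read(Hdl2, 1),
    ok = file:close(Hdl2).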
@@ -410,8 +411,9 @@ queue_index_walker([]) -> finished; queue_index_walker([QueueName|QueueNames]) -> {TotalMsgCount, State} = init(QueueName), - {LowSeqIdSeg, _NextSeqId} = find_lowest_seq_id_seg_and_next_seq_id(State), - queue_index_walker({TotalMsgCount, LowSeqIdSeg, State, QueueNames}); + {LowSeqIdSeg, _NextSeqId, State1} = + find_lowest_seq_id_seg_and_next_seq_id(State), + queue_index_walker({TotalMsgCount, LowSeqIdSeg, State1, QueueNames}); queue_index_walker({0, _LowSeqIdSeg, State, QueueNames}) -> terminate(State), @@ -510,7 +512,8 @@ deliver_transient(SegPath, SDict) -> (RelSeq, {_MsgId, true, false}, {AckMeAcc, DeliverMeAcc}) -> {[RelSeq | AckMeAcc], DeliverMeAcc} end, {[], []}, SDict), - {ok, Hdl} = file:open(SegPath, [binary, raw, write, delayed_write, read]), + {ok, Hdl} = file:open(SegPath, [binary, raw, read, write, + {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}]), {ok, _} = file:position(Hdl, {eof, 0}), ok = file:write(Hdl, [ <> || RelSeq <- DeliverMe ]), @@ -523,7 +526,8 @@ deliver_transient(SegPath, SDict) -> %%---------------------------------------------------------------------------- load_segment(SegNum, SegPath, JAckDict) -> - case file:open(SegPath, [raw, binary, read_ahead, read]) of + case file:open(SegPath, [raw, binary, read, + {read_ahead, ?SEGMENT_TOTAL_SIZE}]) of {error, enoent} -> {dict:new(), 0, 0}; {ok, Hdl} -> {SDict, AckCount, HighRelSeq} = @@ -596,7 +600,8 @@ append_acks_to_segment(SegPath, AckCount, Acks) ?SEGMENT_ENTRIES_COUNT; append_acks_to_segment(SegPath, AckCount, Acks) when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT -> - {ok, Hdl} = file:open(SegPath, [raw, binary, delayed_write, write, read]), + {ok, Hdl} = file:open(SegPath, [raw, binary, read, write, + {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}]), {ok, _} = file:position(Hdl, {eof, 0}), AckCount1 = lists:foldl( diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 75c53f4f..7d5f02f7 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -51,6 +51,7 @@ test_content_prop_roundtrip(Datum, Binary) -> all_tests() -> passed = test_msg_store(), + passed = test_queue_index(), passed = test_priority_queue(), passed = test_unfold(), passed = test_parsing(), @@ -980,5 +981,76 @@ test_msg_store() -> %% restart empty ok = rabbit_msg_store:stop(), {ok, _Pid3} = start_msg_store_empty(), + passed. + +queue_name(Name) -> + rabbit_misc:r(<<"/">>, queue, term_to_binary(Name)). + +test_queue() -> + queue_name(test). + +test_amqqueue(Durable) -> + #amqqueue{name = test_queue(), + durable = Durable, + auto_delete = true, + arguments = [], + pid = none}. + +empty_test_queue() -> + ok = rabbit_queue_index:start_msg_store([]), + {0, Qi1} = rabbit_queue_index:init(test_queue()), + _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), + ok. + +queue_index_publish(SeqIds, Persistent, Qi) -> + lists:foldl( + fun (SeqId, {QiN, SeqIdsMsgIdsAcc}) -> + MsgId = rabbit_guid:guid(), + QiM = rabbit_queue_index:write_published(MsgId, SeqId, Persistent, + QiN), + {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} + end, {Qi, []}, SeqIds). 
+ +test_queue_index() -> + ok = empty_test_queue(), + SeqIdsA = lists:seq(1,10000), + SeqIdsB = lists:seq(10001,20000), + {0, Qi0} = rabbit_queue_index:init(test_queue()), + {0, 0, Qi1} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), + {Qi2, _SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), + {0, 10001, Qi3} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2), + %% call terminate twice to prove it's idempotent + _Qi4 = rabbit_queue_index:terminate(rabbit_queue_index:terminate(Qi3)), + ok = rabbit_msg_store:stop(), + ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), + %% should get length back as 0, as all the msgs were transient + {0, Qi5} = rabbit_queue_index:init(test_queue()), + {false, Qi6} = rabbit_queue_index:flush_journal(Qi5), + {0, 10001, Qi7} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), + {Qi8, _SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, 20001, Qi9} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8), + _Qi10 = rabbit_queue_index:terminate(Qi9), + ok = rabbit_msg_store:stop(), + ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), + %% should get length back as 10000 + LenB = length(SeqIdsB), + {LenB, Qi11} = rabbit_queue_index:init(test_queue()), + {0, 20001, Qi12} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi11), + Qi13 = lists:foldl( + fun (SeqId, QiN) -> + rabbit_queue_index:write_delivered(SeqId, QiN) + end, Qi12, SeqIdsB), + Qi14 = rabbit_queue_index:write_acks(SeqIdsB, Qi13), + {0, 20001, Qi15} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi14), + _Qi16 = rabbit_queue_index:terminate(Qi15), ok = rabbit_msg_store:stop(), + ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), + %% should get length back as 0 because all persistent msgs have been acked + {0, _Qi17} = rabbit_queue_index:init(test_queue()), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 00f3cce5..75ff101e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -116,7 +116,7 @@ init(QueueName) -> {GammaCount, IndexState} = rabbit_queue_index:init(QueueName), - {GammaSeqId, NextSeqId} = + {GammaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), Gamma = case GammaCount of 0 -> #gamma { seq_id = undefined, count = 0 }; @@ -129,7 +129,7 @@ init(QueueName) -> target_ram_msg_count = undefined, ram_msg_count = 0, queue = QueueName, - index_state = IndexState, + index_state = IndexState1, next_seq_id = NextSeqId, out_counter = 0, egress_rate = 0, @@ -303,15 +303,15 @@ delete(State) -> IndexState1 = case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( IndexState) of - {N, N} -> - IndexState; - {GammaSeqId, NextSeqId} -> - {_DeleteCount, IndexState2} = - delete1(NextSeqId, 0, GammaSeqId, IndexState), - IndexState2 + {N, N, IndexState2} -> + IndexState2; + {GammaSeqId, NextSeqId, IndexState2} -> + {_DeleteCount, IndexState3} = + delete1(NextSeqId, 0, GammaSeqId, IndexState2), + IndexState3 end, - IndexState3 = rabbit_queue_index:terminate_and_erase(IndexState1), - State1 #vqstate { index_state = IndexState3 }. + IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1), + State1 #vqstate { index_state = IndexState4 }. 
%% [{Msg, AckTag}] %% We guarantee that after fetch, only persistent msgs are left on -- cgit v1.2.1 From ff1136a09ca0bbfe8b1f959853a6a1beeaaa4399 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 16:00:29 +0100 Subject: tidying up end of test_queue_index --- src/rabbit_tests.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7d5f02f7..5e74142a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1052,5 +1052,7 @@ test_queue_index() -> ok = rabbit_msg_store:stop(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0 because all persistent msgs have been acked - {0, _Qi17} = rabbit_queue_index:init(test_queue()), + {0, Qi17} = rabbit_queue_index:init(test_queue()), + _Qi18 = rabbit_queue_index:terminate_and_erase(Qi17), + ok = rabbit_msg_store:stop(), passed. -- cgit v1.2.1 From 927afa27b1f7ffa7e1c03dade196a9c7aba34932 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 16:09:53 +0100 Subject: more helpful when the bugfix fixes the bug --- src/rabbit_queue_index.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3471913f..de0839fb 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -283,7 +283,8 @@ segment_size() -> ?SEGMENT_ENTRIES_COUNT. find_lowest_seq_id_seg_and_next_seq_id( - State = #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> + State = #qistate { dir = Dir, journal_ack_dict = JAckDict, + cur_seg_num = SegNum }) -> SegNumsPaths = all_segment_nums_paths(Dir), %% We don't want the lowest seq_id, merely the seq_id of the start %% of the lowest segment. That seq_id may not actually exist, but @@ -299,7 +300,7 @@ find_lowest_seq_id_seg_and_next_seq_id( case SegNumsPaths of [] -> {0, State}; _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), - State2 = close_file_handle_for_seg(SegNum2, State), + State2 = close_file_handle_for_seg(SegNum, State), {_SDict, _AckCount, HighRelSeq} = load_segment(SegNum2, SegPath2, JAckDict), {1 + reconstruct_seq_id(SegNum2, HighRelSeq), State2} -- cgit v1.2.1 From 4392b2a6126392ea5a0850f0f80510f9dea6447c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 16:11:46 +0100 Subject: reversing previous change, as I had actually got it right first time --- src/rabbit_queue_index.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index de0839fb..3471913f 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -283,8 +283,7 @@ segment_size() -> ?SEGMENT_ENTRIES_COUNT. find_lowest_seq_id_seg_and_next_seq_id( - State = #qistate { dir = Dir, journal_ack_dict = JAckDict, - cur_seg_num = SegNum }) -> + State = #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> SegNumsPaths = all_segment_nums_paths(Dir), %% We don't want the lowest seq_id, merely the seq_id of the start %% of the lowest segment. 
That seq_id may not actually exist, but @@ -300,7 +299,7 @@ find_lowest_seq_id_seg_and_next_seq_id( case SegNumsPaths of [] -> {0, State}; _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), - State2 = close_file_handle_for_seg(SegNum, State), + State2 = close_file_handle_for_seg(SegNum2, State), {_SDict, _AckCount, HighRelSeq} = load_segment(SegNum2, SegPath2, JAckDict), {1 + reconstruct_seq_id(SegNum2, HighRelSeq), State2} -- cgit v1.2.1 From 791571d48e30b16708ff99210ce9a689e0ff612e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 16:48:29 +0100 Subject: Extending queue index test slightly --- src/rabbit_tests.erl | 60 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 5e74142a..683d15c9 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1011,6 +1011,15 @@ queue_index_publish(SeqIds, Persistent, Qi) -> {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} end, {Qi, []}, SeqIds). +verify_read_with_published(_Delivered, _Persistent, [], _) -> + ok; +verify_read_with_published(Delivered, Persistent, + [{MsgId, SeqId, Persistent, Delivered}|Read], + [{SeqId, MsgId}|Published]) -> + verify_read_with_published(Delivered, Persistent, Read, Published); +verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> + ko. + test_queue_index() -> ok = empty_test_queue(), SeqIdsA = lists:seq(1,10000), @@ -1018,41 +1027,50 @@ test_queue_index() -> {0, Qi0} = rabbit_queue_index:init(test_queue()), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), - {Qi2, _SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), + {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, 10001, Qi3} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2), + {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3), + ok = verify_read_with_published(false, false, ReadA, + lists:reverse(SeqIdsMsgIdsA)), %% call terminate twice to prove it's idempotent - _Qi4 = rabbit_queue_index:terminate(rabbit_queue_index:terminate(Qi3)), + _Qi5 = rabbit_queue_index:terminate(rabbit_queue_index:terminate(Qi4)), ok = rabbit_msg_store:stop(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0, as all the msgs were transient - {0, Qi5} = rabbit_queue_index:init(test_queue()), - {false, Qi6} = rabbit_queue_index:flush_journal(Qi5), - {0, 10001, Qi7} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), - {Qi8, _SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), - {0, 20001, Qi9} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8), - _Qi10 = rabbit_queue_index:terminate(Qi9), + {0, Qi6} = rabbit_queue_index:init(test_queue()), + {false, Qi7} = rabbit_queue_index:flush_journal(Qi6), + {0, 10001, Qi8} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi7), + {Qi9, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi8), + {0, 20001, Qi10} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi9), + {ReadB, Qi11} = rabbit_queue_index:read_segment_entries(0, Qi10), + ok = verify_read_with_published(false, true, ReadB, + lists:reverse(SeqIdsMsgIdsB)), + _Qi12 = rabbit_queue_index:terminate(Qi11), ok = rabbit_msg_store:stop(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 10000 LenB = length(SeqIdsB), - {LenB, Qi11} = rabbit_queue_index:init(test_queue()), - {0, 20001, Qi12} = - 
rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi11), - Qi13 = lists:foldl( + {LenB, Qi13} = rabbit_queue_index:init(test_queue()), + {0, 20001, Qi14} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi13), + Qi15 = lists:foldl( fun (SeqId, QiN) -> rabbit_queue_index:write_delivered(SeqId, QiN) - end, Qi12, SeqIdsB), - Qi14 = rabbit_queue_index:write_acks(SeqIdsB, Qi13), - {0, 20001, Qi15} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi14), - _Qi16 = rabbit_queue_index:terminate(Qi15), + end, Qi14, SeqIdsB), + {ReadC, Qi16} = rabbit_queue_index:read_segment_entries(0, Qi15), + ok = verify_read_with_published(true, true, ReadC, + lists:reverse(SeqIdsMsgIdsB)), + Qi17 = rabbit_queue_index:write_acks(SeqIdsB, Qi16), + {0, 20001, Qi18} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), + _Qi19 = rabbit_queue_index:terminate(Qi18), ok = rabbit_msg_store:stop(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0 because all persistent msgs have been acked - {0, Qi17} = rabbit_queue_index:init(test_queue()), - _Qi18 = rabbit_queue_index:terminate_and_erase(Qi17), + {0, Qi20} = rabbit_queue_index:init(test_queue()), + _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = rabbit_msg_store:stop(), passed. -- cgit v1.2.1 From 19642aa806463972a90e3a7db66fcad3d93ce909 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 16:54:43 +0100 Subject: make sure we stop the msg_store at the start of the queue_index tests --- src/rabbit_tests.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 683d15c9..71107f01 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1021,6 +1021,7 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. test_queue_index() -> + rabbit_msg_store:stop(), ok = empty_test_queue(), SeqIdsA = lists:seq(1,10000), SeqIdsB = lists:seq(10001,20000), -- cgit v1.2.1 From 98739476e496248d9f177d838f03479c88a9d039 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Oct 2009 17:36:52 +0100 Subject: don't use foreach seeing as I'm not using it anywhere else --- src/rabbit_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 71107f01..547287db 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -891,8 +891,8 @@ test_msg_store() -> fun (MsgId, ok) -> rabbit_msg_store:sync( [MsgId], fun () -> Self ! 
{sync, MsgId} end) end, ok, MsgIds2ndHalf), - lists:foreach( - fun(MsgId) -> + lists:foldl( + fun(MsgId, ok) -> receive {sync, MsgId} -> ok after @@ -901,7 +901,7 @@ test_msg_store() -> [MsgId]), throw(timeout) end - end, MsgIds2ndHalf), + end, ok, MsgIds2ndHalf), %% it's very likely we're not dirty here, so the 1st half sync %% should hit a different code path ok = msg_store_sync(MsgIds1stHalf), -- cgit v1.2.1 From 98ec9bf81b1db5ac75c882b12420cb28e778e892 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 20 Oct 2009 17:28:06 +0100 Subject: well, it's about 4 times slower, but the dumb fhc works on the queue index --- src/horrendously_dumb_file_handle_cache.erl | 263 +++++++++++++++ src/rabbit_queue_index.erl | 476 ++++++++++++++++------------ 2 files changed, 528 insertions(+), 211 deletions(-) create mode 100644 src/horrendously_dumb_file_handle_cache.erl diff --git a/src/horrendously_dumb_file_handle_cache.erl b/src/horrendously_dumb_file_handle_cache.erl new file mode 100644 index 00000000..5b2bcb6c --- /dev/null +++ b/src/horrendously_dumb_file_handle_cache.erl @@ -0,0 +1,263 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(horrendously_dumb_file_handle_cache). + +-export([init/0, open/4, close/2, release/2, read/4, write/4, sync/2, + position/3, truncate/2, with_file_handle_at/4, sync_to_offset/3]). + +-record(hcstate, + { ref_entry, path_mode_ref }). + +-record(entry, + { hdl, + current_offset, + last_sync_offset, + is_dirty, + is_append, + path_mode_key }). + +init() -> + #hcstate { ref_entry = dict:new(), + path_mode_ref = dict:new() }. 
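%% [Illustrative sketch, not part of the patch: a hypothetical
%% end-to-end use of the API exported above (the definitions follow
%% below), showing how the cache state is threaded through every call;
%% the file name and payload are assumptions.]
demo_fhc_roundtrip() ->
    HC0 = horrendously_dumb_file_handle_cache:init(),
    {{ok, Ref}, HC1} = horrendously_dumb_file_handle_cache:open(
                         "demo.dat", [binary, raw, read, write], [], HC0),
    {ok, HC2} = horrendously_dumb_file_handle_cache:write(Ref, eof, <<"hi">>, HC1),
    {ok, HC3} = horrendously_dumb_file_handle_cache:sync(Ref, HC2),
    {{ok, <<"hi">>}, HC4} = horrendously_dumb_file_handle_cache:read(Ref, 0, 2, HC3),
    {ok, _HC5} = horrendously_dumb_file_handle_cache:close(Ref, HC4).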
+ +open(Path, Mode, [] = _ExtraOptions, + State = #hcstate { ref_entry = RefEntry, path_mode_ref = PathModeRef }) -> + Mode1 = lists:usort(Mode), + Path1 = filename:absname(Path), + Key = {Path1, Mode1}, + case dict:find(Key, PathModeRef) of + {ok, Ref} -> {{ok, Ref}, State}; + error -> + case file:open(Path1, Mode1) of + {ok, Hdl} -> + Ref = make_ref(), + PathModeRef1 = dict:store(Key, Ref, PathModeRef), + Entry = #entry { hdl = Hdl, current_offset = 0, + last_sync_offset = 0, is_dirty = false, + is_append = lists:member(append, Mode1), + path_mode_key = Key }, + RefEntry1 = dict:store(Ref, Entry, RefEntry), + {{ok, Ref}, State #hcstate { ref_entry = RefEntry1, + path_mode_ref = PathModeRef1 }}; + {error, Error} -> + {{error, Error}, State} + end + end. + +close(Ref, State = #hcstate { ref_entry = RefEntry, + path_mode_ref = PathModeRef }) -> + {ok, + case dict:find(Ref, RefEntry) of + {ok, #entry { hdl = Hdl, is_dirty = IsDirty, path_mode_key = Key }} -> + ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + State #hcstate { ref_entry = dict:erase(Ref, RefEntry), + path_mode_ref = dict:erase(Key, PathModeRef) }; + error -> State + end}. + +release(_Ref, State) -> %% noop for the time being + {ok, State}. + +read(Ref, Offset, Count, State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, + last_sync_offset = LastSyncOffset, + is_dirty = IsDirty }} -> + NewOffset = Count + + case Offset of + cur -> OldOffset; + _ -> {ok, RealOff} = file:position(Hdl, Offset), + RealOff + end, + {IsDirty1, LastSyncOffset1} = + case IsDirty andalso NewOffset > LastSyncOffset of + true -> ok = file:sync(Hdl), + {false, lists:max([NewOffset, OldOffset])}; + false -> {IsDirty, LastSyncOffset} + end, + Entry1 = Entry #entry { current_offset = NewOffset, + last_sync_offset = LastSyncOffset1, + is_dirty = IsDirty1 }, + State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }, + {file:read(Hdl, Count), State1}; + error -> {{error, not_open}, State} + end. + +%% if the file was opened in append mode, then Offset is ignored, as +%% it would only affect the read head for this file. +write(Ref, Offset, Data, State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, + is_append = IsAppend }} -> + NewOffset = + case IsAppend of + true -> + OldOffset; + false -> + size_of_write_data(Data) + + case Offset of + cur -> OldOffset; + _ -> {ok, RealOff} = file:position(Hdl, Offset), + RealOff + end + end, + Entry1 = Entry #entry { current_offset = NewOffset, + is_dirty = true }, + State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }, + {file:write(Hdl, Data), State1}; + error -> {{error, not_open}, State} + end. + +sync(Ref, State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, Entry = #entry { hdl = Hdl, current_offset = Offset, + last_sync_offset = LastSyncOffset, + is_dirty = true }} -> + SyncOffset = lists:max([Offset, LastSyncOffset]), + ok = file:sync(Hdl), + Entry1 = Entry #entry { last_sync_offset = SyncOffset, + is_dirty = false }, + {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }}; + {ok, _Entry_not_dirty} -> + {ok, State}; + error -> {{error, not_open}, State} + end. 
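%% [Illustrative sketch, not part of the patch: the is_dirty /
%% last_sync_offset bookkeeping used by sync/2 above, reduced to its
%% essence -- only pay for an fsync when there are unflushed writes
%% that a caller actually depends on; all names here are made up.]
maybe_fsync(Hdl, WantedOffset, LastSyncOffset, IsDirty)
  when IsDirty andalso WantedOffset > LastSyncOffset ->
    ok = file:sync(Hdl),              %% pay for the fsync once...
    {synced, WantedOffset};           %% ...and remember how far it got us
maybe_fsync(_Hdl, _WantedOffset, LastSyncOffset, _IsDirty) ->
    {already_synced, LastSyncOffset}. %% durable enough already: no syscall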
+ +position(Ref, NewOffset, State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, #entry { current_offset = NewOffset }} -> + {ok, State}; + {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, + last_sync_offset = LastSyncOffset, + is_dirty = IsDirty }} -> + {ok, RealOff} = file:position(Hdl, NewOffset), + {IsDirty1, LastSyncOffset1} = + case {IsDirty, RealOff > LastSyncOffset} of + {true, true} -> + ok = file:sync(Hdl), + {false, lists:max([RealOff, OldOffset])}; + {false, true} -> + {false, RealOff}; + _ -> + {IsDirty, LastSyncOffset} + end, + Entry1 = Entry #entry { current_offset = RealOff, + last_sync_offset = LastSyncOffset1, + is_dirty = IsDirty1 }, + {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }}; + error -> + {{error, not_open}, State} + end. + +truncate(Ref, State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, Entry = #entry { hdl = Hdl, current_offset = Offset, + last_sync_offset = LastSyncOffset, + is_dirty = IsDirty }} -> + ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + LastSyncOffset1 = lists:min([Offset, LastSyncOffset]), + ok = file:truncate(Hdl), + Entry1 = Entry #entry { last_sync_offset = LastSyncOffset1, + is_dirty = false }, + {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }}; + error -> {{error, not_open}, State} + end. + +with_file_handle_at(Ref, Offset, Fun, + State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, + last_sync_offset = LastSyncOffset, + is_dirty = IsDirty }} -> + Offset1 = + case Offset of + cur -> OldOffset; + OldOffset -> OldOffset; + _ -> {ok, RealOff} = file:position(Hdl, Offset), + RealOff + end, + LastSyncOffset1 = + case IsDirty of + true -> ok = file:sync(Hdl), + lists:max([Offset1, OldOffset]); + false -> LastSyncOffset + end, + {Offset2, Result} = Fun(Hdl), + Entry1 = Entry #entry { current_offset = Offset2, + last_sync_offset = LastSyncOffset1, + is_dirty = true }, + State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }, + {Result, State1}; + error -> {{error, not_open}, State} + end. + +sync_to_offset(Ref, Offset, State = #hcstate { ref_entry = RefEntry }) -> + case dict:find(Ref, RefEntry) of + {ok, Entry = #entry { hdl = Hdl, last_sync_offset = LastSyncOffset, + current_offset = CurOffset, is_dirty = true }} + when (Offset =:= cur andalso CurOffset > LastSyncOffset) + orelse (Offset > LastSyncOffset) -> + ok = file:sync(Hdl), + LastSyncOffset1 = + case Offset of + cur -> lists:max([LastSyncOffset, CurOffset]); + _ -> lists:max([LastSyncOffset, CurOffset, Offset]) + end, + Entry1 = Entry #entry { last_sync_offset = LastSyncOffset1, + is_dirty = false }, + {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, + RefEntry) }}; + error -> {{error, not_open}, State} + end. + +size_of_write_data(Data) -> + size_of_write_data(Data, 0). + +size_of_write_data([], Acc) -> + Acc; +size_of_write_data([A|B], Acc) -> + size_of_write_data(B, size_of_write_data(A, Acc)); +size_of_write_data(Bin, Acc) when is_binary(Bin) -> + size(Bin) + Acc. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3471913f..3a21c236 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -108,11 +108,10 @@ -record(qistate, { dir, - cur_seg_num, - cur_seg_hdl, + seg_num_handles, + hc_state, journal_ack_count, journal_ack_dict, - journal_handle, seg_ack_counts }). 
@@ -124,14 +123,11 @@ -type(msg_id() :: binary()). -type(seq_id() :: integer()). --type(int_or_undef() :: integer() | 'undefined'). --type(io_dev_or_undef() :: io_device() | 'undefined'). -type(qistate() :: #qistate { dir :: file_path(), - cur_seg_num :: int_or_undef(), - cur_seg_hdl :: io_dev_or_undef(), + seg_num_handles :: dict(), + hc_state :: any(), journal_ack_count :: integer(), journal_ack_dict :: dict(), - journal_handle :: io_device(), seg_ack_counts :: dict() }). @@ -154,38 +150,30 @@ -endif. + %%---------------------------------------------------------------------------- %% Public API %%---------------------------------------------------------------------------- init(Name) -> + HCState = horrendously_dumb_file_handle_cache:init(), StrName = queue_name_to_dir_name(Name), Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - {TotalMsgCount, AckCounts, TransientADict} = - find_ack_counts_and_deliver_transient_msgs(Dir), - {TotalMsgCount1, AckCounts1} = - scatter_journal(Dir, TotalMsgCount, AckCounts, TransientADict), - {ok, JournalHdl} = file:open(filename:join(Dir, ?ACK_JOURNAL_FILENAME), - [raw, binary, delayed_write, write, read]), - {TotalMsgCount1, #qistate { dir = Dir, - cur_seg_num = undefined, - cur_seg_hdl = undefined, - journal_ack_count = 0, - journal_ack_dict = dict:new(), - journal_handle = JournalHdl, - seg_ack_counts = AckCounts1 - }}. - -terminate(State = #qistate { journal_handle = undefined }) -> - State; -terminate(State) -> - State1 = #qistate { cur_seg_num = SegNum } = full_flush_journal(State), - State2 = #qistate { journal_handle = JournalHdl } = - close_file_handle_for_seg(SegNum, State1), - ok = file:sync(JournalHdl), - ok = file:close(JournalHdl), - State2 #qistate { journal_handle = undefined }. + State = #qistate { dir = Dir, + seg_num_handles = dict:new(), + hc_state = HCState, + journal_ack_count = 0, + journal_ack_dict = dict:new(), + seg_ack_counts = dict:new() }, + {TotalMsgCount, State1} = find_ack_counts_and_deliver_transient_msgs(State), + scatter_journal(TotalMsgCount, State1). + +terminate(State = #qistate { seg_num_handles = SegHdls }) -> + case 0 == dict:size(SegHdls) of + true -> State; + false -> close_all_handles(full_flush_journal(State)) + end. terminate_and_erase(State) -> State1 = terminate(State), @@ -196,35 +184,43 @@ write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - {Hdl, State1} = get_file_handle_for_seg(SegNum, State), - ok = file:write(Hdl, - <>), - State1. + {Hdl, State1} = get_seg_handle(SegNum, State), + {ok, HCState} = horrendously_dumb_file_handle_cache:write( + Hdl, eof, + <>, + State1 #qistate.hc_state), + State1 #qistate { hc_state = HCState }. write_delivered(SeqId, State) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - {Hdl, State1} = get_file_handle_for_seg(SegNum, State), - ok = file:write(Hdl, - <>), - State1. - -write_acks(SeqIds, State = #qistate { journal_handle = JournalHdl, - journal_ack_dict = JAckDict, + {Hdl, State1} = get_seg_handle(SegNum, State), + {ok, HCState} = horrendously_dumb_file_handle_cache:write( + Hdl, eof, + <>, + State1 #qistate.hc_state), + State1 #qistate { hc_state = HCState }. 
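%% [Illustrative sketch, not part of the patch: the `<>` placeholders
%% in the hunk above elide the segment-entry binaries. A hypothetical
%% publish entry of the same general shape -- a small record-type
%% prefix, one persistence bit, a relative sequence number, then the
%% fixed-width msg id; the 2/1/13-bit widths are assumptions standing
%% in for the ?PUBLISH_* and ?REL_SEQ_* macros.]
append_publish_entry(Hdl, MsgId, RelSeq, IsPersistent)
  when is_binary(MsgId), RelSeq >= 0, RelSeq < 8192 ->
    PersistentBit = case IsPersistent of true -> 1; false -> 0 end,
    file:write(Hdl, <<1:2, PersistentBit:1, RelSeq:13, MsgId/binary>>).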
+ +write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict, journal_ack_count = JAckCount }) -> - {JAckDict1, JAckCount1} = + {Hdl, State1} = get_journal_handle(State), + {JAckDict1, JAckCount1, HCState} = lists:foldl( - fun (SeqId, {JAckDict2, JAckCount2}) -> - ok = file:write(JournalHdl, <>), - {add_ack_to_ack_dict(SeqId, JAckDict2), JAckCount2 + 1} - end, {JAckDict, JAckCount}, SeqIds), - State1 = State #qistate { journal_ack_dict = JAckDict1, - journal_ack_count = JAckCount1 }, + fun (SeqId, {JAckDict2, JAckCount2, HCStateN}) -> + {ok, HCStateM} = + horrendously_dumb_file_handle_cache:write( + Hdl, eof, <>, HCStateN), + {add_ack_to_ack_dict(SeqId, JAckDict2), + JAckCount2 + 1, HCStateM} + end, {JAckDict, JAckCount, State1 #qistate.hc_state}, SeqIds), + State2 = State1 #qistate { journal_ack_dict = JAckDict1, + journal_ack_count = JAckCount1, + hc_state = HCState }, case JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT of - true -> full_flush_journal(State1); - false -> State1 + true -> full_flush_journal(State2); + false -> State2 end. full_flush_journal(State) -> @@ -235,36 +231,32 @@ full_flush_journal(State) -> flush_journal(State = #qistate { journal_ack_count = 0 }) -> {false, State}; -flush_journal(State = #qistate { journal_handle = JournalHdl, - journal_ack_dict = JAckDict, - journal_ack_count = JAckCount, - seg_ack_counts = AckCounts, - dir = Dir }) -> +flush_journal(State = #qistate { journal_ack_dict = JAckDict, + journal_ack_count = JAckCount }) -> [SegNum|_] = dict:fetch_keys(JAckDict), Acks = dict:fetch(SegNum, JAckDict), - SegPath = seg_num_to_path(Dir, SegNum), - State1 = close_file_handle_for_seg(SegNum, State), - AckCounts1 = append_acks_to_segment(SegPath, SegNum, AckCounts, Acks), + State1 = append_acks_to_segment(SegNum, Acks, State), JAckCount1 = JAckCount - length(Acks), State2 = State1 #qistate { journal_ack_dict = dict:erase(SegNum, JAckDict), - journal_ack_count = JAckCount1, - seg_ack_counts = AckCounts1 }, + journal_ack_count = JAckCount1 }, if JAckCount1 == 0 -> - {ok, 0} = file:position(JournalHdl, 0), - ok = file:truncate(JournalHdl), - {false, State2}; + {Hdl, State3 = #qistate { hc_state = HCState }} = + get_journal_handle(State2), + {ok, HCState1} = + horrendously_dumb_file_handle_cache:position(Hdl, 0, HCState), + {ok, HCState2} = + horrendously_dumb_file_handle_cache:truncate(Hdl, HCState1), + {false, State3 #qistate { hc_state = HCState2 }}; JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> flush_journal(State2); true -> {true, State2} end. -read_segment_entries(InitSeqId, State = - #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> +read_segment_entries(InitSeqId, State) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - SegPath = seg_num_to_path(Dir, SegNum), - {SDict, _AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, JAckDict), + {SDict, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), %% deliberately sort the list desc, because foldl will reverse it RelSeqs = rev_sort(dict:fetch_keys(SDict)), {lists:foldl(fun (RelSeq, Acc) -> @@ -273,7 +265,7 @@ read_segment_entries(InitSeqId, State = [ {MsgId, reconstruct_seq_id(SegNum, RelSeq), IsPersistent, IsDelivered} | Acc] end, [], RelSeqs), - State}. + State1}. next_segment_boundary(SeqId) -> {SegNum, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), @@ -282,27 +274,31 @@ next_segment_boundary(SeqId) -> segment_size() -> ?SEGMENT_ENTRIES_COUNT. 
-find_lowest_seq_id_seg_and_next_seq_id( - State = #qistate { dir = Dir, journal_ack_dict = JAckDict }) -> - SegNumsPaths = all_segment_nums_paths(Dir), +find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> + SegNums = all_segment_nums(Dir), %% We don't want the lowest seq_id, merely the seq_id of the start %% of the lowest segment. That seq_id may not actually exist, but %% that's fine. The important thing is that the segment exists and %% the seq_id reported is on a segment boundary. LowSeqIdSeg = - case SegNumsPaths of + case SegNums of [] -> 0; - _ -> {SegNum1, _SegPath1} = lists:min(SegNumsPaths), - reconstruct_seq_id(SegNum1, 0) + _ -> reconstruct_seq_id(lists:min(SegNums), 0) end, {NextSeqId, State1} = - case SegNumsPaths of + case SegNums of [] -> {0, State}; - _ -> {SegNum2, SegPath2} = lists:max(SegNumsPaths), - State2 = close_file_handle_for_seg(SegNum2, State), - {_SDict, _AckCount, HighRelSeq} = - load_segment(SegNum2, SegPath2, JAckDict), - {1 + reconstruct_seq_id(SegNum2, HighRelSeq), State2} + _ -> SegNum2 = lists:max(SegNums), + {SDict, AckCount, HighRelSeq, State2} = + load_segment(SegNum2, State), + NextSeqId1 = reconstruct_seq_id(SegNum2, HighRelSeq), + NextSeqId2 = + case 0 == AckCount andalso 0 == HighRelSeq andalso + 0 == dict:size(SDict) of + true -> NextSeqId1; + false -> NextSeqId1 + 1 + end, + {NextSeqId2, State2} end, {LowSeqIdSeg, NextSeqId, State1}. @@ -341,6 +337,7 @@ start_msg_store(DurableQueues) -> end, TransientDirs), ok. + %%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- @@ -357,28 +354,56 @@ queues_dir() -> rev_sort(List) -> lists:sort(fun (A, B) -> B < A end, List). -close_file_handle_for_seg(_SegNum, - State = #qistate { cur_seg_num = undefined }) -> - State; -close_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, - cur_seg_hdl = Hdl }) -> - ok = file:sync(Hdl), - ok = file:close(Hdl), - State #qistate { cur_seg_num = undefined, cur_seg_hdl = undefined }; -close_file_handle_for_seg(_SegNum, State) -> - State. - -get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = SegNum, - cur_seg_hdl = Hdl }) -> - {Hdl, State}; -get_file_handle_for_seg(SegNum, State = #qistate { cur_seg_num = CurSegNum }) -> - State1 = #qistate { dir = Dir } = - close_file_handle_for_seg(CurSegNum, State), - {ok, Hdl} = file:open(seg_num_to_path(Dir, SegNum), - [binary, raw, read, write, - {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}]), - {ok, _} = file:position(Hdl, {eof, 0}), - {Hdl, State1 #qistate { cur_seg_num = SegNum, cur_seg_hdl = Hdl}}. +get_journal_handle(State = #qistate { dir = Dir }) -> + Path = filename:join(Dir, ?ACK_JOURNAL_FILENAME), + Mode = [raw, binary, delayed_write, write, read], + get_handle(journal, Path, Mode, State). + +get_seg_handle(SegNum, State = #qistate { dir = Dir }) -> + get_handle(SegNum, seg_num_to_path(Dir, SegNum), + [binary, raw, read, write, + {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}], + State). 
+ +get_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> + State1 = #qistate { hc_state = HCState, + seg_num_handles = SegHdls1 } = + case dict:size(SegHdls) > 10 of + true -> close_all_handles(State); + false -> State + end, + case dict:find(Key, SegHdls1) of + {ok, Hdl} -> {Hdl, State1}; + error -> + {{ok, Hdl}, HCState1} = + horrendously_dumb_file_handle_cache:open(Path, Mode, [], + HCState), + {Hdl, State1 #qistate { + hc_state = HCState1, + seg_num_handles = dict:store(Key, Hdl, SegHdls1) }} + end. + +close_handle(Key, State = #qistate { hc_state = HCState, + seg_num_handles = SegHdls }) -> + case dict:find(Key, SegHdls) of + {ok, Hdl} -> + {ok, HCState1} = + horrendously_dumb_file_handle_cache:close(Hdl, HCState), + State #qistate { hc_state = HCState1, + seg_num_handles = dict:erase(Key, SegHdls) }; + error -> State + end. + +close_all_handles(State = #qistate { hc_state = HCState, + seg_num_handles = SegHdls }) -> + HCState1 = + dict:fold( + fun (_Key, Ref, HCStateN) -> + {ok, HCStateM} = + horrendously_dumb_file_handle_cache:close(Ref, HCStateN), + HCStateM + end, HCState, SegHdls), + State #qistate { hc_state = HCState1, seg_num_handles = dict:new() }. bool_to_int(true ) -> 1; bool_to_int(false) -> 0. @@ -403,6 +428,7 @@ add_ack_to_ack_dict(SeqId, ADict) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). + %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function %%---------------------------------------------------------------------------- @@ -430,79 +456,88 @@ queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Entries], {MsgId, bool_to_int(IsPersistent), {Entries, N - 1, LowSeqIdSeg, State, QueueNames}}. + %%---------------------------------------------------------------------------- %% Startup Functions %%---------------------------------------------------------------------------- -all_segment_nums_paths(Dir) -> - [{list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, - SegName)), filename:join(Dir, SegName)} +all_segment_nums(Dir) -> + [list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. -find_ack_counts_and_deliver_transient_msgs(Dir) -> - SegNumsPaths = all_segment_nums_paths(Dir), - lists:foldl( - fun ({SegNum, SegPath}, {TotalMsgCount, AckCounts, TransientADict}) -> - {SDict, AckCount, _HighRelSeq} = - load_segment(SegNum, SegPath, dict:new()), - TransientMsgsAcks = deliver_transient(SegPath, SDict), - %% ignore TransientMsgsAcks in AckCounts1 and - %% TotalMsgCount1 because the TransientMsgsAcks fall - %% through into scatter_journal at which point the - %% AckCounts and TotalMsgCount will be correctly - %% adjusted. - TotalMsgCount1 = TotalMsgCount + dict:size(SDict), - AckCounts1 = case AckCount of - 0 -> AckCounts; - N -> dict:store(SegNum, N, AckCounts) - end, - TransientADict1 = - case TransientMsgsAcks of - [] -> TransientADict; - _ -> dict:store(SegNum, TransientMsgsAcks, TransientADict) - end, - {TotalMsgCount1, AckCounts1, TransientADict1} - end, {0, dict:new(), dict:new()}, SegNumsPaths). 
+find_ack_counts_and_deliver_transient_msgs(State = #qistate { dir = Dir }) -> + SegNums = all_segment_nums(Dir), + {TotalMsgCount, State1} = + lists:foldl( + fun (SegNum, {TotalMsgCount1, StateN}) -> + {SDict, AckCount, _HighRelSeq, StateM} = + load_segment(SegNum, StateN), + {TransientMsgsAcks, StateL = + #qistate { seg_ack_counts = AckCounts, + journal_ack_dict = JAckDict }} = + deliver_transient(SegNum, SDict, StateM), + %% ignore TransientMsgsAcks in AckCounts and + %% JAckDict1 because the TransientMsgsAcks fall + %% through into scatter_journal at which point the + %% AckCounts and TotalMsgCount will be correctly + %% adjusted. + TotalMsgCount2 = TotalMsgCount1 + dict:size(SDict), + AckCounts1 = case AckCount of + 0 -> AckCounts; + N -> dict:store(SegNum, N, AckCounts) + end, + JAckDict1 = + case TransientMsgsAcks of + [] -> JAckDict; + _ -> dict:store(SegNum, TransientMsgsAcks, JAckDict) + end, + {TotalMsgCount2, + StateL #qistate { seg_ack_counts = AckCounts1, + journal_ack_dict = JAckDict1 }} + end, {0, State}, SegNums), + {TotalMsgCount, State1}. -scatter_journal(Dir, TotalMsgCount, AckCounts, TransientADict) -> +scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), - case file:open(JournalPath, [read, read_ahead, raw, binary]) of - {error, enoent} -> {TotalMsgCount, AckCounts}; - {ok, Hdl} -> - %% ADict may well contain duplicates. However, this is ok, - %% due to the use of sets in replay_journal_acks_to_segment - ADict = load_journal(Hdl, TransientADict), - ok = file:close(Hdl), - {TotalMsgCount1, AckCounts1, _Dir} = - dict:fold(fun replay_journal_acks_to_segment/3, - {TotalMsgCount, AckCounts, Dir}, ADict), - ok = file:delete(JournalPath), - {TotalMsgCount1, AckCounts1} - end. - -load_journal(Hdl, ADict) -> - case file:read(Hdl, ?SEQ_BYTES) of - {ok, <>} -> - load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict)); - _ErrOrEoF -> ADict + {Hdl, State1 = #qistate { hc_state = HCState, + journal_ack_dict = JAckDict }} = + get_journal_handle(State), + %% ADict may well contain duplicates. However, this is ok, due to + %% the use of sets in replay_journal_acks_to_segment + {ADict, HCState1} = load_journal(Hdl, JAckDict, HCState), + State2 = close_handle(journal, State1 #qistate { hc_state = HCState1 }), + {TotalMsgCount1, State3} = + dict:fold(fun replay_journal_acks_to_segment/3, + {TotalMsgCount, State2}, ADict), + ok = file:delete(JournalPath), + {TotalMsgCount1, State3 #qistate { journal_ack_dict = dict:new() }}. + +load_journal(Hdl, ADict, HCState) -> + case horrendously_dumb_file_handle_cache:read( + Hdl, cur, ?SEQ_BYTES, HCState) of + {{ok, <>}, HCState1} -> + load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict), HCState1); + {_ErrOrEoF, HCState1} -> {ADict, HCState1} end. 
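%% [Illustrative sketch, not part of the patch: load_journal above
%% works because the ack journal is a flat run of fixed-width sequence
%% ids with no framing, so it can simply read ?SEQ_BYTES at a time
%% until eof. The same format decoded in one pass from a binary; the
%% 64-bit unsigned width is an assumption standing in for ?SEQ_BITS.]
decode_ack_journal(<<SeqId:64/unsigned, Rest/binary>>, Acc) ->
    decode_ack_journal(Rest, [SeqId | Acc]);
decode_ack_journal(_TornTrailingEntryOrEmpty, Acc) ->
    %% a torn final entry (e.g. crash mid-write) is simply dropped
    lists:reverse(Acc).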
replay_journal_acks_to_segment(_, [], Acc) -> Acc; -replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, AckCounts, Dir}) -> - SegPath = seg_num_to_path(Dir, SegNum), +replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> %% supply empty dict so that we get all msgs in SDict that have %% not been acked in the segment file itself - {SDict, _AckCount, _HighRelSeq} = load_segment(SegNum, SegPath, dict:new()), + {SDict, _AckCount, _HighRelSeq, State1} = + load_segment(SegNum, State #qistate { journal_ack_dict = dict:new() }), ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:to_list(sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks))), %% ValidAcks will not contain any duplicates at this point. + State2 = + State1 #qistate { journal_ack_dict = State #qistate.journal_ack_dict }, {TotalMsgCount - length(ValidAcks), - append_acks_to_segment(SegPath, SegNum, AckCounts, ValidAcks), Dir}. + append_acks_to_segment(SegNum, ValidAcks, State2)}. -deliver_transient(SegPath, SDict) -> +deliver_transient(SegNum, SDict, State) -> {AckMe, DeliverMe} = dict:fold( fun (_RelSeq, {_MsgId, _IsDelivered, true}, Acc) -> @@ -512,27 +547,36 @@ deliver_transient(SegPath, SDict) -> (RelSeq, {_MsgId, true, false}, {AckMeAcc, DeliverMeAcc}) -> {[RelSeq | AckMeAcc], DeliverMeAcc} end, {[], []}, SDict), - {ok, Hdl} = file:open(SegPath, [binary, raw, read, write, - {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}]), - {ok, _} = file:position(Hdl, {eof, 0}), - ok = file:write(Hdl, [ <> || RelSeq <- DeliverMe ]), - ok = file:sync(Hdl), - ok = file:close(Hdl), - AckMe. + {Hdl, State1} = get_seg_handle(SegNum, State), + {ok, HCState} = horrendously_dumb_file_handle_cache:write( + Hdl, eof, + [ <> || RelSeq <- DeliverMe ], + State1 #qistate.hc_state), + {AckMe, State1 #qistate { hc_state = HCState }}. + %%---------------------------------------------------------------------------- %% Loading Segments %%---------------------------------------------------------------------------- -load_segment(SegNum, SegPath, JAckDict) -> - case file:open(SegPath, [raw, binary, read, - {read_ahead, ?SEGMENT_TOTAL_SIZE}]) of - {error, enoent} -> {dict:new(), 0, 0}; - {ok, Hdl} -> - {SDict, AckCount, HighRelSeq} = - load_segment_entries(Hdl, dict:new(), 0, 0), - ok = file:close(Hdl), +load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, + dir = Dir }) -> + SegmentExists = case dict:find(SegNum, SegHdls) of + {ok, _} -> true; + error -> filelib:is_file(seg_num_to_path(Dir, SegNum)) + end, + case SegmentExists of + false -> {dict:new(), 0, 0, State}; + true -> + {Hdl, State1 = #qistate { hc_state = HCState, + journal_ack_dict = JAckDict }} = + get_seg_handle(SegNum, State), + {ok, HCState1} = + horrendously_dumb_file_handle_cache:position(Hdl, 0, HCState), + + {SDict, AckCount, HighRelSeq, HCState2} = + load_segment_entries(Hdl, dict:new(), 0, 0, HCState1), RelSeqs = case dict:find(SegNum, JAckDict) of {ok, RelSeqs1} -> RelSeqs1; error -> [] @@ -541,30 +585,35 @@ load_segment(SegNum, SegPath, JAckDict) -> lists:foldl(fun (RelSeq, {SDict2, AckCount2}) -> {dict:erase(RelSeq, SDict2), AckCount2 + 1} end, {SDict, AckCount}, RelSeqs), - {SDict1, AckCount1, HighRelSeq} + {SDict1, AckCount1, HighRelSeq, + State1 #qistate { hc_state = HCState2 }} end. 
-load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> - case file:read(Hdl, 1) of - {ok, <>} -> - {ok, LSB} = file:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), +load_segment_entries(Hdl, SDict, AckCount, HighRelSeq, HCState) -> + case horrendously_dumb_file_handle_cache:read(Hdl, cur, 1, HCState) of + {{ok, <>}, HCState1} -> + {{ok, LSB}, HCState2} = + horrendously_dumb_file_handle_cache:read( + Hdl, cur, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1, HCState1), <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), - load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq); - {ok, <>} -> + load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq, HCState2); + {{ok, <>}, HCState1} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. - {ok, <>} = - file:read(Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), + {{ok, <>}, HCState2} = + horrendously_dumb_file_handle_cache:read( + Hdl, cur, ?PUBLISH_RECORD_LENGTH_BYTES - 1, HCState1), <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( Hdl, dict:store(RelSeq, {MsgId, false, 1 == IsPersistentNum}, - SDict), AckCount, HighRelSeq1); - _ErrOrEoF -> {SDict, AckCount, HighRelSeq} + SDict), AckCount, HighRelSeq1, HCState2); + {_ErrOrEoF, HCState1} -> + {SDict, AckCount, HighRelSeq, HCState1} end. deliver_or_ack_msg(SDict, AckCount, RelSeq) -> @@ -580,37 +629,42 @@ deliver_or_ack_msg(SDict, AckCount, RelSeq) -> %% Appending Acks to Segments %%---------------------------------------------------------------------------- -append_acks_to_segment(SegPath, SegNum, AckCounts, Acks) -> +append_acks_to_segment(SegNum, Acks, + State = #qistate { seg_ack_counts = AckCounts }) -> AckCount = case dict:find(SegNum, AckCounts) of {ok, AckCount1} -> AckCount1; error -> 0 end, - case append_acks_to_segment(SegPath, AckCount, Acks) of - 0 -> AckCounts; - ?SEGMENT_ENTRIES_COUNT -> dict:erase(SegNum, AckCounts); - AckCount2 -> dict:store(SegNum, AckCount2, AckCounts) + case append_acks_to_segment(SegNum, AckCount, Acks, State) of + {0, State1} -> State1; + {?SEGMENT_ENTRIES_COUNT, + State1 = #qistate { seg_ack_counts = AckCounts1 }} -> + State1 #qistate { seg_ack_counts = dict:erase(SegNum, AckCounts1) }; + {AckCount2, State1 = #qistate { seg_ack_counts = AckCounts1 }} -> + State1 #qistate { seg_ack_counts = dict:store(SegNum, AckCount2, + AckCounts1) } end. -append_acks_to_segment(SegPath, AckCount, Acks) +append_acks_to_segment(SegNum, AckCount, Acks, State = #qistate { dir = Dir }) when length(Acks) + AckCount == ?SEGMENT_ENTRIES_COUNT -> - ok = case file:delete(SegPath) of + State1 = close_handle(SegNum, State), + ok = case file:delete(seg_num_to_path(Dir, SegNum)) of ok -> ok; {error, enoent} -> ok end, - ?SEGMENT_ENTRIES_COUNT; -append_acks_to_segment(SegPath, AckCount, Acks) + {?SEGMENT_ENTRIES_COUNT, State1}; +append_acks_to_segment(SegNum, AckCount, Acks, State) when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT -> - {ok, Hdl} = file:open(SegPath, [raw, binary, read, write, - {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}]), - {ok, _} = file:position(Hdl, {eof, 0}), - AckCount1 = + {Hdl, State1} = get_seg_handle(SegNum, State), + {AckCount1, HCState} = lists:foldl( - fun (RelSeq, AckCount2) -> - ok = file:write(Hdl, - <>), - AckCount2 + 1 - end, AckCount, Acks), - ok = file:sync(Hdl), - ok = file:close(Hdl), - AckCount1. 
+ fun (RelSeq, {AckCount2, HCStateN}) -> + {ok, HCStateM} = + horrendously_dumb_file_handle_cache:write( + Hdl, eof, + <>, HCStateN), + {AckCount2 + 1, HCStateM} + end, {AckCount, State1 #qistate.hc_state}, Acks), + {ok, HCState1} = horrendously_dumb_file_handle_cache:sync(Hdl, HCState), + {AckCount1, State1 #qistate { hc_state = HCState1 }}. -- cgit v1.2.1 From 47abfa59d18ab92e44085f33a2fd6ccccbc25002 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 20 Oct 2009 17:57:08 +0100 Subject: still far too slow --- src/horrendously_dumb_file_handle_cache.erl | 50 ++++++++++------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/src/horrendously_dumb_file_handle_cache.erl b/src/horrendously_dumb_file_handle_cache.erl index 5b2bcb6c..afe1dbe0 100644 --- a/src/horrendously_dumb_file_handle_cache.erl +++ b/src/horrendously_dumb_file_handle_cache.erl @@ -43,6 +43,7 @@ last_sync_offset, is_dirty, is_append, + at_eof, path_mode_key }). init() -> @@ -64,7 +65,7 @@ open(Path, Mode, [] = _ExtraOptions, Entry = #entry { hdl = Hdl, current_offset = 0, last_sync_offset = 0, is_dirty = false, is_append = lists:member(append, Mode1), - path_mode_key = Key }, + at_eof = false, path_mode_key = Key }, RefEntry1 = dict:store(Ref, Entry, RefEntry), {{ok, Ref}, State #hcstate { ref_entry = RefEntry1, path_mode_ref = PathModeRef1 }}; @@ -93,24 +94,15 @@ release(_Ref, State) -> %% noop for the time being read(Ref, Offset, Count, State = #hcstate { ref_entry = RefEntry }) -> case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, - last_sync_offset = LastSyncOffset, - is_dirty = IsDirty }} -> + {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset }} -> NewOffset = Count + case Offset of cur -> OldOffset; _ -> {ok, RealOff} = file:position(Hdl, Offset), RealOff end, - {IsDirty1, LastSyncOffset1} = - case IsDirty andalso NewOffset > LastSyncOffset of - true -> ok = file:sync(Hdl), - {false, lists:max([NewOffset, OldOffset])}; - false -> {IsDirty, LastSyncOffset} - end, Entry1 = Entry #entry { current_offset = NewOffset, - last_sync_offset = LastSyncOffset1, - is_dirty = IsDirty1 }, + at_eof = Offset =:= eof }, State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }, {file:read(Hdl, Count), State1}; @@ -122,7 +114,7 @@ read(Ref, Offset, Count, State = #hcstate { ref_entry = RefEntry }) -> write(Ref, Offset, Data, State = #hcstate { ref_entry = RefEntry }) -> case dict:find(Ref, RefEntry) of {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, - is_append = IsAppend }} -> + is_append = IsAppend, at_eof = AtEoF }} -> NewOffset = case IsAppend of true -> @@ -131,12 +123,13 @@ write(Ref, Offset, Data, State = #hcstate { ref_entry = RefEntry }) -> size_of_write_data(Data) + case Offset of cur -> OldOffset; + eof when AtEoF -> OldOffset; _ -> {ok, RealOff} = file:position(Hdl, Offset), RealOff end end, Entry1 = Entry #entry { current_offset = NewOffset, - is_dirty = true }, + is_dirty = true, at_eof = Offset =:= eof }, State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }, {file:write(Hdl, Data), State1}; @@ -151,7 +144,7 @@ sync(Ref, State = #hcstate { ref_entry = RefEntry }) -> SyncOffset = lists:max([Offset, LastSyncOffset]), ok = file:sync(Hdl), Entry1 = Entry #entry { last_sync_offset = SyncOffset, - is_dirty = false }, + is_dirty = false }, {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }}; {ok, _Entry_not_dirty} -> @@ -163,23 +156,12 @@ position(Ref, NewOffset, State = 
#hcstate { ref_entry = RefEntry }) -> case dict:find(Ref, RefEntry) of {ok, #entry { current_offset = NewOffset }} -> {ok, State}; - {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, - last_sync_offset = LastSyncOffset, - is_dirty = IsDirty }} -> + {ok, #entry { at_eof = true }} when NewOffset =:= eof -> + {ok, State}; + {ok, Entry = #entry { hdl = Hdl }} -> {ok, RealOff} = file:position(Hdl, NewOffset), - {IsDirty1, LastSyncOffset1} = - case {IsDirty, RealOff > LastSyncOffset} of - {true, true} -> - ok = file:sync(Hdl), - {false, lists:max([RealOff, OldOffset])}; - {false, true} -> - {false, RealOff}; - _ -> - {IsDirty, LastSyncOffset} - end, Entry1 = Entry #entry { current_offset = RealOff, - last_sync_offset = LastSyncOffset1, - is_dirty = IsDirty1 }, + at_eof = NewOffset =:= eof }, {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }}; error -> @@ -198,7 +180,7 @@ truncate(Ref, State = #hcstate { ref_entry = RefEntry }) -> LastSyncOffset1 = lists:min([Offset, LastSyncOffset]), ok = file:truncate(Hdl), Entry1 = Entry #entry { last_sync_offset = LastSyncOffset1, - is_dirty = false }, + is_dirty = false, at_eof = true }, {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }}; error -> {{error, not_open}, State} @@ -209,9 +191,10 @@ with_file_handle_at(Ref, Offset, Fun, case dict:find(Ref, RefEntry) of {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, last_sync_offset = LastSyncOffset, - is_dirty = IsDirty }} -> + is_dirty = IsDirty, at_eof = AtEoF }} -> Offset1 = case Offset of + eof when AtEoF -> OldOffset; cur -> OldOffset; OldOffset -> OldOffset; _ -> {ok, RealOff} = file:position(Hdl, Offset), @@ -226,7 +209,7 @@ with_file_handle_at(Ref, Offset, Fun, {Offset2, Result} = Fun(Hdl), Entry1 = Entry #entry { current_offset = Offset2, last_sync_offset = LastSyncOffset1, - is_dirty = true }, + is_dirty = true, at_eof = false }, State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }, {Result, State1}; @@ -249,6 +232,7 @@ sync_to_offset(Ref, Offset, State = #hcstate { ref_entry = RefEntry }) -> is_dirty = false }, {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, RefEntry) }}; + {ok, _Entry} -> {ok, State}; error -> {{error, not_open}, State} end. -- cgit v1.2.1 From 9e575f8623741916c9d6f8fd7ca344bef0e02fa2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Oct 2009 12:26:55 +0100 Subject: fprof shows that dict really sucks hard, so switch to using the process dictionary in the fhc. This eliminates all need for state; however, I've not removed fhc state from either fhc or qi because of the likelihood of needing some state back in for the real fhc. Performance is now much better because I went back to the original qi before the fhc modifications and realised that I should have been opening files with read_ahead enabled, which I wasn't before. In fact, performance is now quite a bit better than before the fhc came in. --- src/horrendously_dumb_file_handle_cache.erl | 176 ++++++++++++---------------- src/rabbit_queue_index.erl | 49 ++++---- 2 files changed, 103 insertions(+), 122 deletions(-) diff --git a/src/horrendously_dumb_file_handle_cache.erl b/src/horrendously_dumb_file_handle_cache.erl index afe1dbe0..10fc9745 100644 --- a/src/horrendously_dumb_file_handle_cache.erl +++ b/src/horrendously_dumb_file_handle_cache.erl @@ -34,9 +34,6 @@ -export([init/0, open/4, close/2, release/2, read/4, write/4, sync/2, position/3, truncate/2, with_file_handle_at/4, sync_to_offset/3]). 
--record(hcstate, - { ref_entry, path_mode_ref }). - -record(entry, { hdl, current_offset, @@ -46,75 +43,69 @@ at_eof, path_mode_key }). -init() -> - #hcstate { ref_entry = dict:new(), - path_mode_ref = dict:new() }. +init() -> empty_state. -open(Path, Mode, [] = _ExtraOptions, - State = #hcstate { ref_entry = RefEntry, path_mode_ref = PathModeRef }) -> +open(Path, Mode, [] = _ExtraOptions, State) -> Mode1 = lists:usort(Mode), Path1 = filename:absname(Path), Key = {Path1, Mode1}, - case dict:find(Key, PathModeRef) of - {ok, Ref} -> {{ok, Ref}, State}; - error -> + case get({rabbit_fhc, path_mode_ref, Key}) of + {ref, Ref} -> {{ok, Ref}, State}; + undefined -> case file:open(Path1, Mode1) of {ok, Hdl} -> Ref = make_ref(), - PathModeRef1 = dict:store(Key, Ref, PathModeRef), + put({rabbit_fhc, path_mode_ref, Key}, {ref, Ref}), Entry = #entry { hdl = Hdl, current_offset = 0, last_sync_offset = 0, is_dirty = false, is_append = lists:member(append, Mode1), at_eof = false, path_mode_key = Key }, - RefEntry1 = dict:store(Ref, Entry, RefEntry), - {{ok, Ref}, State #hcstate { ref_entry = RefEntry1, - path_mode_ref = PathModeRef1 }}; + put({rabbit_fhc, ref_entry, Ref}, Entry), + {{ok, Ref}, State}; {error, Error} -> {{error, Error}, State} end end. -close(Ref, State = #hcstate { ref_entry = RefEntry, - path_mode_ref = PathModeRef }) -> +close(Ref, State) -> {ok, - case dict:find(Ref, RefEntry) of - {ok, #entry { hdl = Hdl, is_dirty = IsDirty, path_mode_key = Key }} -> + case erase({rabbit_fhc, ref_entry, Ref}) of + #entry { hdl = Hdl, is_dirty = IsDirty, path_mode_key = Key } -> ok = case IsDirty of true -> file:sync(Hdl); false -> ok end, ok = file:close(Hdl), - State #hcstate { ref_entry = dict:erase(Ref, RefEntry), - path_mode_ref = dict:erase(Key, PathModeRef) }; - error -> State + erase({rabbit_fhc, path_mode_ref, Key}), + State; + undefined -> State end}. release(_Ref, State) -> %% noop for the time being {ok, State}. -read(Ref, Offset, Count, State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset }} -> +read(Ref, Offset, Count, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + Entry = #entry { hdl = Hdl, current_offset = OldOffset } -> NewOffset = Count + case Offset of cur -> OldOffset; _ -> {ok, RealOff} = file:position(Hdl, Offset), RealOff end, - Entry1 = Entry #entry { current_offset = NewOffset, - at_eof = Offset =:= eof }, - State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }, - {file:read(Hdl, Count), State1}; - error -> {{error, not_open}, State} + put({rabbit_fhc, ref_entry, Ref}, + Entry #entry { current_offset = NewOffset, + at_eof = Offset =:= eof }), + {file:read(Hdl, Count), State}; + undefined -> {{error, not_open}, State} end. %% if the file was opened in append mode, then Offset is ignored, as %% it would only affect the read head for this file. 
-write(Ref, Offset, Data, State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, - is_append = IsAppend, at_eof = AtEoF }} -> +write(Ref, Offset, Data, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + Entry = #entry { hdl = Hdl, current_offset = OldOffset, + is_append = IsAppend, at_eof = AtEoF } -> NewOffset = case IsAppend of true -> @@ -128,70 +119,58 @@ write(Ref, Offset, Data, State = #hcstate { ref_entry = RefEntry }) -> RealOff end end, - Entry1 = Entry #entry { current_offset = NewOffset, - is_dirty = true, at_eof = Offset =:= eof }, - State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }, - {file:write(Hdl, Data), State1}; - error -> {{error, not_open}, State} + put({rabbit_fhc, ref_entry, Ref}, + Entry #entry { current_offset = NewOffset, + is_dirty = true, at_eof = Offset =:= eof }), + {file:write(Hdl, Data), State}; + undefined -> {{error, not_open}, State} end. -sync(Ref, State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, current_offset = Offset, - last_sync_offset = LastSyncOffset, - is_dirty = true }} -> +sync(Ref, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + Entry = #entry { hdl = Hdl, current_offset = Offset, + last_sync_offset = LastSyncOffset, + is_dirty = true } -> SyncOffset = lists:max([Offset, LastSyncOffset]), ok = file:sync(Hdl), - Entry1 = Entry #entry { last_sync_offset = SyncOffset, - is_dirty = false }, - {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }}; - {ok, _Entry_not_dirty} -> + put({rabbit_fhc, ref_entry, Ref}, + Entry #entry { last_sync_offset = SyncOffset, + is_dirty = false }), {ok, State}; - error -> {{error, not_open}, State} + #entry { is_dirty = false } -> {ok, State}; + undefined -> {{error, not_open}, State} end. -position(Ref, NewOffset, State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, #entry { current_offset = NewOffset }} -> +position(Ref, NewOffset, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + #entry { current_offset = NewOffset } -> {ok, State}; - {ok, #entry { at_eof = true }} when NewOffset =:= eof -> + #entry { at_eof = true } when NewOffset =:= eof -> {ok, State}; - {ok, Entry = #entry { hdl = Hdl }} -> + Entry = #entry { hdl = Hdl } -> {ok, RealOff} = file:position(Hdl, NewOffset), - Entry1 = Entry #entry { current_offset = RealOff, - at_eof = NewOffset =:= eof }, - {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }}; - error -> + put({rabbit_fhc, ref_entry, Ref}, + Entry #entry { current_offset = RealOff, + at_eof = NewOffset =:= eof }), + {ok, State}; + undefined -> {{error, not_open}, State} end. 
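The switch above from a dict threaded through every call to namespaced tuple keys in the process dictionary is easy to study in isolation. Here is a minimal, free-standing sketch of the pattern (a hypothetical module, not part of the patch); note that get/1 returns 'undefined' for a missing key, so absence has to be matched on 'undefined' rather than on 'error' as with dict:find/2:

    -module(pd_state_sketch).
    -export([store/2, fetch/1, delete/1]).

    %% Store a per-ref entry under a namespaced tuple key; the tuple
    %% avoids colliding with any other use of the process dictionary
    %% in the same process.
    store(Ref, Entry) ->
        put({pd_state, ref_entry, Ref}, Entry),
        ok.

    %% get/1 returns 'undefined' when the key is absent.
    fetch(Ref) ->
        case get({pd_state, ref_entry, Ref}) of
            undefined -> {error, not_open};
            Entry     -> {ok, Entry}
        end.

    %% erase/1 removes the key, returning the old value (ignored here).
    delete(Ref) ->
        _ = erase({pd_state, ref_entry, Ref}),
        ok.

The trade-off is that the state becomes invisible outside the owning process, which is presumably why the now-redundant State argument stays threaded through the API.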
-truncate(Ref, State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, current_offset = Offset, - last_sync_offset = LastSyncOffset, - is_dirty = IsDirty }} -> - ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - LastSyncOffset1 = lists:min([Offset, LastSyncOffset]), +truncate(Ref, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + Entry = #entry { hdl = Hdl } -> ok = file:truncate(Hdl), - Entry1 = Entry #entry { last_sync_offset = LastSyncOffset1, - is_dirty = false, at_eof = true }, - {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }}; - error -> {{error, not_open}, State} + put({rabbit_fhc, ref_entry, Ref}, Entry #entry { at_eof = true }), + {ok, State}; + undefined -> {{error, not_open}, State} end. -with_file_handle_at(Ref, Offset, Fun, - State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, current_offset = OldOffset, - last_sync_offset = LastSyncOffset, - is_dirty = IsDirty, at_eof = AtEoF }} -> +with_file_handle_at(Ref, Offset, Fun, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + Entry = #entry { hdl = Hdl, current_offset = OldOffset, + last_sync_offset = LastSyncOffset, + is_dirty = IsDirty, at_eof = AtEoF } -> Offset1 = case Offset of eof when AtEoF -> OldOffset; @@ -207,19 +186,18 @@ with_file_handle_at(Ref, Offset, Fun, false -> LastSyncOffset end, {Offset2, Result} = Fun(Hdl), - Entry1 = Entry #entry { current_offset = Offset2, - last_sync_offset = LastSyncOffset1, - is_dirty = true, at_eof = false }, - State1 = State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }, - {Result, State1}; - error -> {{error, not_open}, State} + put({rabbit_fhc, ref_entry, Ref}, + Entry #entry { current_offset = Offset2, + last_sync_offset = LastSyncOffset1, + is_dirty = true, at_eof = false }), + {Result, State}; + undefined -> {{error, not_open}, State} end. -sync_to_offset(Ref, Offset, State = #hcstate { ref_entry = RefEntry }) -> - case dict:find(Ref, RefEntry) of - {ok, Entry = #entry { hdl = Hdl, last_sync_offset = LastSyncOffset, - current_offset = CurOffset, is_dirty = true }} +sync_to_offset(Ref, Offset, State) -> + case get({rabbit_fhc, ref_entry, Ref}) of + Entry = #entry { hdl = Hdl, last_sync_offset = LastSyncOffset, + current_offset = CurOffset, is_dirty = true } when (Offset =:= cur andalso CurOffset > LastSyncOffset) orelse (Offset > LastSyncOffset) -> ok = file:sync(Hdl), @@ -228,11 +206,11 @@ sync_to_offset(Ref, Offset, State = #hcstate { ref_entry = RefEntry }) -> cur -> lists:max([LastSyncOffset, CurOffset]); _ -> lists:max([LastSyncOffset, CurOffset, Offset]) end, - Entry1 = Entry #entry { last_sync_offset = LastSyncOffset1, - is_dirty = false }, - {ok, State #hcstate { ref_entry = dict:store(Ref, Entry1, - RefEntry) }}; - {ok, _Entry} -> {ok, State}; + put({rabbit_fhc, ref_entry, Ref}, + Entry #entry { last_sync_offset = LastSyncOffset1, + is_dirty = false }), + {ok, State}; + #entry {} -> {ok, State}; error -> {{error, not_open}, State} end. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3a21c236..6df7cc2a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -354,34 +354,37 @@ queues_dir() -> rev_sort(List) -> lists:sort(fun (A, B) -> B < A end, List). 
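The rev_sort/1 context line above gets a descending sort by flipping the comparison; for reference, the result is the same as sorting ascending and reversing. A throwaway shell check (not from the patch):

    1> lists:sort(fun (A, B) -> B < A end, [3,1,2]).
    [3,2,1]
    2> lists:reverse(lists:sort([3,1,2])).
    [3,2,1]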
-get_journal_handle(State = #qistate { dir = Dir }) -> - Path = filename:join(Dir, ?ACK_JOURNAL_FILENAME), - Mode = [raw, binary, delayed_write, write, read], - get_handle(journal, Path, Mode, State). - -get_seg_handle(SegNum, State = #qistate { dir = Dir }) -> - get_handle(SegNum, seg_num_to_path(Dir, SegNum), - [binary, raw, read, write, - {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}], - State). - -get_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> +get_journal_handle(State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> + case dict:find(journal, SegHdls) of + {ok, Hdl} -> {Hdl, State}; + error -> + Path = filename:join(Dir, ?ACK_JOURNAL_FILENAME), + Mode = [raw, binary, delayed_write, write, read, read_ahead], + new_handle(journal, Path, Mode, State) + end. + +get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> + case dict:find(SegNum, SegHdls) of + {ok, Hdl} -> {Hdl, State}; + error -> + new_handle(SegNum, seg_num_to_path(Dir, SegNum), + [binary, raw, read, write, + {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}, + {read_ahead, ?SEGMENT_TOTAL_SIZE}], + State) + end. + +new_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> State1 = #qistate { hc_state = HCState, seg_num_handles = SegHdls1 } = - case dict:size(SegHdls) > 10 of + case dict:size(SegHdls) > 100 of true -> close_all_handles(State); false -> State end, - case dict:find(Key, SegHdls1) of - {ok, Hdl} -> {Hdl, State1}; - error -> - {{ok, Hdl}, HCState1} = - horrendously_dumb_file_handle_cache:open(Path, Mode, [], - HCState), - {Hdl, State1 #qistate { - hc_state = HCState1, - seg_num_handles = dict:store(Key, Hdl, SegHdls1) }} - end. + {{ok, Hdl}, HCState1} = + horrendously_dumb_file_handle_cache:open(Path, Mode, [], HCState), + {Hdl, State1 #qistate { hc_state = HCState1, + seg_num_handles = dict:store(Key, Hdl, SegHdls1) }}. 
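get_journal_handle and get_seg_handle above share one shape: look the key up in the handle table, open and memoise on a miss, and dump the whole table once it grows past a bound. A condensed sketch of that shape with invented names (Open and CloseAll stand in for the real open and close-all operations):

    -module(handle_table_sketch).
    -export([lookup_or_open/4]).

    %% Handles is a dict of Key -> Hdl; Limit caps how many stay open.
    lookup_or_open(Key, Open, CloseAll, {Handles, Limit})
      when is_function(Open, 0), is_function(CloseAll, 1) ->
        case dict:find(Key, Handles) of
            {ok, Hdl} ->
                {Hdl, {Handles, Limit}};
            error ->
                %% Crude eviction, mirroring the patch: when over the
                %% limit, close everything rather than tracking recency.
                Handles1 = case dict:size(Handles) > Limit of
                               true  -> ok = CloseAll(Handles),
                                        dict:new();
                               false -> Handles
                           end,
                Hdl = Open(),
                {Hdl, {dict:store(Key, Hdl, Handles1), Limit}}
        end.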
close_handle(Key, State = #qistate { hc_state = HCState, seg_num_handles = SegHdls }) -> -- cgit v1.2.1 From 1965e72ea7194e06db1519d042cb6ed76843d14d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Oct 2009 12:50:41 +0100 Subject: remove unnecessary rabbit_ prefix in process dictionary entries in fhc --- src/horrendously_dumb_file_handle_cache.erl | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/horrendously_dumb_file_handle_cache.erl b/src/horrendously_dumb_file_handle_cache.erl index 10fc9745..c034569d 100644 --- a/src/horrendously_dumb_file_handle_cache.erl +++ b/src/horrendously_dumb_file_handle_cache.erl @@ -49,18 +49,18 @@ open(Path, Mode, [] = _ExtraOptions, State) -> Mode1 = lists:usort(Mode), Path1 = filename:absname(Path), Key = {Path1, Mode1}, - case get({rabbit_fhc, path_mode_ref, Key}) of + case get({fhc, path_mode_ref, Key}) of {ref, Ref} -> {{ok, Ref}, State}; undefined -> case file:open(Path1, Mode1) of {ok, Hdl} -> Ref = make_ref(), - put({rabbit_fhc, path_mode_ref, Key}, {ref, Ref}), + put({fhc, path_mode_ref, Key}, {ref, Ref}), Entry = #entry { hdl = Hdl, current_offset = 0, last_sync_offset = 0, is_dirty = false, is_append = lists:member(append, Mode1), at_eof = false, path_mode_key = Key }, - put({rabbit_fhc, ref_entry, Ref}, Entry), + put({fhc, ref_entry, Ref}, Entry), {{ok, Ref}, State}; {error, Error} -> {{error, Error}, State} @@ -69,14 +69,14 @@ open(Path, Mode, [] = _ExtraOptions, State) -> close(Ref, State) -> {ok, - case erase({rabbit_fhc, ref_entry, Ref}) of + case erase({fhc, ref_entry, Ref}) of #entry { hdl = Hdl, is_dirty = IsDirty, path_mode_key = Key } -> ok = case IsDirty of true -> file:sync(Hdl); false -> ok end, ok = file:close(Hdl), - erase({rabbit_fhc, path_mode_ref, Key}), + erase({fhc, path_mode_ref, Key}), State; undefined -> State end}. @@ -85,7 +85,7 @@ release(_Ref, State) -> %% noop for the time being {ok, State}. read(Ref, Offset, Count, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, current_offset = OldOffset } -> NewOffset = Count + case Offset of @@ -93,7 +93,7 @@ read(Ref, Offset, Count, State) -> _ -> {ok, RealOff} = file:position(Hdl, Offset), RealOff end, - put({rabbit_fhc, ref_entry, Ref}, + put({fhc, ref_entry, Ref}, Entry #entry { current_offset = NewOffset, at_eof = Offset =:= eof }), {file:read(Hdl, Count), State}; @@ -103,7 +103,7 @@ read(Ref, Offset, Count, State) -> %% if the file was opened in append mode, then Offset is ignored, as %% it would only affect the read head for this file. write(Ref, Offset, Data, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, current_offset = OldOffset, is_append = IsAppend, at_eof = AtEoF } -> NewOffset = @@ -119,7 +119,7 @@ write(Ref, Offset, Data, State) -> RealOff end end, - put({rabbit_fhc, ref_entry, Ref}, + put({fhc, ref_entry, Ref}, Entry #entry { current_offset = NewOffset, is_dirty = true, at_eof = Offset =:= eof }), {file:write(Hdl, Data), State}; @@ -127,13 +127,13 @@ write(Ref, Offset, Data, State) -> end. 
sync(Ref, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, current_offset = Offset, last_sync_offset = LastSyncOffset, is_dirty = true } -> SyncOffset = lists:max([Offset, LastSyncOffset]), ok = file:sync(Hdl), - put({rabbit_fhc, ref_entry, Ref}, + put({fhc, ref_entry, Ref}, Entry #entry { last_sync_offset = SyncOffset, is_dirty = false }), {ok, State}; @@ -142,14 +142,14 @@ sync(Ref, State) -> end. position(Ref, NewOffset, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of #entry { current_offset = NewOffset } -> {ok, State}; #entry { at_eof = true } when NewOffset =:= eof -> {ok, State}; Entry = #entry { hdl = Hdl } -> {ok, RealOff} = file:position(Hdl, NewOffset), - put({rabbit_fhc, ref_entry, Ref}, + put({fhc, ref_entry, Ref}, Entry #entry { current_offset = RealOff, at_eof = NewOffset =:= eof }), {ok, State}; @@ -158,16 +158,16 @@ position(Ref, NewOffset, State) -> end. truncate(Ref, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl } -> ok = file:truncate(Hdl), - put({rabbit_fhc, ref_entry, Ref}, Entry #entry { at_eof = true }), + put({fhc, ref_entry, Ref}, Entry #entry { at_eof = true }), {ok, State}; undefined -> {{error, not_open}, State} end. with_file_handle_at(Ref, Offset, Fun, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, current_offset = OldOffset, last_sync_offset = LastSyncOffset, is_dirty = IsDirty, at_eof = AtEoF } -> @@ -186,7 +186,7 @@ with_file_handle_at(Ref, Offset, Fun, State) -> false -> LastSyncOffset end, {Offset2, Result} = Fun(Hdl), - put({rabbit_fhc, ref_entry, Ref}, + put({fhc, ref_entry, Ref}, Entry #entry { current_offset = Offset2, last_sync_offset = LastSyncOffset1, is_dirty = true, at_eof = false }), @@ -195,7 +195,7 @@ with_file_handle_at(Ref, Offset, Fun, State) -> end. sync_to_offset(Ref, Offset, State) -> - case get({rabbit_fhc, ref_entry, Ref}) of + case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, last_sync_offset = LastSyncOffset, current_offset = CurOffset, is_dirty = true } when (Offset =:= cur andalso CurOffset > LastSyncOffset) orelse (Offset > LastSyncOffset) -> ok = file:sync(Hdl), @@ -206,7 +206,7 @@ sync_to_offset(Ref, Offset, State) -> cur -> lists:max([LastSyncOffset, CurOffset]); _ -> lists:max([LastSyncOffset, CurOffset, Offset]) end, - put({rabbit_fhc, ref_entry, Ref}, + put({fhc, ref_entry, Ref}, Entry #entry { last_sync_offset = LastSyncOffset1, is_dirty = false }), {ok, State}; -- cgit v1.2.1 From c09f59dba234d5368e250b89e7e84abd1f97be23 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Oct 2009 17:04:36 +0100 Subject: Added sync_all for queue_index, trap exits in amqqueue_process, and make sure that terminate calls terminate in variable_queue, which calls through into terminate in queue_index. 
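Trapping exits matters here because a gen_server's terminate/2 only runs on a supervisor-initiated shutdown if the process converts the exit signal into a message, i.e. calls process_flag(trap_exit, true) in init/1. A minimal free-standing illustration of the idiom (hypothetical module, not the patch itself):

    -module(trap_exit_sketch).
    -behaviour(gen_server).
    -export([start_link/0]).
    -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
             terminate/2, code_change/3]).

    start_link() -> gen_server:start_link(?MODULE, [], []).

    init([]) ->
        %% Without this, the supervisor's exit signal kills the process
        %% outright and terminate/2 is never called.
        process_flag(trap_exit, true),
        {ok, no_state}.

    handle_call(_Msg, _From, State) -> {reply, ok, State}.
    handle_cast(_Msg, State)        -> {noreply, State}.
    handle_info(_Info, State)       -> {noreply, State}.

    %% On a clean shutdown, flush and close resources here; the patch
    %% uses this hook to drive variable_queue/queue_index termination.
    terminate(shutdown, _State) -> ok;
    terminate(_Reason, _State)  -> ok.

    code_change(_OldVsn, State, _Extra) -> {ok, State}.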
--- src/rabbit_amqqueue_process.erl | 3 +++ src/rabbit_queue_index.erl | 12 +++++++++++- src/rabbit_variable_queue.erl | 16 ++++++++++------ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 546d8fbe..e2477e98 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -98,6 +98,7 @@ start_link(Q) -> init(Q = #amqqueue { name = QName }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), + process_flag(trap_exit, true), ok = rabbit_memory_manager:register (self(), false, rabbit_amqqueue, set_storage_mode, [self()]), VQS = rabbit_variable_queue:init(QName), @@ -113,6 +114,8 @@ init(Q = #amqqueue { name = QName }) -> {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. +terminate(shutdown, #q{variable_queue_state = VQS}) -> + _VQS = rabbit_variable_queue:terminate(VQS); terminate(_Reason, State = #q{variable_queue_state = VQS}) -> %% FIXME: How do we cancel active subscriptions? %% Ensure that any persisted tx messages are removed; diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6df7cc2a..48da7e3f 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, flush_journal/1, + write_delivered/2, write_acks/2, flush_journal/1, sync_all/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). @@ -229,6 +229,16 @@ full_flush_journal(State) -> {false, State1} -> State1 end. +sync_all(State = #qistate { hc_state = HCState, seg_num_handles = SegHdls }) -> + HCState1 = + dict:fold( + fun (_Key, Hdl, HCStateN) -> + {ok, HCStateM} = + horrendously_dumb_file_handle_cache:sync(Hdl, HCStateN), + HCStateM + end, HCState, SegHdls), + State #qistate { hc_state = HCState1 }. + flush_journal(State = #qistate { journal_ack_count = 0 }) -> {false, State}; flush_journal(State = #qistate { journal_ack_dict = JAckDict, diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 75ff101e..7a85b302 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,10 +31,10 @@ -module(rabbit_variable_queue). --export([init/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, - remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, - maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, - tx_publish/2, tx_rollback/2, tx_commit/4, do_tx_commit/4]). +-export([init/1, terminate/1, publish/2, publish_delivered/2, + set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, + ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, + requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, do_tx_commit/4]). %%---------------------------------------------------------------------------- @@ -140,6 +140,9 @@ init(QueueName) -> }, maybe_load_next_segment(State). +terminate(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = rabbit_queue_index:terminate(IndexState) }. + publish(Msg, State) -> publish(Msg, false, false, State). 
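The sync_all/1 addition above is just a fold over the cached handles, threading the handle-cache state through a per-handle sync. The same shape, abstracted into a sketch (SyncFun stands in for the real sync call):

    -module(sync_all_sketch).
    -export([sync_all/3]).

    %% Fold a state-threading operation over every value in a dict of
    %% handles; SyncFun must return {ok, NewAcc}.
    sync_all(Handles, SyncFun, Acc0) when is_function(SyncFun, 2) ->
        dict:fold(fun (_Key, Hdl, AccN) ->
                          {ok, AccM} = SyncFun(Hdl, AccN),
                          AccM
                  end, Acc0, Handles).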
@@ -383,9 +386,10 @@ do_tx_commit(Pubs, AckTags, From, State) -> {[SeqId | SeqIdsAcc], StateN1} end, {[], State}, Pubs), %% TODO need to do something here about syncing the queue index, PubSeqIds - State2 = ack(AckTags, State1), + State2 = #vqstate { index_state = IndexState } = ack(AckTags, State1), + IndexState1 = rabbit_queue_index:sync_all(IndexState), gen_server2:reply(From, ok), - State2. + State2 #vqstate { index_state = IndexState1 }. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 5913f841cdb34395b5513af18d45c9e185d70d1f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Oct 2009 17:05:38 +0100 Subject: Added write buffers. Performance now very very good, and could get better if we can get msg_store to use the new fhc. All tests pass. Sometimes. --- src/horrendously_dumb_file_handle_cache.erl | 97 ++++++++++++++++++----------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/src/horrendously_dumb_file_handle_cache.erl b/src/horrendously_dumb_file_handle_cache.erl index c034569d..e3aa49b3 100644 --- a/src/horrendously_dumb_file_handle_cache.erl +++ b/src/horrendously_dumb_file_handle_cache.erl @@ -38,7 +38,7 @@ { hdl, current_offset, last_sync_offset, - is_dirty, + write_buffer, is_append, at_eof, path_mode_key }). @@ -57,7 +57,7 @@ open(Path, Mode, [] = _ExtraOptions, State) -> Ref = make_ref(), put({fhc, path_mode_ref, Key}, {ref, Ref}), Entry = #entry { hdl = Hdl, current_offset = 0, - last_sync_offset = 0, is_dirty = false, + last_sync_offset = 0, write_buffer = [], is_append = lists:member(append, Mode1), at_eof = false, path_mode_key = Key }, put({fhc, ref_entry, Ref}, Entry), @@ -70,10 +70,11 @@ close(Ref, State) -> {ok, case erase({fhc, ref_entry, Ref}) of - #entry { hdl = Hdl, is_dirty = IsDirty, path_mode_key = Key } -> - ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok + #entry { hdl = Hdl, write_buffer = WriteBuffer, path_mode_key = Key } -> + ok = case WriteBuffer of + [] -> ok; + _ -> ok = file:write(Hdl, lists:reverse(WriteBuffer)), + file:sync(Hdl) end, ok = file:close(Hdl), erase({fhc, path_mode_ref, Key}), @@ -86,7 +87,12 @@ release(_Ref, State) -> %% noop for the time being read(Ref, Offset, Count, State) -> case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl, current_offset = OldOffset } -> + Entry = #entry { hdl = Hdl, current_offset = OldOffset, + write_buffer = WriteBuffer } -> + ok = case WriteBuffer of + [] -> ok; + _ -> file:write(Hdl, lists:reverse(WriteBuffer)) + end, NewOffset = Count + case Offset of cur -> OldOffset; @@ -95,7 +101,8 @@ read(Ref, Offset, Count, State) -> end, put({fhc, ref_entry, Ref}, Entry #entry { current_offset = NewOffset, - at_eof = Offset =:= eof }), + at_eof = Offset =:= eof, + write_buffer = [] }), {file:read(Hdl, Count), State}; undefined -> {{error, not_open}, State} end. 
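The write buffer introduced above is the usual reversed-cons accumulator: each buffered write is O(1), and a single lists:reverse at flush time restores write order. Because file:write/2 accepts iodata (arbitrarily nested lists of binaries), the reversed buffer never needs flattening. A free-standing sketch:

    -module(write_buffer_sketch).
    -export([buffer/2, flush/2]).

    %% Cons each chunk onto the front: constant time per buffered write.
    buffer(Data, Buffer) -> [Data | Buffer].

    %% Reverse once on flush; the reversed deep list is valid iodata and
    %% can be handed to file:write/2 as-is.
    flush(_Hdl, [])    -> ok;
    flush(Hdl, Buffer) -> file:write(Hdl, lists:reverse(Buffer)).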
@@ -105,7 +112,8 @@ read(Ref, Offset, Count, State) -> write(Ref, Offset, Data, State) -> case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, current_offset = OldOffset, - is_append = IsAppend, at_eof = AtEoF } -> + is_append = IsAppend, at_eof = AtEoF, + write_buffer = WriteBuffer } -> NewOffset = case IsAppend of true -> @@ -119,25 +127,28 @@ write(Ref, Offset, Data, State) -> RealOff end end, + WriteBuffer1 = [Data | WriteBuffer], put({fhc, ref_entry, Ref}, Entry #entry { current_offset = NewOffset, - is_dirty = true, at_eof = Offset =:= eof }), - {file:write(Hdl, Data), State}; + at_eof = Offset =:= eof, + write_buffer = WriteBuffer1 }), + {ok, State}; undefined -> {{error, not_open}, State} end. sync(Ref, State) -> case get({fhc, ref_entry, Ref}) of + #entry { write_buffer = [] } -> {ok, State}; Entry = #entry { hdl = Hdl, current_offset = Offset, last_sync_offset = LastSyncOffset, - is_dirty = true } -> + write_buffer = WriteBuffer } -> SyncOffset = lists:max([Offset, LastSyncOffset]), + ok = file:write(Hdl, lists:reverse(WriteBuffer)), ok = file:sync(Hdl), put({fhc, ref_entry, Ref}, Entry #entry { last_sync_offset = SyncOffset, - is_dirty = false }), + write_buffer = [] }), {ok, State}; - #entry { is_dirty = false } -> {ok, State}; undefined -> {{error, not_open}, State} end. @@ -147,10 +158,15 @@ position(Ref, NewOffset, State) -> {ok, State}; #entry { at_eof = true } when NewOffset =:= eof -> {ok, State}; - Entry = #entry { hdl = Hdl } -> + Entry = #entry { hdl = Hdl, write_buffer = WriteBuffer } -> + ok = case WriteBuffer of + [] -> ok; + _ -> file:write(Hdl, lists:reverse(WriteBuffer)) + end, {ok, RealOff} = file:position(Hdl, NewOffset), put({fhc, ref_entry, Ref}, Entry #entry { current_offset = RealOff, + write_buffer = [], at_eof = NewOffset =:= eof }), {ok, State}; undefined -> @@ -159,9 +175,14 @@ position(Ref, NewOffset, State) -> truncate(Ref, State) -> case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl } -> + Entry = #entry { hdl = Hdl, write_buffer = WriteBuffer } -> + ok = case WriteBuffer of + [] -> ok; + _ -> file:write(Hdl, lists:reverse(WriteBuffer)) + end, ok = file:truncate(Hdl), - put({fhc, ref_entry, Ref}, Entry #entry { at_eof = true }), + put({fhc, ref_entry, Ref}, + Entry #entry { at_eof = true, write_buffer = [] }), {ok, State}; undefined -> {{error, not_open}, State} end. 
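Each of read, position and truncate above starts by draining the buffer, because buffered bytes are invisible to the OS file and the kernel's file position has not moved. A runnable demonstration of what an unflushed buffer would mean (hypothetical module; Path should name a scratch file):

    -module(flush_before_read_sketch).
    -export([demo/1]).

    demo(Path) ->
        {ok, Hdl} = file:open(Path, [raw, binary, read, write]),
        ok = file:truncate(Hdl),           %% start from an empty file
        Buffer = [<<"hello">>],            %% an as-yet-unflushed buffer
        eof = file:read(Hdl, 5),           %% nothing has reached the file
        ok = file:write(Hdl, lists:reverse(Buffer)),
        {ok, 0} = file:position(Hdl, bof), %% rewind the real offset
        {ok, <<"hello">>} = file:read(Hdl, 5),
        ok = file:close(Hdl).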
@@ -169,27 +190,22 @@ truncate(Ref, State) -> with_file_handle_at(Ref, Offset, Fun, State) -> case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, current_offset = OldOffset, - last_sync_offset = LastSyncOffset, - is_dirty = IsDirty, at_eof = AtEoF } -> - Offset1 = - case Offset of - eof when AtEoF -> OldOffset; - cur -> OldOffset; - OldOffset -> OldOffset; - _ -> {ok, RealOff} = file:position(Hdl, Offset), - RealOff - end, - LastSyncOffset1 = - case IsDirty of - true -> ok = file:sync(Hdl), - lists:max([Offset1, OldOffset]); - false -> LastSyncOffset - end, + write_buffer = WriteBuffer, at_eof = AtEoF } -> + ok = case WriteBuffer of + [] -> ok; + _ -> file:write(Hdl, lists:reverse(WriteBuffer)) + end, + ok = case Offset of + eof when AtEoF -> ok; + cur -> ok; + OldOffset -> ok; + _ -> {ok, _RealOff} = file:position(Hdl, Offset), + ok + end, {Offset2, Result} = Fun(Hdl), put({fhc, ref_entry, Ref}, - Entry #entry { current_offset = Offset2, - last_sync_offset = LastSyncOffset1, - is_dirty = true, at_eof = false }), + Entry #entry { current_offset = Offset2, write_buffer = [], + at_eof = false }), {Result, State}; undefined -> {{error, not_open}, State} end. @@ -197,9 +213,14 @@ with_file_handle_at(Ref, Offset, Fun, State) -> sync_to_offset(Ref, Offset, State) -> case get({fhc, ref_entry, Ref}) of Entry = #entry { hdl = Hdl, last_sync_offset = LastSyncOffset, - current_offset = CurOffset, is_dirty = true } + current_offset = CurOffset, + write_buffer = [_|_] = WriteBuffer } when (Offset =:= cur andalso CurOffset > LastSyncOffset) orelse (Offset > LastSyncOffset) -> + ok = case WriteBuffer of + [] -> ok; + _ -> file:write(Hdl, lists:reverse(WriteBuffer)) + end, ok = file:sync(Hdl), LastSyncOffset1 = case Offset of @@ -208,7 +229,7 @@ sync_to_offset(Ref, Offset, State) -> end, put({fhc, ref_entry, Ref}, Entry #entry { last_sync_offset = LastSyncOffset1, - is_dirty = false }), + write_buffer = [] }), {ok, State}; #entry {} -> {ok, State}; error -> {{error, not_open}, State} -- cgit v1.2.1 From 08096879cac147d50e50004552771d05f824ba32 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 22 Oct 2009 00:12:44 +0100 Subject: Started work on the real file handle cache --- src/file_handle_cache.erl | 214 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 src/file_handle_cache.erl diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl new file mode 100644 index 00000000..1d291474 --- /dev/null +++ b/src/file_handle_cache.erl @@ -0,0 +1,214 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(file_handle_cache). + +-export([init/0, open/4, close/2, release/2, read/4, write/4, sync/2, + position/3, truncate/2, with_file_handle_at/4, sync_to_offset/3]). + +-record(file, + { reader_count, + has_writer, + path + }). + +-record(handle, + { hdl, + offset, + trusted_offset, + write_buffer_size, + write_buffer, + at_eof, + is_append, + mode, + global_key + }). + +open(Path, Mode, Options, State) -> + Mode1 = lists:usort(Mode), + Path1 = filename:absname(Path), + case get({Path1, fhc_path}) of + {gref, GRef} -> + #file { reader_count = RCount, has_writer = HasWriter } + = File = get({GRef, fhc_file}), + IsWriter = is_writer(Mode1), + case IsWriter andalso HasWriter of + true -> + {{error, writer_exists}, State}; + false -> + RCount1 = case is_reader(Mode1) of + true -> RCount + 1; + false -> RCount + end, + put({Path1, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter orelse IsWriter }), + open1(Path1, Mode1, Options, GRef, State) + end; + undefined -> + GRef = make_ref(), + %% returns 0 even if file doesn't exist + put({Path1, fhc_path}, {gref, GRef}), + put({GRef, fhc_file}, + #file { reader_count = 0, has_writer = false, path = Path1 }), + open(Path, Mode, Options, State) + end. + +close(Ref, State) -> + case erase({Ref, fhc_handle}) of + undefined -> {ok, State}; + Handle -> + case write_buffer(Handle) of + {ok, #handle { hdl = Hdl, mode = Mode, global_key = GRef }} -> + ok = file:sync(Hdl), + ok = file:close(Hdl), + IsReader = is_reader(Mode), + IsWriter = is_writer(Mode), + #file { reader_count = RCount, has_writer = HasWriter, + path = Path } = File = get({GRef, fhc_file}), + RCount1 = case IsReader of + true -> RCount - 1; + false -> RCount + end, + HasWriter1 = HasWriter andalso not IsWriter, + case RCount1 == 0 andalso not HasWriter1 of + true -> erase({GRef, fhc_file}), + erase({Path, fhc_path}); + false -> put({GRef, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter1 }) + end, + {ok, State}; + {Error, Handle1} -> + put({Ref, fhc_handle}, Handle1), + {Error, State} + end + end. + +release(_Ref, State) -> %% noop just for now + {ok, State}. + +read(Ref, NewOffset, Count, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + Handle -> + case write_buffer(Handle) of + {ok, Handle1 = #handle { hdl = Hdl, at_eof = AtEoF, + offset = Offset }} -> + {AtEoF1, Offset1} = + maybe_position(Hdl, AtEoF, Offset, NewOffset), + Handle2 = Handle1 #handle { at_eof = AtEoF1, + offset = Offset1 }, + {Handle3, Result} = + case file:read(Hdl, Count) of + {ok, Data} -> {Handle2, {ok, Data}}; + eof -> {Handle2 #handle { at_eof = true }, eof}; + {error, Reason} -> {Handle2, {error, Reason}} + end, + put({Ref, fhc_handle}, Handle3), + {Result, State}; + {Error, Handle1} -> + put({Ref, fhc_handle}, Handle1), + {Error, State} + end + end. 
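The #file{} bookkeeping above enforces at most one writer but any number of readers per path. The counting rule on its own, as a sketch (invented module; the mode flags are abstracted to booleans):

    -module(rw_count_sketch).
    -export([try_open/2, on_close/2]).

    %% {RCount, HasWriter} is per-path state in the style of #file{}.
    try_open({IsReader, IsWriter}, {RCount, HasWriter}) ->
        case IsWriter andalso HasWriter of
            true  -> {error, writer_exists};
            false -> RCount1 = case IsReader of
                                   true  -> RCount + 1;
                                   false -> RCount
                               end,
                     {ok, {RCount1, HasWriter orelse IsWriter}}
        end.

    %% On close, unwind the counts; the caller drops the whole entry
    %% once they reach {0, false}.
    on_close({IsReader, IsWriter}, {RCount, HasWriter}) ->
        {case IsReader of true -> RCount - 1; false -> RCount end,
         HasWriter andalso not IsWriter}.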
+ +open1(Path, Mode, Options, GRef, State) -> + case file:open(Path, Mode) of + {ok, Hdl} -> + WriteBufferSize = + case proplists:get_value(write_buffer, Options, unbuffered) of + unbuffered -> 0; + infinity -> infinity; + N when is_integer(N) -> N + end, + Ref = make_ref(), + put({Ref, fhc_handle}, + #handle { hdl = Hdl, offset = 0, trusted_offset = 0, + write_buffer_size = WriteBufferSize, + write_buffer = [], at_eof = false, + is_append = lists:member(append, Mode), mode = Mode, + global_key = GRef }), + {{ok, Ref}, State}; + {error, Reason} -> + {{error, Reason}, State} + end. + +write_buffer(Handle = #handle { write_buffer = [] }) -> + Handle; +write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, + write_buffer = WriteBuffer, + is_append = IsAppend, at_eof = AtEoF }) -> + DataSize = size_of_write_data(WriteBuffer), + case file:write(Hdl, lists:reverse(WriteBuffer)) of + ok -> + Offset1 = case IsAppend of + true -> Offset; + false -> Offset + DataSize + end, + AtEoF1 = AtEoF andalso not IsAppend, + {ok, Handle #handle { offset = Offset1, write_buffer = [], + at_eof = AtEoF1 }}; + {error, Reason} -> + {{error, Reason}, Handle} + end. + +size_of_write_data(Data) -> + size_of_write_data(Data, 0). + +size_of_write_data([], Acc) -> + Acc; +size_of_write_data([A|B], Acc) -> + size_of_write_data(B, size_of_write_data(A, Acc)); +size_of_write_data(Bin, Acc) when is_binary(Bin) -> + size(Bin) + Acc. + +is_reader(Mode) -> + lists:member(read, Mode). + +is_writer(Mode) -> + lists:member(write, Mode) orelse lists:member(append, Mode). + +%% maybe_position(Hdl, AtEof, CurOffset, DesiredOffset) +maybe_position(_Hdl, AtEof, CurOffset, cur) -> + {AtEof, CurOffset}; +maybe_position(_Hdl, true, CurOffset, eof) -> + {true, CurOffset}; +maybe_position(_Hdl, AtEof, CurOffset, CurOffset) -> + {AtEof, CurOffset}; +maybe_position(Hdl, true, CurOffset, DesiredOffset) + when DesiredOffset >= CurOffset -> + {ok, Offset} = file:position(Hdl, DesiredOffset), + {true, Offset}; +%% because we can't really track size, we could well end up at EoF and not know +maybe_position(Hdl, _AtEoF, _CurOffset, DesiredOffset) -> + {ok, Offset} = file:position(Hdl, DesiredOffset), + {false, Offset}. -- cgit v1.2.1 From 296ff5c7f84e593db874a0b66c07e74b029529e9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 22 Oct 2009 00:23:11 +0100 Subject: trivial and cosmetic --- src/file_handle_cache.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 1d291474..c5efa00d 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -53,12 +53,12 @@ }). 
open(Path, Mode, Options, State) -> - Mode1 = lists:usort(Mode), Path1 = filename:absname(Path), case get({Path1, fhc_path}) of {gref, GRef} -> #file { reader_count = RCount, has_writer = HasWriter } = File = get({GRef, fhc_file}), + Mode1 = lists:usort(Mode), IsWriter = is_writer(Mode1), case IsWriter andalso HasWriter of true -> @@ -75,7 +75,6 @@ open(Path, Mode, Options, State) -> end; undefined -> GRef = make_ref(), - %% returns 0 even if file doesn't exist put({Path1, fhc_path}, {gref, GRef}), put({GRef, fhc_file}, #file { reader_count = 0, has_writer = false, path = Path1 }), -- cgit v1.2.1 From bc9a0bdc8f6acddba1d1f2ec14cf592fee96c64a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 22 Oct 2009 11:49:47 +0100 Subject: More work on the real fhc --- src/file_handle_cache.erl | 153 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 124 insertions(+), 29 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index c5efa00d..933f8a83 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -32,7 +32,7 @@ -module(file_handle_cache). -export([init/0, open/4, close/2, release/2, read/4, write/4, sync/2, - position/3, truncate/2, with_file_handle_at/4, sync_to_offset/3]). + position/3, truncate/2, last_sync_offset/2]). -record(file, { reader_count, @@ -45,11 +45,15 @@ offset, trusted_offset, write_buffer_size, + write_buffer_size_limit, write_buffer, at_eof, is_append, + is_write, + is_read, mode, - global_key + global_key, + last_used_at }). open(Path, Mode, Options, State) -> @@ -59,7 +63,7 @@ open(Path, Mode, Options, State) -> #file { reader_count = RCount, has_writer = HasWriter } = File = get({GRef, fhc_file}), Mode1 = lists:usort(Mode), - IsWriter = is_writer(Mode1), + IsWriter = is_writer(Mode1) orelse is_appender(Mode), case IsWriter andalso HasWriter of true -> {{error, writer_exists}, State}; @@ -118,14 +122,19 @@ release(_Ref, State) -> %% noop just for now read(Ref, NewOffset, Count, State) -> case get({Ref, fhc_handle}) of undefined -> {{error, not_open}, State}; + #handle { is_read = false } -> {{error, not_open_for_reading}, State}; Handle -> case write_buffer(Handle) of {ok, Handle1 = #handle { hdl = Hdl, at_eof = AtEoF, offset = Offset }} -> - {AtEoF1, Offset1} = - maybe_position(Hdl, AtEoF, Offset, NewOffset), + {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), + {ok, Offset1} = case NeedsSeek of + true -> file:position(Hdl, NewOffset); + false -> {ok, Offset} + end, Handle2 = Handle1 #handle { at_eof = AtEoF1, - offset = Offset1 }, + offset = Offset1, + last_used_at = now() }, {Handle3, Result} = case file:read(Hdl, Count) of {ok, Data} -> {Handle2, {ok, Data}}; @@ -140,6 +149,56 @@ read(Ref, NewOffset, Count, State) -> end end. +write(Ref, NewOffset, Data, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + Handle = #handle { is_append = true, is_write = false } -> + {Result, Handle1} = write_to_buffer(Data, Handle), + put({Ref, fhc_handle}, Handle1), + {Result, State}; + Handle = #handle { is_append = false, is_write = true, at_eof = AtEoF, + offset = Offset, write_buffer_size = BufSize } -> + %% If we wrote the buffer out now, where would we end up? + %% Note that if AtEoF == true then it would still be true + %% after writing the buffer out, but if AtEoF == false, + %% it's possible it should be true after writing the + %% buffer out, but we won't know about it. 
+ VirtualOffset = Offset + BufSize, + %% AtEoF1 says "after writing the buffer out, we will be + %% at VirtualOffset. At that point, we travel to + %% NewOffset. When we get there, will we be at eof?" + {AtEoF1, NeedsSeek} = needs_seek(AtEoF, VirtualOffset, NewOffset), + {Error, Handle1} = + case NeedsSeek of + %% Now if we don't seek, we don't write the buffer + %% out. This means we'll still be at Offset, and + %% AtEoF still applies. We need to add the data to + %% the buffer and leave it at that. + false -> {ok, Handle}; + %% If we do seek, then we write the buffer out, + %% which means that AtEoF1 applies, because after + %% writing the buffer out, we'll be at + %% VirtualOffset, and then want to get to + %% NewOffset. + true -> + case write_buffer(Handle) of + {ok, Handle2 = #handle { hdl = Hdl }} -> + {ok, Offset2} = file:position(Hdl, NewOffset), + {ok, Handle2 #handle { offset = Offset2, + at_eof = AtEoF1 }}; + {Error1, Handle2} -> {Error1, Offset, Handle2} + end + end, + case Error of + ok -> {Result, Handle3} = write_to_buffer(Data, Handle1), + put({Ref, fhc_handle}, Handle3), + {Result, State}; + _ -> put({Ref, fhc_handle}, Handle1), + {Error, State} + end; + _ -> {{error, not_open_for_writing}, State} + end. + open1(Path, Mode, Options, GRef, State) -> case file:open(Path, Mode) of {ok, Hdl} -> @@ -152,21 +211,46 @@ open1(Path, Mode, Options, GRef, State) -> Ref = make_ref(), put({Ref, fhc_handle}, #handle { hdl = Hdl, offset = 0, trusted_offset = 0, - write_buffer_size = WriteBufferSize, + write_buffer_size = 0, + write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, - is_append = lists:member(append, Mode), mode = Mode, - global_key = GRef }), + is_append = is_appender(Mode), mode = Mode, + is_write = is_writer(Mode), is_read = is_reader(Mode), + global_key = GRef, last_used_at = now() }), {{ok, Ref}, State}; {error, Reason} -> {{error, Reason}, State} end. +write_to_buffer(Data, Handle = #handle { hdl = Hdl, + write_buffer_size_limit = 0 }) -> + {file:write(Hdl, Data), Handle #handle { last_used_at = now() }}; +write_to_buffer(Data, Handle = + #handle { write_buffer = WriteBuffer, + write_buffer_size = Size, + write_buffer_size_limit = infinity }) -> + {ok, Handle #handle { write_buffer_size = Size + size_of_write_data(Data), + write_buffer = [ Data | WriteBuffer ], + last_used_at = now() }}; +write_to_buffer(Data, Handle = + #handle { write_buffer = WriteBuffer, + write_buffer_size = Size, + write_buffer_size_limit = Limit }) -> + Size1 = Size + size_of_write_data(Data), + Handle1 = Handle #handle { write_buffer = [ Data | WriteBuffer ], + write_buffer_size = Size1, + last_used_at = now() }, + case Size1 > Limit of + true -> write_buffer(Handle1); + false -> {ok, Handle1} + end. + write_buffer(Handle = #handle { write_buffer = [] }) -> - Handle; + {ok, Handle}; write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, write_buffer = WriteBuffer, + write_buffer_size = DataSize, is_append = IsAppend, at_eof = AtEoF }) -> - DataSize = size_of_write_data(WriteBuffer), case file:write(Hdl, lists:reverse(WriteBuffer)) of ok -> Offset1 = case IsAppend of @@ -175,7 +259,7 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, end, AtEoF1 = AtEoF andalso not IsAppend, {ok, Handle #handle { offset = Offset1, write_buffer = [], - at_eof = AtEoF1 }}; + write_buffer_size = 0, at_eof = AtEoF1 }}; {error, Reason} -> {{error, Reason}, Handle} end. 
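write_to_buffer/2 above keys its policy off write_buffer_size_limit: 0 means write through, 'infinity' means flush only on demand, and a number means flush once the buffered size exceeds it. (The explicit Limit /= infinity test is belt and braces: in Erlang's term order any number already compares less than the atom 'infinity'.) The policy in isolation, as a sketch with stand-in write and flush funs:

    -module(buffer_limit_sketch).
    -export([append/2]).

    %% State is {Buffer, Size, Limit, WriteFun, FlushFun}; WriteFun and
    %% FlushFun stand in for the real unbuffered write and buffer flush.
    append(Data, {_Buf, _Size, 0, WriteFun, _FlushFun} = State) ->
        ok = WriteFun(Data),               %% limit 0: write through
        State;
    append(Data, {Buf, Size, Limit, WriteFun, FlushFun}) ->
        Size1 = Size + iolist_size(Data),
        Buf1 = [Data | Buf],
        case Limit =/= infinity andalso Size1 > Limit of
            true  -> ok = FlushFun(lists:reverse(Buf1)),
                     {[], 0, Limit, WriteFun, FlushFun};
            false -> {Buf1, Size1, Limit, WriteFun, FlushFun}
        end.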
@@ -190,24 +274,35 @@ size_of_write_data([A|B], Acc) -> size_of_write_data(Bin, Acc) when is_binary(Bin) -> size(Bin) + Acc. -is_reader(Mode) -> - lists:member(read, Mode). +is_reader(Mode) -> lists:member(read, Mode). + +is_writer(Mode) -> lists:member(write, Mode). -is_writer(Mode) -> - lists:member(write, Mode) orelse lists:member(append, Mode). +is_appender(Mode) -> lists:member(append, Mode). -%% maybe_position(Hdl, AtEof, CurOffset, DesiredOffset) -maybe_position(_Hdl, AtEof, CurOffset, cur) -> - {AtEof, CurOffset}; -maybe_position(_Hdl, true, CurOffset, eof) -> - {true, CurOffset}; -maybe_position(_Hdl, AtEof, CurOffset, CurOffset) -> - {AtEof, CurOffset}; -maybe_position(Hdl, true, CurOffset, DesiredOffset) +needs_seek(AtEof, _CurOffset, DesiredOffset) + when DesiredOffset == cur orelse DesiredOffset == {cur, 0} -> + {AtEof, false}; +needs_seek(true, _CurOffset, DesiredOffset) + when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> + {true, false}; +needs_seek(false, _CurOffset, DesiredOffset) + when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> + {true, true}; +needs_seek(AtEof, 0, DesiredOffset) + when DesiredOffset == bof orelse DesiredOffset == {bof, 0} -> + {AtEof, false}; +needs_seek(AtEof, CurOffset, CurOffset) -> + {AtEof, false}; +needs_seek(true, CurOffset, {bof, DesiredOffset}) when DesiredOffset >= CurOffset -> - {ok, Offset} = file:position(Hdl, DesiredOffset), - {true, Offset}; + {true, true}; +needs_seek(true, _CurOffset, {cur, DesiredOffset}) + when DesiredOffset > 0 -> + {true, true}; +needs_seek(true, CurOffset, DesiredOffset) %% same as {bof, DO} + when is_integer(DesiredOffset) andalso DesiredOffset >= CurOffset -> + {true, true}; %% because we can't really track size, we could well end up at EoF and not know -maybe_position(Hdl, _AtEoF, _CurOffset, DesiredOffset) -> - {ok, Offset} = file:position(Hdl, DesiredOffset), - {false, Offset}. +needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> + {false, true}. -- cgit v1.2.1 From 504c65eb7ccdd77d7ca30360deb943fa34dd7625 Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Thu, 22 Oct 2009 09:00:57 -0400 Subject: QA: specs, and LOGDEBUGS --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_memory_monitor.erl | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index fa3d17a8..d0123989 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -833,7 +833,7 @@ handle_cast({set_bufsec_limit, BufSec}, State) -> _ -> BufSec * DrainRatio#ratio.ratio * 1000000 end, %% Just to prove that something is happening. - io:format("Queue size is ~8p, should be ~p~n", + ?LOGDEBUG("Queue size is ~8p, should be ~p~n", [queue:len(State#q.message_buffer), DesiredQueueLength]), noreply(State). diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index e878edda..87ee96ad 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -70,6 +70,7 @@ -module(rabbit_memory_monitor). +-include("rabbit.hrl"). -behaviour(gen_server2). @@ -91,13 +92,20 @@ -define(SERVER, ?MODULE). -define(DEFAULT_UPDATE_INTERVAL_MS, 2500). -%% Enable debug reports in stdout: --define(debug, true). - %%---------------------------------------------------------------------------- - -ifdef(use_specs). +-spec(start_link/0 :: () -> 'ignore' | {'error',_} | {'ok',pid()}). +-spec(register/1 :: (pid()) -> ok). + +-spec(init/1 :: ([]) -> {ok, #state{}}). + +-ifdef(debug). 
+-spec(ftoa/1 :: (any()) -> string()). +-endif. + +-spec(count_average/1 :: (list()) -> float() | infinity ). +-spec(internal_update/1 :: (#state{}) -> #state{}). -endif. %%---------------------------------------------------------------------------- @@ -133,7 +141,7 @@ init([]) -> %% We should never use more memory than user requested. As the memory %% manager doesn't really know how much memory queues are using, we shall %% try to remain safe distance from real limit. - MemoryLimit = get_user_memory_limit() * 0.6, + MemoryLimit = trunc(get_user_memory_limit() * 0.6), rabbit_log:warning("Memory monitor limit: ~pMB~n", [erlang:trunc(MemoryLimit/1024/1024)]), @@ -184,15 +192,6 @@ ftoa(Float) -> false -> io_lib:format("~p", [Float]) end, lists:flatten(Str). - -print_debug_info(RealDrainAvg, DesiredDrainAvg, MemoryOvercommit) -> - io:format("DrainAvg Real/Desired:~s/~s MemoryOvercommit:~s~n", - [ftoa(RealDrainAvg), ftoa(DesiredDrainAvg), - ftoa(MemoryOvercommit)]). --else. -print_debug_info(_RealDrainAvg, _DesiredDrainAvg, _MemoryOvercommit) -> - ok. - -endif. %% Count average from numbers, excluding atoms in the list. @@ -214,11 +213,12 @@ internal_update(State) -> %% Nor does the queue. DesiredDrainAvg = case RealDrainAvg of infinity -> infinity; - 0 -> infinity; 0.0 -> infinity; _ -> RealDrainAvg / MemoryOvercommit end, - print_debug_info(RealDrainAvg, DesiredDrainAvg, MemoryOvercommit), + ?LOGDEBUG("DrainAvg Real/Desired:~s/~s MemoryOvercommit:~s~n", + [ftoa(RealDrainAvg), ftoa(DesiredDrainAvg), + ftoa(MemoryOvercommit)]), %% Inform the queue to reduce its memory usage when needed. %% This can sometimes wake the queue from hibernation. Well, we don't care. ReduceMemory = fun ({Pid, QueueDrain}) -> -- cgit v1.2.1 From f24b43d24503b6dd5d31b11032972c632a95ed9f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 22 Oct 2009 15:23:09 +0100 Subject: not quite complete, but much much simpler, and prettier --- src/file_handle_cache.erl | 226 ++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 120 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 933f8a83..e86344a0 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -31,7 +31,7 @@ -module(file_handle_cache). --export([init/0, open/4, close/2, release/2, read/4, write/4, sync/2, - position/3, truncate/2, last_sync_offset/2]). +-export([init/0, open/4, close/2, release/2, read/4, append/3, sync/2, + position/3, truncate/2, last_sync_offset/2]). -record(file, { reader_count, has_writer, path }). -record(handle, { hdl, offset, trusted_offset, write_buffer_size, write_buffer_size_limit, write_buffer, at_eof, is_write, is_read, mode, global_key, last_used_at }). 

init() -> empty_state. 
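The rewritten open/4 in the next hunk refuses append mode outright ({error, append_not_supported}) rather than model it: with append semantics every write lands at end-of-file no matter what file:position/2 said beforehand, which would quietly invalidate the offset tracking. A free-standing demonstration of the underlying behaviour (hypothetical module; Path should name a scratch file):

    -module(append_mode_sketch).
    -export([demo/1]).

    demo(Path) ->
        {ok, H} = file:open(Path, [write, binary]),   %% truncates
        ok = file:write(H, <<"aa">>),
        ok = file:close(H),
        {ok, H2} = file:open(Path, [append, binary]),
        {ok, 0} = file:position(H2, bof), %% succeeds, but only moves
                                          %% the notional read head
        ok = file:write(H2, <<"bb">>),    %% still appended at eof
        ok = file:close(H2),
        {ok, <<"aabb">>} = file:read_file(Path),
        ok.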
+ open(Path, Mode, Options, State) -> - Path1 = filename:absname(Path), - case get({Path1, fhc_path}) of - {gref, GRef} -> - #file { reader_count = RCount, has_writer = HasWriter } - = File = get({GRef, fhc_file}), - Mode1 = lists:usort(Mode), - IsWriter = is_writer(Mode1) orelse is_appender(Mode), - case IsWriter andalso HasWriter of - true -> - {{error, writer_exists}, State}; - false -> - RCount1 = case is_reader(Mode1) of - true -> RCount + 1; - false -> RCount - end, - put({Path1, fhc_file}, - File #file { reader_count = RCount1, - has_writer = HasWriter orelse IsWriter }), - open1(Path1, Mode1, Options, GRef, State) - end; - undefined -> - GRef = make_ref(), - put({Path1, fhc_path}, {gref, GRef}), - put({GRef, fhc_file}, - #file { reader_count = 0, has_writer = false, path = Path1 }), - open(Path, Mode, Options, State) + case is_appender(Mode) of + true -> {{error, append_not_supported}, State}; + false -> + Path1 = filename:absname(Path), + case get({Path1, fhc_path}) of + {gref, GRef} -> + #file { reader_count = RCount, has_writer = HasWriter } + = File = get({GRef, fhc_file}), + Mode1 = lists:usort(Mode), + IsWriter = is_writer(Mode1), + case IsWriter andalso HasWriter of + true -> + {{error, writer_exists}, State}; + false -> + RCount1 = case is_reader(Mode1) of + true -> RCount + 1; + false -> RCount + end, + put({Path1, fhc_file}, + File #file { + reader_count = RCount1, + has_writer = HasWriter orelse IsWriter }), + open1(Path1, Mode1, Options, GRef, State) + end; + undefined -> + GRef = make_ref(), + put({Path1, fhc_path}, {gref, GRef}), + put({GRef, fhc_file}, + #file { reader_count = 0, has_writer = false, + path = Path1 }), + open(Path, Mode, Options, State) + end end. close(Ref, State) -> @@ -90,11 +97,10 @@ close(Ref, State) -> undefined -> {ok, State}; Handle -> case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, mode = Mode, global_key = GRef }} -> + {ok, #handle { hdl = Hdl, global_key = GRef, + is_read = IsReader, is_write = IsWriter }} -> ok = file:sync(Hdl), ok = file:close(Hdl), - IsReader = is_reader(Mode), - IsWriter = is_writer(Mode), #file { reader_count = RCount, has_writer = HasWriter, path = Path } = File = get({GRef, fhc_file}), RCount1 = case IsReader of @@ -124,81 +130,61 @@ read(Ref, NewOffset, Count, State) -> undefined -> {{error, not_open}, State}; #handle { is_read = false } -> {{error, not_open_for_reading}, State}; Handle -> - case write_buffer(Handle) of - {ok, Handle1 = #handle { hdl = Hdl, at_eof = AtEoF, - offset = Offset }} -> - {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), - {ok, Offset1} = case NeedsSeek of - true -> file:position(Hdl, NewOffset); - false -> {ok, Offset} - end, - Handle2 = Handle1 #handle { at_eof = AtEoF1, - offset = Offset1, - last_used_at = now() }, - {Handle3, Result} = - case file:read(Hdl, Count) of - {ok, Data} -> {Handle2, {ok, Data}}; - eof -> {Handle2 #handle { at_eof = true }, eof}; - {error, Reason} -> {Handle2, {error, Reason}} - end, - put({Ref, fhc_handle}, Handle3), - {Result, State}; - {Error, Handle1} -> - put({Ref, fhc_handle}, Handle1), - {Error, State} - end + {Result, Handle1} = + case write_buffer(Handle) of + {ok, Handle2} -> + case maybe_seek(NewOffset, Handle2) of + {ok, Handle3 = #handle { hdl = Hdl }} -> + case file:read(Hdl, Count) of + {ok, _} = Obj -> {Obj, Handle3}; + eof -> {eof, + Handle3 #handle { at_eof = true }}; + {error, _} = Error -> {Error, Handle3} + end; + {Error, Handle3} -> {Error, Handle3} + end; + {Error, Handle2} -> {Error, Handle2} + end, + put({Ref, 
fhc_handle}, Handle1 #handle { last_used_at = now() }), + {Result, State} end. -write(Ref, NewOffset, Data, State) -> +append(Ref, Data, State) -> case get({Ref, fhc_handle}) of undefined -> {{error, not_open}, State}; - Handle = #handle { is_append = true, is_write = false } -> - {Result, Handle1} = write_to_buffer(Data, Handle), - put({Ref, fhc_handle}, Handle1), - {Result, State}; - Handle = #handle { is_append = false, is_write = true, at_eof = AtEoF, - offset = Offset, write_buffer_size = BufSize } -> - %% If we wrote the buffer out now, where would we end up? - %% Note that if AtEoF == true then it would still be true - %% after writing the buffer out, but if AtEoF == false, - %% it's possible it should be true after writing the - %% buffer out, but we won't know about it. - VirtualOffset = Offset + BufSize, - %% AtEoF1 says "after writing the buffer out, we will be - %% at VirtualOffset. At that point, we travel to - %% NewOffset. When we get there, will we be at eof?" - {AtEoF1, NeedsSeek} = needs_seek(AtEoF, VirtualOffset, NewOffset), - {Error, Handle1} = - case NeedsSeek of - %% Now if we don't seek, we don't write the buffer - %% out. This means we'll still be at Offset, and - %% AtEoF still applies. We need to add the data to - %% the buffer and leave it at that. - false -> {ok, Handle}; - %% If we do seek, then we write the buffer out, - %% which means that AtEoF1 applies, because after - %% writing the buffer out, we'll be at - %% VirtualOffset, and then want to get to - %% NewOffset. - true -> - case write_buffer(Handle) of - {ok, Handle2 = #handle { hdl = Hdl }} -> - {ok, Offset2} = file:position(Hdl, NewOffset), - {ok, Handle2 #handle { offset = Offset2, - at_eof = AtEoF1 }}; - {Error1, Handle2} -> {Error1, Offset, Handle2} - end + Handle = #handle { is_write = true } -> + {Result, Handle1} = + case maybe_seek(eof, Handle) of + {ok, Handle2 = #handle { at_eof = true }} -> + write_to_buffer(Data, Handle2); + {{error, _} = Error, Handle2} -> + {Error, Handle2} end, - case Error of - ok -> {Result, Handle3} = write_to_buffer(Data, Handle1), - put({Ref, fhc_handle}, Handle3), - {Result, State}; - _ -> put({Ref, fhc_handle}, Handle1), - {Error, State} - end; + put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + {Result, State}; _ -> {{error, not_open_for_writing}, State} end. +last_sync_offset(Ref, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + #handle { trusted_offset = TrustedOffset } -> + {{ok, TrustedOffset}, State} + end. + +position(Ref, NewOffset, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + Handle -> + {Result, Handle1} = + case write_buffer(Handle) of + {ok, Handle2} -> maybe_seek(NewOffset, Handle2); + {Error, Handle2} -> {Error, Handle2} + end, + put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + {Result, State} + end. + open1(Path, Mode, Options, GRef, State) -> case file:open(Path, Mode) of {ok, Hdl} -> @@ -213,8 +199,7 @@ open1(Path, Mode, Options, GRef, State) -> #handle { hdl = Hdl, offset = 0, trusted_offset = 0, write_buffer_size = 0, write_buffer_size_limit = WriteBufferSize, - write_buffer = [], at_eof = false, - is_append = is_appender(Mode), mode = Mode, + write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), global_key = GRef, last_used_at = now() }), {{ok, Ref}, State}; @@ -222,25 +207,30 @@ open1(Path, Mode, Options, GRef, State) -> {{error, Reason}, State} end. 
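open1/5 above decodes the write_buffer option with proplists:get_value/3 and a default. That option-decoding shape in isolation (hypothetical module):

    -module(options_sketch).
    -export([write_buffer_limit/1]).

    %% unbuffered -> 0 (write through); infinity -> flush only on
    %% demand; a non-negative integer -> flush past that many bytes.
    write_buffer_limit(Options) ->
        case proplists:get_value(write_buffer, Options, unbuffered) of
            unbuffered                   -> 0;
            infinity                     -> infinity;
            N when is_integer(N), N >= 0 -> N
        end.

For example, write_buffer_limit([{write_buffer, 1048576}]) gives 1048576, and write_buffer_limit([]) gives 0.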
+maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, + offset = Offset }) -> + {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), + Result = case NeedsSeek of + true -> file:position(Hdl, NewOffset); + false -> {ok, Offset} + end, + case Result of + {ok, Offset1} -> + {ok, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; + {error, _} = Error -> {Error, Handle} + end. + write_to_buffer(Data, Handle = #handle { hdl = Hdl, write_buffer_size_limit = 0 }) -> - {file:write(Hdl, Data), Handle #handle { last_used_at = now() }}; -write_to_buffer(Data, Handle = - #handle { write_buffer = WriteBuffer, - write_buffer_size = Size, - write_buffer_size_limit = infinity }) -> - {ok, Handle #handle { write_buffer_size = Size + size_of_write_data(Data), - write_buffer = [ Data | WriteBuffer ], - last_used_at = now() }}; + {file:write(Hdl, Data), Handle}; write_to_buffer(Data, Handle = #handle { write_buffer = WriteBuffer, write_buffer_size = Size, write_buffer_size_limit = Limit }) -> Size1 = Size + size_of_write_data(Data), Handle1 = Handle #handle { write_buffer = [ Data | WriteBuffer ], - write_buffer_size = Size1, - last_used_at = now() }, - case Size1 > Limit of + write_buffer_size = Size1 }, + case Limit /= infinity andalso Size1 > Limit of true -> write_buffer(Handle1); false -> {ok, Handle1} end. @@ -250,18 +240,14 @@ write_buffer(Handle = #handle { write_buffer = [] }) -> write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, write_buffer = WriteBuffer, write_buffer_size = DataSize, - is_append = IsAppend, at_eof = AtEoF }) -> + at_eof = true }) -> case file:write(Hdl, lists:reverse(WriteBuffer)) of ok -> - Offset1 = case IsAppend of - true -> Offset; - false -> Offset + DataSize - end, - AtEoF1 = AtEoF andalso not IsAppend, + Offset1 = Offset + DataSize, {ok, Handle #handle { offset = Offset1, write_buffer = [], - write_buffer_size = 0, at_eof = AtEoF1 }}; - {error, Reason} -> - {{error, Reason}, Handle} + write_buffer_size = 0 }}; + {error, _} = Error -> + {Error, Handle} end. size_of_write_data(Data) -> -- cgit v1.2.1 From 6a77d7cd8c7f6f5d54fd8d0a7d9372d2f8c00a36 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 22 Oct 2009 16:23:55 +0100 Subject: bug fix in fhc:read (not tracking change in offset). Also made qi use new fhc, and remove hdfhc --- src/file_handle_cache.erl | 59 ++++++- src/horrendously_dumb_file_handle_cache.erl | 246 ---------------------------- src/rabbit_queue_index.erl | 78 +++++---- 3 files changed, 91 insertions(+), 292 deletions(-) delete mode 100644 src/horrendously_dumb_file_handle_cache.erl diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index e86344a0..33a69ed7 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -134,12 +134,17 @@ read(Ref, NewOffset, Count, State) -> case write_buffer(Handle) of {ok, Handle2} -> case maybe_seek(NewOffset, Handle2) of - {ok, Handle3 = #handle { hdl = Hdl }} -> + {ok, Handle3 = #handle { hdl = Hdl, + offset = Offset }} -> case file:read(Hdl, Count) of - {ok, _} = Obj -> {Obj, Handle3}; - eof -> {eof, - Handle3 #handle { at_eof = true }}; - {error, _} = Error -> {Error, Handle3} + {ok, _} = Obj -> + {Obj, Handle3 #handle { + offset = Offset + Count }}; + eof -> + {eof, Handle3 #handle { + at_eof = true }}; + {error, _} = Error -> + {Error, Handle3} end; {Error, Handle3} -> {Error, Handle3} end; @@ -185,6 +190,50 @@ position(Ref, NewOffset, State) -> {Result, State} end. 
+sync(Ref, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + Handle = #handle { write_buffer = [], hdl = Hdl, offset = Offset } -> + {Result, Handle1} = + case file:sync(Hdl) of + ok -> {ok, Handle #handle { trusted_offset = Offset }}; + Error -> {Error, Handle} + end, + put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + {Result, State}; + Handle = #handle { at_eof = true } -> + %% we can't have content in the buffer without being at eof + {Result, Handle1} = write_buffer(Handle), + put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + {Result, State} + end. + +truncate(Ref, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + Handle = #handle { is_write = true } -> + {Result, Handle1} = + case write_buffer(Handle) of + {ok, + Handle2 = #handle { hdl = Hdl, offset = Offset, + trusted_offset = TrustedOffset }} -> + case file:truncate(Hdl) of + ok -> + {ok, + Handle2 #handle { + at_eof = true, + trusted_offset = lists:min([Offset, + TrustedOffset]) + }}; + Error -> {Error, Handle2} + end; + {Error, Handle2} -> {Error, Handle2} + end, + put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now () }), + {Result, State}; + _Handle -> {{error, not_open_for_writing}, State} + end. + open1(Path, Mode, Options, GRef, State) -> case file:open(Path, Mode) of {ok, Hdl} -> diff --git a/src/horrendously_dumb_file_handle_cache.erl b/src/horrendously_dumb_file_handle_cache.erl deleted file mode 100644 index e3aa49b3..00000000 --- a/src/horrendously_dumb_file_handle_cache.erl +++ /dev/null @@ -1,246 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(horrendously_dumb_file_handle_cache). - --export([init/0, open/4, close/2, release/2, read/4, write/4, sync/2, - position/3, truncate/2, with_file_handle_at/4, sync_to_offset/3]). - --record(entry, - { hdl, - current_offset, - last_sync_offset, - write_buffer, - is_append, - at_eof, - path_mode_key }). - -init() -> empty_state. 
- -open(Path, Mode, [] = _ExtraOptions, State) -> - Mode1 = lists:usort(Mode), - Path1 = filename:absname(Path), - Key = {Path1, Mode1}, - case get({fhc, path_mode_ref, Key}) of - {ref, Ref} -> {{ok, Ref}, State}; - undefined -> - case file:open(Path1, Mode1) of - {ok, Hdl} -> - Ref = make_ref(), - put({fhc, path_mode_ref, Key}, {ref, Ref}), - Entry = #entry { hdl = Hdl, current_offset = 0, - last_sync_offset = 0, write_buffer = [], - is_append = lists:member(append, Mode1), - at_eof = false, path_mode_key = Key }, - put({fhc, ref_entry, Ref}, Entry), - {{ok, Ref}, State}; - {error, Error} -> - {{error, Error}, State} - end - end. - -close(Ref, State) -> - {ok, - case erase({fhc, ref_entry, Ref}) of - #entry { hdl = Hdl, write_buffer = WriteBuffer, path_mode_key = Key } -> - ok = case WriteBuffer of - [] -> ok; - _ -> ok = file:write(Hdl, lists:reverse(WriteBuffer)), - file:sync(Hdl) - end, - ok = file:close(Hdl), - erase({fhc, path_mode_ref, Key}), - State; - undefined -> State - end}. - -release(_Ref, State) -> %% noop for the time being - {ok, State}. - -read(Ref, Offset, Count, State) -> - case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl, current_offset = OldOffset, - write_buffer = WriteBuffer } -> - ok = case WriteBuffer of - [] -> ok; - _ -> file:write(Hdl, lists:reverse(WriteBuffer)) - end, - NewOffset = Count + - case Offset of - cur -> OldOffset; - _ -> {ok, RealOff} = file:position(Hdl, Offset), - RealOff - end, - put({fhc, ref_entry, Ref}, - Entry #entry { current_offset = NewOffset, - at_eof = Offset =:= eof, - write_buffer = [] }), - {file:read(Hdl, Count), State}; - undefined -> {{error, not_open}, State} - end. - -%% if the file was opened in append mode, then Offset is ignored, as -%% it would only affect the read head for this file. -write(Ref, Offset, Data, State) -> - case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl, current_offset = OldOffset, - is_append = IsAppend, at_eof = AtEoF, - write_buffer = WriteBuffer } -> - NewOffset = - case IsAppend of - true -> - OldOffset; - false -> - size_of_write_data(Data) + - case Offset of - cur -> OldOffset; - eof when AtEoF -> OldOffset; - _ -> {ok, RealOff} = file:position(Hdl, Offset), - RealOff - end - end, - WriteBuffer1 = [Data | WriteBuffer], - put({fhc, ref_entry, Ref}, - Entry #entry { current_offset = NewOffset, - at_eof = Offset =:= eof, - write_buffer = WriteBuffer1 }), - {ok, State}; - undefined -> {{error, not_open}, State} - end. - -sync(Ref, State) -> - case get({fhc, ref_entry, Ref}) of - #entry { write_buffer = [] } -> {ok, State}; - Entry = #entry { hdl = Hdl, current_offset = Offset, - last_sync_offset = LastSyncOffset, - write_buffer = WriteBuffer } -> - SyncOffset = lists:max([Offset, LastSyncOffset]), - ok = file:write(Hdl, lists:reverse(WriteBuffer)), - ok = file:sync(Hdl), - put({fhc, ref_entry, Ref}, - Entry #entry { last_sync_offset = SyncOffset, - write_buffer = [] }), - {ok, State}; - undefined -> {{error, not_open}, State} - end. 
- -position(Ref, NewOffset, State) -> - case get({fhc, ref_entry, Ref}) of - #entry { current_offset = NewOffset } -> - {ok, State}; - #entry { at_eof = true } when NewOffset =:= eof -> - {ok, State}; - Entry = #entry { hdl = Hdl, write_buffer = WriteBuffer } -> - ok = case WriteBuffer of - [] -> ok; - _ -> file:write(Hdl, lists:reverse(WriteBuffer)) - end, - {ok, RealOff} = file:position(Hdl, NewOffset), - put({fhc, ref_entry, Ref}, - Entry #entry { current_offset = RealOff, - write_buffer = [], - at_eof = NewOffset =:= eof }), - {ok, State}; - undefined -> - {{error, not_open}, State} - end. - -truncate(Ref, State) -> - case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl, write_buffer = WriteBuffer } -> - ok = case WriteBuffer of - [] -> ok; - _ -> file:write(Hdl, lists:reverse(WriteBuffer)) - end, - ok = file:truncate(Hdl), - put({fhc, ref_entry, Ref}, - Entry #entry { at_eof = true, write_buffer = [] }), - {ok, State}; - undefined -> {{error, not_open}, State} - end. - -with_file_handle_at(Ref, Offset, Fun, State) -> - case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl, current_offset = OldOffset, - write_buffer = WriteBuffer, at_eof = AtEoF } -> - ok = case WriteBuffer of - [] -> ok; - _ -> file:write(Hdl, lists:reverse(WriteBuffer)) - end, - ok = case Offset of - eof when AtEoF -> ok; - cur -> ok; - OldOffset -> ok; - _ -> {ok, _RealOff} = file:position(Hdl, Offset), - ok - end, - {Offset2, Result} = Fun(Hdl), - put({fhc, ref_entry, Ref}, - Entry #entry { current_offset = Offset2, write_buffer = [], - at_eof = false }), - {Result, State}; - undefined -> {{error, not_open}, State} - end. - -sync_to_offset(Ref, Offset, State) -> - case get({fhc, ref_entry, Ref}) of - Entry = #entry { hdl = Hdl, last_sync_offset = LastSyncOffset, - current_offset = CurOffset, - write_buffer = [_|_] = WriteBuffer } - when (Offset =:= cur andalso CurOffset > LastSyncOffset) - orelse (Offset > LastSyncOffset) -> - ok = case WriteBuffer of - [] -> ok; - _ -> file:write(Hdl, lists:reverse(WriteBuffer)) - end, - ok = file:sync(Hdl), - LastSyncOffset1 = - case Offset of - cur -> lists:max([LastSyncOffset, CurOffset]); - _ -> lists:max([LastSyncOffset, CurOffset, Offset]) - end, - put({fhc, ref_entry, Ref}, - Entry #entry { last_sync_offset = LastSyncOffset1, - write_buffer = [] }), - {ok, State}; - #entry {} -> {ok, State}; - error -> {{error, not_open}, State} - end. - -size_of_write_data(Data) -> - size_of_write_data(Data, 0). - -size_of_write_data([], Acc) -> - Acc; -size_of_write_data([A|B], Acc) -> - size_of_write_data(B, size_of_write_data(A, Acc)); -size_of_write_data(Bin, Acc) when is_binary(Bin) -> - size(Bin) + Acc. 
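[Editor's sketch] The size_of_write_data/1 walker deleted above is a hand-rolled equivalent of the erlang:iolist_size/1 BIF for the binary-only iodata this cache ever buffers; a later patch in this series switches the surviving file_handle_cache over to iolist_size/1. A quick, self-contained equivalence check (module name hypothetical):

    -module(iolist_size_check).
    -export([check/0]).

    %% the deleted walker, reproduced for comparison
    size_of_write_data(Data) -> size_of_write_data(Data, 0).

    size_of_write_data([], Acc)    -> Acc;
    size_of_write_data([A|B], Acc) ->
        size_of_write_data(B, size_of_write_data(A, Acc));
    size_of_write_data(Bin, Acc) when is_binary(Bin) -> size(Bin) + Acc.

    check() ->
        Data = [<<"abc">>, [<<"de">>, [<<>>, <<"f">>]], <<"ghi">>],
        9 = size_of_write_data(Data),
        9 = erlang:iolist_size(Data),
        ok.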
diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 48da7e3f..a50d839c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -156,7 +156,7 @@ %%---------------------------------------------------------------------------- init(Name) -> - HCState = horrendously_dumb_file_handle_cache:init(), + HCState = file_handle_cache:init(), StrName = queue_name_to_dir_name(Name), Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), @@ -185,19 +185,19 @@ write_published(MsgId, SeqId, IsPersistent, State) ?MSG_ID_BYTES = size(MsgId), {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, HCState} = horrendously_dumb_file_handle_cache:write( - Hdl, eof, - <>, - State1 #qistate.hc_state), + {ok, HCState} = + file_handle_cache:append(Hdl, + <>, + State1 #qistate.hc_state), State1 #qistate { hc_state = HCState }. write_delivered(SeqId, State) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, HCState} = horrendously_dumb_file_handle_cache:write( - Hdl, eof, + {ok, HCState} = file_handle_cache:append( + Hdl, <>, State1 #qistate.hc_state), @@ -209,9 +209,8 @@ write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict, {JAckDict1, JAckCount1, HCState} = lists:foldl( fun (SeqId, {JAckDict2, JAckCount2, HCStateN}) -> - {ok, HCStateM} = - horrendously_dumb_file_handle_cache:write( - Hdl, eof, <>, HCStateN), + {ok, HCStateM} = file_handle_cache:append( + Hdl, <>, HCStateN), {add_ack_to_ack_dict(SeqId, JAckDict2), JAckCount2 + 1, HCStateM} end, {JAckDict, JAckCount, State1 #qistate.hc_state}, SeqIds), @@ -233,8 +232,7 @@ sync_all(State = #qistate { hc_state = HCState, seg_num_handles = SegHdls }) -> HCState1 = dict:fold( fun (_Key, Hdl, HCStateN) -> - {ok, HCStateM} = - horrendously_dumb_file_handle_cache:sync(Hdl, HCStateN), + {ok, HCStateM} = file_handle_cache:sync(Hdl, HCStateN), HCStateM end, HCState, SegHdls), State #qistate { hc_state = HCState1 }. @@ -253,10 +251,8 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, JAckCount1 == 0 -> {Hdl, State3 = #qistate { hc_state = HCState }} = get_journal_handle(State2), - {ok, HCState1} = - horrendously_dumb_file_handle_cache:position(Hdl, 0, HCState), - {ok, HCState2} = - horrendously_dumb_file_handle_cache:truncate(Hdl, HCState1), + {ok, HCState1} = file_handle_cache:position(Hdl, bof, HCState), + {ok, HCState2} = file_handle_cache:truncate(Hdl, HCState1), {false, State3 #qistate { hc_state = HCState2 }}; JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> flush_journal(State2); @@ -391,8 +387,8 @@ new_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> true -> close_all_handles(State); false -> State end, - {{ok, Hdl}, HCState1} = - horrendously_dumb_file_handle_cache:open(Path, Mode, [], HCState), + {{ok, Hdl}, HCState1} = + file_handle_cache:open(Path, Mode, [{write_buffer, infinity}], HCState), {Hdl, State1 #qistate { hc_state = HCState1, seg_num_handles = dict:store(Key, Hdl, SegHdls1) }}. 
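[Editor's sketch] new_handle/4 now opens every handle with {write_buffer, infinity}. Read against the write_to_buffer clause from earlier in the series (case Limit /= infinity andalso Size1 > Limit of ...), that choice disables size-triggered flushing altogether: buffered writes reach the OS only on an explicit sync, position, truncate or close. The decision in isolation:

    %% Sketch of the flush test implied by write_to_buffer/2: an
    %% 'infinity' limit means the buffer never flushes on size grounds.
    should_flush(_BufSize, infinity) -> false;
    should_flush(BufSize, Limit)
      when is_integer(Limit), BufSize > Limit -> true;
    should_flush(_BufSize, _Limit) -> false.

For the queue index this trades memory for write coalescing: segment appends accumulate until the index itself decides a sync point has been reached.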
@@ -400,8 +396,7 @@ close_handle(Key, State = #qistate { hc_state = HCState, seg_num_handles = SegHdls }) -> case dict:find(Key, SegHdls) of {ok, Hdl} -> - {ok, HCState1} = - horrendously_dumb_file_handle_cache:close(Hdl, HCState), + {ok, HCState1} = file_handle_cache:close(Hdl, HCState), State #qistate { hc_state = HCState1, seg_num_handles = dict:erase(Key, SegHdls) }; error -> State @@ -411,9 +406,8 @@ close_all_handles(State = #qistate { hc_state = HCState, seg_num_handles = SegHdls }) -> HCState1 = dict:fold( - fun (_Key, Ref, HCStateN) -> - {ok, HCStateM} = - horrendously_dumb_file_handle_cache:close(Ref, HCStateN), + fun (_Key, Hdl, HCStateN) -> + {ok, HCStateM} = file_handle_cache:close(Hdl, HCStateN), HCStateM end, HCState, SegHdls), State #qistate { hc_state = HCState1, seg_num_handles = dict:new() }. @@ -527,8 +521,7 @@ scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> {TotalMsgCount1, State3 #qistate { journal_ack_dict = dict:new() }}. load_journal(Hdl, ADict, HCState) -> - case horrendously_dumb_file_handle_cache:read( - Hdl, cur, ?SEQ_BYTES, HCState) of + case file_handle_cache:read(Hdl, cur, ?SEQ_BYTES, HCState) of {{ok, <>}, HCState1} -> load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict), HCState1); {_ErrOrEoF, HCState1} -> {ADict, HCState1} @@ -561,11 +554,16 @@ deliver_transient(SegNum, SDict, State) -> {[RelSeq | AckMeAcc], DeliverMeAcc} end, {[], []}, SDict), {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, HCState} = horrendously_dumb_file_handle_cache:write( - Hdl, eof, - [ <> || RelSeq <- DeliverMe ], - State1 #qistate.hc_state), + {ok, HCState} = + case DeliverMe of + [] -> {ok, State1 #qistate.hc_state}; + _ -> + file_handle_cache:append( + Hdl, + [ <> || RelSeq <- DeliverMe ], + State1 #qistate.hc_state) + end, {AckMe, State1 #qistate { hc_state = HCState }}. @@ -585,9 +583,7 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, {Hdl, State1 = #qistate { hc_state = HCState, journal_ack_dict = JAckDict }} = get_seg_handle(SegNum, State), - {ok, HCState1} = - horrendously_dumb_file_handle_cache:position(Hdl, 0, HCState), - + {ok, HCState1} = file_handle_cache:position(Hdl, bof, HCState), {SDict, AckCount, HighRelSeq, HCState2} = load_segment_entries(Hdl, dict:new(), 0, 0, HCState1), RelSeqs = case dict:find(SegNum, JAckDict) of @@ -603,11 +599,11 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, end. load_segment_entries(Hdl, SDict, AckCount, HighRelSeq, HCState) -> - case horrendously_dumb_file_handle_cache:read(Hdl, cur, 1, HCState) of + case file_handle_cache:read(Hdl, cur, 1, HCState) of {{ok, <>}, HCState1} -> {{ok, LSB}, HCState2} = - horrendously_dumb_file_handle_cache:read( + file_handle_cache:read( Hdl, cur, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1, HCState1), <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), @@ -617,7 +613,7 @@ load_segment_entries(Hdl, SDict, AckCount, HighRelSeq, HCState) -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. 
{{ok, <>}, HCState2} = - horrendously_dumb_file_handle_cache:read( + file_handle_cache:read( Hdl, cur, ?PUBLISH_RECORD_LENGTH_BYTES - 1, HCState1), <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), @@ -673,11 +669,11 @@ append_acks_to_segment(SegNum, AckCount, Acks, State) lists:foldl( fun (RelSeq, {AckCount2, HCStateN}) -> {ok, HCStateM} = - horrendously_dumb_file_handle_cache:write( - Hdl, eof, + file_handle_cache:append( + Hdl, <>, HCStateN), {AckCount2 + 1, HCStateM} end, {AckCount, State1 #qistate.hc_state}, Acks), - {ok, HCState1} = horrendously_dumb_file_handle_cache:sync(Hdl, HCState), + {ok, HCState1} = file_handle_cache:sync(Hdl, HCState), {AckCount1, State1 #qistate { hc_state = HCState1 }}. -- cgit v1.2.1 From de5b8001af2c2c6bd86a43b14576cdf316d76fe2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 22 Oct 2009 17:04:51 +0100 Subject: starting to deal with the idea that fhs may disappear because of hitting limits --- src/file_handle_cache.erl | 99 +++++++++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 33a69ed7..6959dcc2 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -51,6 +51,7 @@ is_write, is_read, mode, + options, global_key, last_used_at }). @@ -80,7 +81,12 @@ open(Path, Mode, Options, State) -> File #file { reader_count = RCount1, has_writer = HasWriter orelse IsWriter }), - open1(Path1, Mode1, Options, GRef, State) + Ref = make_ref(), + case open1(Path1, Mode1, Options, Ref, GRef, State) + of + {{ok, _Handle}, State} -> {{ok, Ref}, State}; + {Error, State} -> {Error, State} + end end; undefined -> GRef = make_ref(), @@ -99,8 +105,11 @@ close(Ref, State) -> case write_buffer(Handle) of {ok, #handle { hdl = Hdl, global_key = GRef, is_read = IsReader, is_write = IsWriter }} -> - ok = file:sync(Hdl), - ok = file:close(Hdl), + case Hdl of + closed -> ok; + _ -> ok = file:sync(Hdl), + ok = file:close(Hdl) + end, #file { reader_count = RCount, has_writer = HasWriter, path = Path } = File = get({GRef, fhc_file}), RCount1 = case IsReader of @@ -126,10 +135,10 @@ release(_Ref, State) -> %% noop just for now {ok, State}. read(Ref, NewOffset, Count, State) -> - case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; - #handle { is_read = false } -> {{error, not_open_for_reading}, State}; - Handle -> + case get_or_reopen(Ref, State) of + {{ok, #handle { is_read = false }}, State1} -> + {{error, not_open_for_reading}, State1}; + {{ok, Handle}, State1} -> {Result, Handle1} = case write_buffer(Handle) of {ok, Handle2} -> @@ -151,13 +160,15 @@ read(Ref, NewOffset, Count, State) -> {Error, Handle2} -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State} + {Result, State1}; + ErrorAndState -> ErrorAndState end. append(Ref, Data, State) -> - case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; - Handle = #handle { is_write = true } -> + case get_or_reopen(Ref, State) of + {{ok, #handle { is_write = false }}, State1} -> + {{error, not_open_for_writing}, State1}; + {{ok, Handle}, State1} -> {Result, Handle1} = case maybe_seek(eof, Handle) of {ok, Handle2 = #handle { at_eof = true }} -> @@ -166,52 +177,54 @@ append(Ref, Data, State) -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State}; - _ -> {{error, not_open_for_writing}, State} + {Result, State1}; + ErrorAndState -> ErrorAndState end. 
last_sync_offset(Ref, State) -> - case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; - #handle { trusted_offset = TrustedOffset } -> - {{ok, TrustedOffset}, State} + case get_or_reopen(Ref, State) of + {{ok, #handle { trusted_offset = TrustedOffset }}, State1} -> + {{ok, TrustedOffset}, State1}; + ErrorAndState -> ErrorAndState end. position(Ref, NewOffset, State) -> - case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; - Handle -> + case get_or_reopen(Ref, State) of + {{ok, Handle}, State1} -> {Result, Handle1} = case write_buffer(Handle) of {ok, Handle2} -> maybe_seek(NewOffset, Handle2); {Error, Handle2} -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State} + {Result, State1}; + ErrorAndState -> ErrorAndState end. sync(Ref, State) -> - case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; - Handle = #handle { write_buffer = [], hdl = Hdl, offset = Offset } -> + case get_or_reopen(Ref, State) of + {{ok, Handle = #handle { write_buffer = [], hdl = Hdl, + offset = Offset }}, State1} -> {Result, Handle1} = case file:sync(Hdl) of ok -> {ok, Handle #handle { trusted_offset = Offset }}; Error -> {Error, Handle} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State}; - Handle = #handle { at_eof = true } -> + {Result, State1}; + {{ok, Handle = #handle { at_eof = true }}, State1} -> %% we can't have content in the buffer without being at eof {Result, Handle1} = write_buffer(Handle), put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State} + {Result, State1}; + ErrorAndState -> ErrorAndState end. truncate(Ref, State) -> - case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; - Handle = #handle { is_write = true } -> + case get_or_reopen(Ref, State) of + {{ok, #handle { is_write = false }}, State1} -> + {{error, not_open_for_writing}, State1}; + {{ok, Handle}, State1} -> {Result, Handle1} = case write_buffer(Handle) of {ok, @@ -230,11 +243,21 @@ truncate(Ref, State) -> {Error, Handle2} -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now () }), - {Result, State}; - _Handle -> {{error, not_open_for_writing}, State} + {Result, State1}; + ErrorAndState -> ErrorAndState + end. + +get_or_reopen(Ref, State) -> + case get({Ref, fhc_handle}) of + undefined -> {{error, not_open}, State}; + #handle { hdl = closed, mode = Mode, global_key = GRef, + options = Options } -> + #file { path = Path } = get({GRef, fhc_file}), + open1(Path, Mode, Options, Ref, GRef, State); + Handle -> {{ok, Handle}, State} end. -open1(Path, Mode, Options, GRef, State) -> +open1(Path, Mode, Options, Ref, GRef, State) -> case file:open(Path, Mode) of {ok, Hdl} -> WriteBufferSize = @@ -243,15 +266,15 @@ open1(Path, Mode, Options, GRef, State) -> infinity -> infinity; N when is_integer(N) -> N end, - Ref = make_ref(), - put({Ref, fhc_handle}, + Handle = #handle { hdl = Hdl, offset = 0, trusted_offset = 0, - write_buffer_size = 0, + write_buffer_size = 0, options = Options, write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), - global_key = GRef, last_used_at = now() }), - {{ok, Ref}, State}; + global_key = GRef, last_used_at = now() }, + put({Ref, fhc_handle}, Handle), + {{ok, Handle}, State}; {error, Reason} -> {{error, Reason}, State} end. 
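[Editor's sketch] get_or_reopen/2 above is the hook that will let the cache close idle fds behind the caller's back: a handle whose hdl field is the atom closed is transparently reopened from its remembered path, mode and options. The same idea in a minimal, self-contained form; the record and process-dictionary keys here are illustrative, not the module's own:

    -record(h, {fd = closed, path, mode}).

    %% Sketch of reopen-on-demand, assuming one #h{} per ref in the
    %% process dictionary, in the style of get_or_reopen/2 above.
    fetch_fd(Ref) ->
        case get({Ref, h}) of
            undefined ->
                {error, not_open};
            #h{fd = closed, path = Path, mode = Mode} = H ->
                %% the fd was reclaimed (e.g. fd-limit pressure);
                %% bring it back before the caller notices
                case file:open(Path, Mode) of
                    {ok, Fd} -> put({Ref, h}, H#h{fd = Fd}), {ok, Fd};
                    Error    -> Error
                end;
            #h{fd = Fd} ->
                {ok, Fd}
        end.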
-- cgit v1.2.1 From 155dab5b8d4cdf6cbea0b3a229cfff3f54ed6db5 Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Thu, 22 Oct 2009 12:13:14 -0400 Subject: QA: changed names to: queue_duration, changed MemoryOvercommit to be available/used (instead of used/available) --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_memory_monitor.erl | 53 ++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index d0123989..a5400254 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -823,7 +823,7 @@ handle_cast(send_memory_monitor_update, State) -> true -> infinity; false -> queue:len(State#q.message_buffer) / MsgSec end, - gen_server2:cast(rabbit_memory_monitor, {push_drain_ratio, self(), BufSec}), + rabbit_memory_monitor:push_queue_duration(self(), BufSec), noreply(State#q{drain_ratio = DrainRatio1}); handle_cast({set_bufsec_limit, BufSec}, State) -> diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 87ee96ad..8c1db615 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -45,7 +45,7 @@ %% v | v | | %% Monitor X--*-+--X---*-+--X------X----X-----X+-----------> %% -%% Or to put it in words. Queue periodically sends (casts) 'push_drain_ratio' +%% Or to put it in words. Queue periodically sends (casts) 'push_queue_duration' %% message to the Monitor (cases 1 and 2 on the asciiart above). Monitor %% _always_ replies with a 'set_bufsec_limit' cast. This way, %% we're pretty sure that the Queue is not hibernated. @@ -58,15 +58,15 @@ %% The main job of this module, is to make sure that all the queues have %% more or less the same number of seconds till become drained. %% This average, seconds-till-queue-is-drained, is then multiplied by -%% the ratio of Used/Total memory. So, if we can 'afford' more memory to be +%% the ratio of Total/Used memory. So, if we can 'afford' more memory to be %% used, we'll report greater number back to the queues. In the out of %% memory case, we are going to reduce the average drain-seconds. %% To acheive all this we need to accumulate the information from every %% queue, and count an average from that. %% -%% real_drain_avg = avg([drain_from_queue_1, queue_2, queue_3, ...]) -%% memory_overcommit = used_memory / allowed_memory -%% desired_drain_avg = real_drain_avg / memory_overcommit +%% real_queue_duration_avg = avg([drain_from_queue_1, queue_2, queue_3, ...]) +%% memory_overcommit = allowed_memory / used_memory +%% desired_queue_duration_avg = real_queue_duration_avg * memory_overcommit -module(rabbit_memory_monitor). @@ -81,12 +81,12 @@ -export([update/0]). --export([register/1]). +-export([register/1, push_queue_duration/2]). --record(state, {timer, %% 'internal_update' timer - drain_dict, %% dict, queue_pid:seconds_till_queue_is_empty - drain_avg, %% global, the desired queue depth (in seconds) - memory_limit %% how much memory we intend to use +-record(state, {timer, %% 'internal_update' timer + queue_duration_dict, %% dict, qpid:seconds_till_queue_is_empty + queue_duration_avg, %% global, the desired queue depth (in sec) + memory_limit %% how much memory we intend to use }). -define(SERVER, ?MODULE). @@ -97,6 +97,7 @@ -spec(start_link/0 :: () -> 'ignore' | {'error',_} | {'ok',pid()}). -spec(register/1 :: (pid()) -> ok). +-spec(push_queue_duration/2 :: (pid(), float() | infinity) -> ok). -spec(init/1 :: ([]) -> {ok, #state{}}). 
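[Editor's sketch] The renamed comments above pin the control law down: desired_queue_duration_avg = real_queue_duration_avg * (allowed_memory / used_memory). When used memory exceeds the limit the multiplier falls below 1 and every queue is asked to hold fewer seconds of messages; with headroom the target grows. As a pure function, with the two special cases that internal_update handles below:

    %% Sketch of the control law from the comments above.
    %% e.g. desired_queue_duration(4.0,  800, 1000) =:= 3.2  (shrink)
    %%      desired_queue_duration(4.0, 1200, 1000) =:= 4.8  (grow)
    desired_queue_duration(infinity, _MemLimit, _MemUsed) -> infinity;
    desired_queue_duration(0.0,      _MemLimit, _MemUsed) -> infinity;
    desired_queue_duration(RealAvg,   MemLimit,  MemUsed) ->
        RealAvg * (MemLimit / MemUsed).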
@@ -121,6 +122,9 @@ update() ->
 register(Pid) ->
     gen_server2:cast(?SERVER, {register, Pid}).
 
+push_queue_duration(Pid, BufSec) ->
+    gen_server2:cast(rabbit_memory_monitor, {push_queue_duration, Pid, BufSec}).
+
 %%----------------------------------------------------------------------------
 
 get_user_memory_limit() ->
@@ -148,8 +152,8 @@ init([]) ->
     {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS,
                                       ?SERVER, update, []),
     {ok, #state{timer = TRef,
-                drain_dict = dict:new(),
-                drain_avg = infinity,
+                queue_duration_dict = dict:new(),
+                queue_duration_avg = infinity,
                 memory_limit = MemoryLimit}}.
 
 handle_call(_Request, _From, State) ->
@@ -163,17 +167,18 @@ handle_cast({register, Pid}, State) ->
     _MRef = erlang:monitor(process, Pid),
     {noreply, State};
 
-handle_cast({push_drain_ratio, Pid, DrainRatio}, State) ->
-    gen_server2:cast(Pid, {set_bufsec_limit, State#state.drain_avg}),
-    {noreply, State#state{drain_dict =
-        dict:store(Pid, DrainRatio, State#state.drain_dict)}};
+handle_cast({push_queue_duration, Pid, DrainRatio}, State) ->
+    gen_server2:cast(Pid, {set_bufsec_limit, State#state.queue_duration_avg}),
+    {noreply, State#state{queue_duration_dict =
+        dict:store(Pid, DrainRatio, State#state.queue_duration_dict)}};
 
 handle_cast(_Request, State) ->
     {noreply, State}.
 
 handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) ->
-    {noreply, State#state{drain_dict = dict:erase(Pid, State#state.drain_dict)}};
+    {noreply, State#state{queue_duration_dict =
+            dict:erase(Pid, State#state.queue_duration_dict)}};
 
 handle_info(_Info, State) ->
     {noreply, State}.
@@ -203,18 +208,18 @@ count_average(List) ->
     end.
 
 internal_update(State) ->
-    %% used memory / available memory
-    MemoryOvercommit = erlang:memory(total) / State#state.memory_limit,
-
+    %% available memory / used memory
+    UsedMemory = erlang:memory(total),
+    MemoryOvercommit = State#state.memory_limit / UsedMemory,
     RealDrainAvg = count_average([V || {_K, V} <-
-        dict:to_list(State#state.drain_dict)]),
+        dict:to_list(State#state.queue_duration_dict)]),
     %% In case of no active queues, feel free to grow. We can't make any
     %% decisions; we have no clue what the average ram_usage/second is.
     %% Nor does the queue.
     DesiredDrainAvg = case RealDrainAvg of
         infinity -> infinity;
         0.0 -> infinity;
-        _ -> RealDrainAvg / MemoryOvercommit
+        _ -> RealDrainAvg * MemoryOvercommit
     end,
     ?LOGDEBUG("DrainAvg Real/Desired:~s/~s MemoryOvercommit:~s~n",
         [ftoa(RealDrainAvg), ftoa(DesiredDrainAvg),
@@ -228,7 +233,7 @@ internal_update(State) ->
                 _ -> ok
             end
         end,
-    lists:map(ReduceMemory, dict:to_list(State#state.drain_dict)),
-    State#state{drain_avg = DesiredDrainAvg}.
+    lists:map(ReduceMemory, dict:to_list(State#state.queue_duration_dict)),
+    State#state{queue_duration_avg = DesiredDrainAvg}.
-- 
cgit v1.2.1 


From 102d19b4f11ce6834d8044fc94fbd1f0d347dc1b Mon Sep 17 00:00:00 2001
From: Matthew Sackman 
Date: Thu, 22 Oct 2009 17:54:07 +0100
Subject: drop the offset arg to read, also account for the possibility that
 we didn't read as much of the file as we asked for. All tests pass.

---
 src/file_handle_cache.erl  | 39 +++++++++++----------------------------
 src/rabbit_queue_index.erl |  8 ++++----
 2 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl
index 6959dcc2..fe86044b 100644
--- a/src/file_handle_cache.erl
+++ b/src/file_handle_cache.erl
@@ -31,7 +31,7 @@
 
 -module(file_handle_cache).
--export([init/0, open/4, close/2, release/2, read/4, append/3, sync/2, +-export([init/0, open/4, close/2, release/2, read/3, append/3, sync/2, position/3, truncate/2, last_sync_offset/2]). -record(file, @@ -134,28 +134,21 @@ close(Ref, State) -> release(_Ref, State) -> %% noop just for now {ok, State}. -read(Ref, NewOffset, Count, State) -> +read(Ref, Count, State) -> case get_or_reopen(Ref, State) of {{ok, #handle { is_read = false }}, State1} -> {{error, not_open_for_reading}, State1}; {{ok, Handle}, State1} -> {Result, Handle1} = case write_buffer(Handle) of - {ok, Handle2} -> - case maybe_seek(NewOffset, Handle2) of - {ok, Handle3 = #handle { hdl = Hdl, - offset = Offset }} -> - case file:read(Hdl, Count) of - {ok, _} = Obj -> - {Obj, Handle3 #handle { - offset = Offset + Count }}; - eof -> - {eof, Handle3 #handle { - at_eof = true }}; - {error, _} = Error -> - {Error, Handle3} - end; - {Error, Handle3} -> {Error, Handle3} + {ok, Handle2 = #handle { hdl = Hdl, offset = Offset }} -> + case file:read(Hdl, Count) of + {ok, Data} = Obj -> + Size = iolist_size(Data), + {Obj, + Handle2 #handle { offset = Offset + Size }}; + eof -> {eof, Handle2 #handle { at_eof = true }}; + Error -> {Error, Handle2} end; {Error, Handle2} -> {Error, Handle2} end, @@ -299,7 +292,7 @@ write_to_buffer(Data, Handle = #handle { write_buffer = WriteBuffer, write_buffer_size = Size, write_buffer_size_limit = Limit }) -> - Size1 = Size + size_of_write_data(Data), + Size1 = Size + iolist_size(Data), Handle1 = Handle #handle { write_buffer = [ Data | WriteBuffer ], write_buffer_size = Size1 }, case Limit /= infinity andalso Size1 > Limit of @@ -322,16 +315,6 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, {Error, Handle} end. -size_of_write_data(Data) -> - size_of_write_data(Data, 0). - -size_of_write_data([], Acc) -> - Acc; -size_of_write_data([A|B], Acc) -> - size_of_write_data(B, size_of_write_data(A, Acc)); -size_of_write_data(Bin, Acc) when is_binary(Bin) -> - size(Bin) + Acc. - is_reader(Mode) -> lists:member(read, Mode). is_writer(Mode) -> lists:member(write, Mode). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a50d839c..50f013f8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -521,7 +521,7 @@ scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> {TotalMsgCount1, State3 #qistate { journal_ack_dict = dict:new() }}. load_journal(Hdl, ADict, HCState) -> - case file_handle_cache:read(Hdl, cur, ?SEQ_BYTES, HCState) of + case file_handle_cache:read(Hdl, ?SEQ_BYTES, HCState) of {{ok, <>}, HCState1} -> load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict), HCState1); {_ErrOrEoF, HCState1} -> {ADict, HCState1} @@ -599,12 +599,12 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, end. load_segment_entries(Hdl, SDict, AckCount, HighRelSeq, HCState) -> - case file_handle_cache:read(Hdl, cur, 1, HCState) of + case file_handle_cache:read(Hdl, 1, HCState) of {{ok, <>}, HCState1} -> {{ok, LSB}, HCState2} = file_handle_cache:read( - Hdl, cur, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1, HCState1), + Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1, HCState1), <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq, HCState2); @@ -614,7 +614,7 @@ load_segment_entries(Hdl, SDict, AckCount, HighRelSeq, HCState) -> %% bytes, the size spec is in bytes, not bits. 
{{ok, <>}, HCState2} = file_handle_cache:read( - Hdl, cur, ?PUBLISH_RECORD_LENGTH_BYTES - 1, HCState1), + Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1, HCState1), <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( -- cgit v1.2.1 From 437164912dc9921ca67ed51259ea2dcf2410ac8b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 12:15:41 +0100 Subject: Removed state from fhc. The reasoning is as follows: 1) the fhc uses the process dict 2) the fhc needs to receive msgs from its server process as to limiting the age of unused fhs. This is clearly per process as such, it's daft to cater for a process having more than one state for the fhc. Thus any state necessary will also be put in the process dict. --- src/file_handle_cache.erl | 113 +++++++++++++------------- src/rabbit_queue_index.erl | 194 ++++++++++++++++++--------------------------- 2 files changed, 134 insertions(+), 173 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index fe86044b..6cb4c094 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -31,8 +31,8 @@ -module(file_handle_cache). --export([init/0, open/4, close/2, release/2, read/3, append/3, sync/2, - position/3, truncate/2, last_sync_offset/2]). +-export([open/3, close/1, release/1, read/2, append/2, sync/1, + position/2, truncate/1, last_sync_offset/1]). -record(file, { reader_count, @@ -56,11 +56,9 @@ last_used_at }). -init() -> empty_state. - -open(Path, Mode, Options, State) -> +open(Path, Mode, Options) -> case is_appender(Mode) of - true -> {{error, append_not_supported}, State}; + true -> {error, append_not_supported}; false -> Path1 = filename:absname(Path), case get({Path1, fhc_path}) of @@ -71,7 +69,7 @@ open(Path, Mode, Options, State) -> IsWriter = is_writer(Mode1), case IsWriter andalso HasWriter of true -> - {{error, writer_exists}, State}; + {error, writer_exists}; false -> RCount1 = case is_reader(Mode1) of true -> RCount + 1; @@ -82,10 +80,9 @@ open(Path, Mode, Options, State) -> reader_count = RCount1, has_writer = HasWriter orelse IsWriter }), Ref = make_ref(), - case open1(Path1, Mode1, Options, Ref, GRef, State) - of - {{ok, _Handle}, State} -> {{ok, Ref}, State}; - {Error, State} -> {Error, State} + case open1(Path1, Mode1, Options, Ref, GRef) of + {ok, _Handle} -> {ok, Ref}; + Error -> Error end end; undefined -> @@ -94,13 +91,13 @@ open(Path, Mode, Options, State) -> put({GRef, fhc_file}, #file { reader_count = 0, has_writer = false, path = Path1 }), - open(Path, Mode, Options, State) + open(Path, Mode, Options) end end. -close(Ref, State) -> +close(Ref) -> case erase({Ref, fhc_handle}) of - undefined -> {ok, State}; + undefined -> ok; Handle -> case write_buffer(Handle) of {ok, #handle { hdl = Hdl, global_key = GRef, @@ -124,21 +121,21 @@ close(Ref, State) -> File #file { reader_count = RCount1, has_writer = HasWriter1 }) end, - {ok, State}; + ok; {Error, Handle1} -> put({Ref, fhc_handle}, Handle1), - {Error, State} + Error end end. -release(_Ref, State) -> %% noop just for now - {ok, State}. +release(_Ref) -> %% noop just for now + ok. 
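[Editor's sketch] The reasoning in the commit message leans on a property worth spelling out: the process dictionary is private to each process, so state kept there is necessarily per process, and threading a separate state value through every call adds nothing. A tiny demonstration (module name hypothetical):

    -module(pd_demo).
    -export([demo/0]).

    %% A value put in one process's dictionary is invisible elsewhere.
    demo() ->
        put(fhc_state, some_state),
        Self = self(),
        spawn(fun () -> Self ! {other_process_sees, get(fhc_state)} end),
        receive
            {other_process_sees, Visible} -> Visible  %% -> undefined
        end.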
-read(Ref, Count, State) -> - case get_or_reopen(Ref, State) of - {{ok, #handle { is_read = false }}, State1} -> - {{error, not_open_for_reading}, State1}; - {{ok, Handle}, State1} -> +read(Ref, Count) -> + case get_or_reopen(Ref) of + {ok, #handle { is_read = false }} -> + {error, not_open_for_reading}; + {ok, Handle} -> {Result, Handle1} = case write_buffer(Handle) of {ok, Handle2 = #handle { hdl = Hdl, offset = Offset }} -> @@ -153,15 +150,15 @@ read(Ref, Count, State) -> {Error, Handle2} -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State1}; + Result; ErrorAndState -> ErrorAndState end. -append(Ref, Data, State) -> - case get_or_reopen(Ref, State) of - {{ok, #handle { is_write = false }}, State1} -> - {{error, not_open_for_writing}, State1}; - {{ok, Handle}, State1} -> +append(Ref, Data) -> + case get_or_reopen(Ref) of + {ok, #handle { is_write = false }} -> + {error, not_open_for_writing}; + {ok, Handle} -> {Result, Handle1} = case maybe_seek(eof, Handle) of {ok, Handle2 = #handle { at_eof = true }} -> @@ -170,54 +167,54 @@ append(Ref, Data, State) -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State1}; + Result; ErrorAndState -> ErrorAndState end. -last_sync_offset(Ref, State) -> - case get_or_reopen(Ref, State) of - {{ok, #handle { trusted_offset = TrustedOffset }}, State1} -> - {{ok, TrustedOffset}, State1}; +last_sync_offset(Ref) -> + case get_or_reopen(Ref) of + {ok, #handle { trusted_offset = TrustedOffset }} -> + {ok, TrustedOffset}; ErrorAndState -> ErrorAndState end. -position(Ref, NewOffset, State) -> - case get_or_reopen(Ref, State) of - {{ok, Handle}, State1} -> +position(Ref, NewOffset) -> + case get_or_reopen(Ref) of + {ok, Handle} -> {Result, Handle1} = case write_buffer(Handle) of {ok, Handle2} -> maybe_seek(NewOffset, Handle2); {Error, Handle2} -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State1}; + Result; ErrorAndState -> ErrorAndState end. -sync(Ref, State) -> - case get_or_reopen(Ref, State) of - {{ok, Handle = #handle { write_buffer = [], hdl = Hdl, - offset = Offset }}, State1} -> +sync(Ref) -> + case get_or_reopen(Ref) of + {ok, Handle = #handle { write_buffer = [], hdl = Hdl, + offset = Offset }} -> {Result, Handle1} = case file:sync(Hdl) of ok -> {ok, Handle #handle { trusted_offset = Offset }}; Error -> {Error, Handle} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State1}; - {{ok, Handle = #handle { at_eof = true }}, State1} -> + Result; + {ok, Handle = #handle { at_eof = true }} -> %% we can't have content in the buffer without being at eof {Result, Handle1} = write_buffer(Handle), put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - {Result, State1}; + Result; ErrorAndState -> ErrorAndState end. -truncate(Ref, State) -> - case get_or_reopen(Ref, State) of - {{ok, #handle { is_write = false }}, State1} -> - {{error, not_open_for_writing}, State1}; - {{ok, Handle}, State1} -> +truncate(Ref) -> + case get_or_reopen(Ref) of + {ok, #handle { is_write = false }} -> + {error, not_open_for_writing}; + {ok, Handle} -> {Result, Handle1} = case write_buffer(Handle) of {ok, @@ -236,21 +233,21 @@ truncate(Ref, State) -> {Error, Handle2} -> {Error, Handle2} end, put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now () }), - {Result, State1}; + Result; ErrorAndState -> ErrorAndState end. 
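[Editor's sketch] Note how trusted_offset evolves across these operations: it is the high-water mark below which data is known to be safely on disk. last_sync_offset/1 merely reports it, sync advances it to the current offset once file:sync/1 succeeds, and truncate clamps it with lists:min/1, since truncation can discard bytes that had already been synced. The field's transitions in isolation:

    %% Sketch of the trusted_offset transitions visible above; Offset is
    %% the handle's file position when the operation runs.
    trusted_after(sync,     Offset, _Trusted) -> Offset;
    trusted_after(truncate, Offset,  Trusted) -> lists:min([Offset, Trusted]);
    trusted_after(_Other,  _Offset,  Trusted) -> Trusted.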
-get_or_reopen(Ref, State) -> +get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of - undefined -> {{error, not_open}, State}; + undefined -> {error, not_open}; #handle { hdl = closed, mode = Mode, global_key = GRef, options = Options } -> #file { path = Path } = get({GRef, fhc_file}), - open1(Path, Mode, Options, Ref, GRef, State); - Handle -> {{ok, Handle}, State} + open1(Path, Mode, Options, Ref, GRef); + Handle -> {ok, Handle} end. -open1(Path, Mode, Options, Ref, GRef, State) -> +open1(Path, Mode, Options, Ref, GRef) -> case file:open(Path, Mode) of {ok, Hdl} -> WriteBufferSize = @@ -267,9 +264,9 @@ open1(Path, Mode, Options, Ref, GRef, State) -> is_write = is_writer(Mode), is_read = is_reader(Mode), global_key = GRef, last_used_at = now() }, put({Ref, fhc_handle}, Handle), - {{ok, Handle}, State}; + {ok, Handle}; {error, Reason} -> - {{error, Reason}, State} + {error, Reason} end. maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 50f013f8..57abfa9d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -109,7 +109,6 @@ -record(qistate, { dir, seg_num_handles, - hc_state, journal_ack_count, journal_ack_dict, seg_ack_counts @@ -125,7 +124,6 @@ -type(seq_id() :: integer()). -type(qistate() :: #qistate { dir :: file_path(), seg_num_handles :: dict(), - hc_state :: any(), journal_ack_count :: integer(), journal_ack_dict :: dict(), seg_ack_counts :: dict() @@ -156,13 +154,11 @@ %%---------------------------------------------------------------------------- init(Name) -> - HCState = file_handle_cache:init(), StrName = queue_name_to_dir_name(Name), Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), State = #qistate { dir = Dir, seg_num_handles = dict:new(), - hc_state = HCState, journal_ack_count = 0, journal_ack_dict = dict:new(), seg_ack_counts = dict:new() }, @@ -185,38 +181,31 @@ write_published(MsgId, SeqId, IsPersistent, State) ?MSG_ID_BYTES = size(MsgId), {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, HCState} = - file_handle_cache:append(Hdl, - <>, - State1 #qistate.hc_state), - State1 #qistate { hc_state = HCState }. + ok = file_handle_cache:append(Hdl, + <>), + State1. write_delivered(SeqId, State) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, HCState} = file_handle_cache:append( - Hdl, - <>, - State1 #qistate.hc_state), - State1 #qistate { hc_state = HCState }. + ok = file_handle_cache:append( + Hdl, <>), + State1. 
write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict, journal_ack_count = JAckCount }) -> {Hdl, State1} = get_journal_handle(State), - {JAckDict1, JAckCount1, HCState} = + {JAckDict1, JAckCount1} = lists:foldl( - fun (SeqId, {JAckDict2, JAckCount2, HCStateN}) -> - {ok, HCStateM} = file_handle_cache:append( - Hdl, <>, HCStateN), - {add_ack_to_ack_dict(SeqId, JAckDict2), - JAckCount2 + 1, HCStateM} - end, {JAckDict, JAckCount, State1 #qistate.hc_state}, SeqIds), + fun (SeqId, {JAckDict2, JAckCount2}) -> + ok = file_handle_cache:append(Hdl, <>), + {add_ack_to_ack_dict(SeqId, JAckDict2), JAckCount2 + 1} + end, {JAckDict, JAckCount}, SeqIds), State2 = State1 #qistate { journal_ack_dict = JAckDict1, - journal_ack_count = JAckCount1, - hc_state = HCState }, + journal_ack_count = JAckCount1 }, case JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT of true -> full_flush_journal(State2); false -> State2 @@ -228,14 +217,11 @@ full_flush_journal(State) -> {false, State1} -> State1 end. -sync_all(State = #qistate { hc_state = HCState, seg_num_handles = SegHdls }) -> - HCState1 = - dict:fold( - fun (_Key, Hdl, HCStateN) -> - {ok, HCStateM} = file_handle_cache:sync(Hdl, HCStateN), - HCStateM - end, HCState, SegHdls), - State #qistate { hc_state = HCState1 }. +sync_all(State = #qistate { seg_num_handles = SegHdls }) -> + ok = dict:fold(fun (_Key, Hdl, ok) -> + file_handle_cache:sync(Hdl) + end, ok, SegHdls), + State. flush_journal(State = #qistate { journal_ack_count = 0 }) -> {false, State}; @@ -249,11 +235,10 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, journal_ack_count = JAckCount1 }, if JAckCount1 == 0 -> - {Hdl, State3 = #qistate { hc_state = HCState }} = - get_journal_handle(State2), - {ok, HCState1} = file_handle_cache:position(Hdl, bof, HCState), - {ok, HCState2} = file_handle_cache:truncate(Hdl, HCState1), - {false, State3 #qistate { hc_state = HCState2 }}; + {Hdl, State3} = get_journal_handle(State2), + ok = file_handle_cache:position(Hdl, bof), + ok = file_handle_cache:truncate(Hdl), + {false, State3}; JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> flush_journal(State2); true -> @@ -381,36 +366,27 @@ get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls } end. new_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> - State1 = #qistate { hc_state = HCState, - seg_num_handles = SegHdls1 } = + State1 = #qistate { seg_num_handles = SegHdls1 } = case dict:size(SegHdls) > 100 of true -> close_all_handles(State); false -> State end, - {{ok, Hdl}, HCState1} = - file_handle_cache:open(Path, Mode, [{write_buffer, infinity}], HCState), - {Hdl, State1 #qistate { hc_state = HCState1, - seg_num_handles = dict:store(Key, Hdl, SegHdls1) }}. + {ok, Hdl} = file_handle_cache:open(Path, Mode, [{write_buffer, infinity}]), + {Hdl, State1 #qistate { seg_num_handles = dict:store(Key, Hdl, SegHdls1) }}. -close_handle(Key, State = #qistate { hc_state = HCState, - seg_num_handles = SegHdls }) -> +close_handle(Key, State = #qistate { seg_num_handles = SegHdls }) -> case dict:find(Key, SegHdls) of {ok, Hdl} -> - {ok, HCState1} = file_handle_cache:close(Hdl, HCState), - State #qistate { hc_state = HCState1, - seg_num_handles = dict:erase(Key, SegHdls) }; + ok = file_handle_cache:close(Hdl), + State #qistate { seg_num_handles = dict:erase(Key, SegHdls) }; error -> State end. 
-close_all_handles(State = #qistate { hc_state = HCState, - seg_num_handles = SegHdls }) -> - HCState1 = - dict:fold( - fun (_Key, Hdl, HCStateN) -> - {ok, HCStateM} = file_handle_cache:close(Hdl, HCStateN), - HCStateM - end, HCState, SegHdls), - State #qistate { hc_state = HCState1, seg_num_handles = dict:new() }. +close_all_handles(State = #qistate { seg_num_handles = SegHdls }) -> + ok = dict:fold(fun (_Key, Hdl, ok) -> + file_handle_cache:close(Hdl) + end, ok, SegHdls), + State #qistate { seg_num_handles = dict:new() }. bool_to_int(true ) -> 1; bool_to_int(false) -> 0. @@ -507,24 +483,23 @@ find_ack_counts_and_deliver_transient_msgs(State = #qistate { dir = Dir }) -> scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), - {Hdl, State1 = #qistate { hc_state = HCState, - journal_ack_dict = JAckDict }} = + {Hdl, State1 = #qistate { journal_ack_dict = JAckDict }} = get_journal_handle(State), %% ADict may well contain duplicates. However, this is ok, due to %% the use of sets in replay_journal_acks_to_segment - {ADict, HCState1} = load_journal(Hdl, JAckDict, HCState), - State2 = close_handle(journal, State1 #qistate { hc_state = HCState1 }), + ADict = load_journal(Hdl, JAckDict), + State2 = close_handle(journal, State1), {TotalMsgCount1, State3} = dict:fold(fun replay_journal_acks_to_segment/3, {TotalMsgCount, State2}, ADict), ok = file:delete(JournalPath), {TotalMsgCount1, State3 #qistate { journal_ack_dict = dict:new() }}. -load_journal(Hdl, ADict, HCState) -> - case file_handle_cache:read(Hdl, ?SEQ_BYTES, HCState) of - {{ok, <>}, HCState1} -> - load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict), HCState1); - {_ErrOrEoF, HCState1} -> {ADict, HCState1} +load_journal(Hdl, ADict) -> + case file_handle_cache:read(Hdl, ?SEQ_BYTES) of + {ok, <>} -> + load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict)); + _ErrOrEoF -> ADict end. replay_journal_acks_to_segment(_, [], Acc) -> @@ -554,17 +529,14 @@ deliver_transient(SegNum, SDict, State) -> {[RelSeq | AckMeAcc], DeliverMeAcc} end, {[], []}, SDict), {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, HCState} = - case DeliverMe of - [] -> {ok, State1 #qistate.hc_state}; - _ -> - file_handle_cache:append( - Hdl, - [ <> || RelSeq <- DeliverMe ], - State1 #qistate.hc_state) - end, - {AckMe, State1 #qistate { hc_state = HCState }}. + ok = case DeliverMe of + [] -> ok; + _ -> file_handle_cache:append( + Hdl, + [ <> || RelSeq <- DeliverMe ]) + end, + {AckMe, State1}. 
%%---------------------------------------------------------------------------- @@ -580,12 +552,11 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, case SegmentExists of false -> {dict:new(), 0, 0, State}; true -> - {Hdl, State1 = #qistate { hc_state = HCState, - journal_ack_dict = JAckDict }} = + {Hdl, State1 = #qistate { journal_ack_dict = JAckDict }} = get_seg_handle(SegNum, State), - {ok, HCState1} = file_handle_cache:position(Hdl, bof, HCState), - {SDict, AckCount, HighRelSeq, HCState2} = - load_segment_entries(Hdl, dict:new(), 0, 0, HCState1), + ok = file_handle_cache:position(Hdl, bof), + {SDict, AckCount, HighRelSeq} = + load_segment_entries(Hdl, dict:new(), 0, 0), RelSeqs = case dict:find(SegNum, JAckDict) of {ok, RelSeqs1} -> RelSeqs1; error -> [] @@ -594,35 +565,31 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, lists:foldl(fun (RelSeq, {SDict2, AckCount2}) -> {dict:erase(RelSeq, SDict2), AckCount2 + 1} end, {SDict, AckCount}, RelSeqs), - {SDict1, AckCount1, HighRelSeq, - State1 #qistate { hc_state = HCState2 }} + {SDict1, AckCount1, HighRelSeq, State1} end. -load_segment_entries(Hdl, SDict, AckCount, HighRelSeq, HCState) -> - case file_handle_cache:read(Hdl, 1, HCState) of - {{ok, <>}, HCState1} -> - {{ok, LSB}, HCState2} = - file_handle_cache:read( - Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1, HCState1), +load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> + case file_handle_cache:read(Hdl, 1) of + {ok, <>} -> + {ok, LSB} = file_handle_cache:read( + Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), - load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq, HCState2); - {{ok, <>}, HCState1} -> + load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq); + {ok, <>} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. - {{ok, <>}, HCState2} = + {ok, <>} = file_handle_cache:read( - Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1, HCState1), + Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), - load_segment_entries( - Hdl, dict:store(RelSeq, {MsgId, false, - 1 == IsPersistentNum}, - SDict), AckCount, HighRelSeq1, HCState2); - {_ErrOrEoF, HCState1} -> - {SDict, AckCount, HighRelSeq, HCState1} + load_segment_entries(Hdl, dict:store(RelSeq, {MsgId, false, + 1 == IsPersistentNum}, + SDict), AckCount, HighRelSeq1); + _ErrOrEoF -> {SDict, AckCount, HighRelSeq} end. deliver_or_ack_msg(SDict, AckCount, RelSeq) -> @@ -665,15 +632,12 @@ append_acks_to_segment(SegNum, AckCount, Acks, State = #qistate { dir = Dir }) append_acks_to_segment(SegNum, AckCount, Acks, State) when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT -> {Hdl, State1} = get_seg_handle(SegNum, State), - {AckCount1, HCState} = + {ok, AckCount1} = lists:foldl( - fun (RelSeq, {AckCount2, HCStateN}) -> - {ok, HCStateM} = - file_handle_cache:append( - Hdl, - <>, HCStateN), - {AckCount2 + 1, HCStateM} - end, {AckCount, State1 #qistate.hc_state}, Acks), - {ok, HCState1} = file_handle_cache:sync(Hdl, HCState), - {AckCount1, State1 #qistate { hc_state = HCState1 }}. + fun (RelSeq, {ok, AckCount2}) -> + {file_handle_cache:append( + Hdl, <>), AckCount2 + 1} + end, {ok, AckCount}, Acks), + ok = file_handle_cache:sync(Hdl), + {AckCount1, State1}. 
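[Editor's sketch] With the threaded state gone, calling code reads as plain sequential Erlang. A usage sketch against the API as it stands at this point (open/3, append/2, sync/1, position/2, read/2, close/1); the path, mode list, buffer size and payload are made up for illustration:

    %% Illustrative round trip through the now-stateless API.
    round_trip() ->
        {ok, Ref} = file_handle_cache:open("/tmp/fhc_demo.dat",
                                           [read, write, binary, raw],
                                           [{write_buffer, 4096}]),
        ok        = file_handle_cache:append(Ref, <<"hello">>),
        ok        = file_handle_cache:sync(Ref),
        ok        = file_handle_cache:position(Ref, bof),
        {ok, Bin} = file_handle_cache:read(Ref, 5),
        ok        = file_handle_cache:close(Ref),
        Bin.  %% <<"hello">>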
-- cgit v1.2.1 From 35b68c881035bfde9884caff82c8d8134bc32fc5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 13:37:38 +0100 Subject: Added is_dirty, which is necessary so that we can avoid unnecessary syncs. It's not good enough to just look at the write buffer, because the write_buffer may not be being used. Also corrected gross mistakes in sync, moved around the last_used_at update, and some cosmetics too --- src/file_handle_cache.erl | 61 +++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 6cb4c094..e45156dc 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -44,6 +44,7 @@ { hdl, offset, trusted_offset, + is_dirty, write_buffer_size, write_buffer_size_limit, write_buffer, @@ -100,11 +101,14 @@ close(Ref) -> undefined -> ok; Handle -> case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, global_key = GRef, + {ok, #handle { hdl = Hdl, global_key = GRef, is_dirty = IsDirty, is_read = IsReader, is_write = IsWriter }} -> case Hdl of closed -> ok; - _ -> ok = file:sync(Hdl), + _ -> ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, ok = file:close(Hdl) end, #file { reader_count = RCount, has_writer = HasWriter, @@ -149,9 +153,9 @@ read(Ref, Count) -> end; {Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + put({Ref, fhc_handle}, Handle1), Result; - ErrorAndState -> ErrorAndState + Error -> Error end. append(Ref, Data) -> @@ -166,16 +170,16 @@ append(Ref, Data) -> {{error, _} = Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + put({Ref, fhc_handle}, Handle1), Result; - ErrorAndState -> ErrorAndState + Error -> Error end. last_sync_offset(Ref) -> case get_or_reopen(Ref) of {ok, #handle { trusted_offset = TrustedOffset }} -> {ok, TrustedOffset}; - ErrorAndState -> ErrorAndState + Error -> Error end. position(Ref, NewOffset) -> @@ -186,28 +190,32 @@ position(Ref, NewOffset) -> {ok, Handle2} -> maybe_seek(NewOffset, Handle2); {Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + put({Ref, fhc_handle}, Handle1), Result; - ErrorAndState -> ErrorAndState + Error -> Error end. sync(Ref) -> case get_or_reopen(Ref) of - {ok, Handle = #handle { write_buffer = [], hdl = Hdl, - offset = Offset }} -> + {ok, #handle { is_dirty = false, write_buffer = [] }} -> + ok; + {ok, Handle} -> + %% write_buffer will set is_dirty, or leave it set if buffer empty {Result, Handle1} = - case file:sync(Hdl) of - ok -> {ok, Handle #handle { trusted_offset = Offset }}; + case write_buffer(Handle) of + {ok, Handle2 = #handle { + hdl = Hdl, offset = Offset, is_dirty = true }} -> + case file:sync(Hdl) of + ok -> {ok, + Handle2 #handle { trusted_offset = Offset, + is_dirty = false }}; + Error -> {Error, Handle2} + end; Error -> {Error, Handle} end, - put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), - Result; - {ok, Handle = #handle { at_eof = true }} -> - %% we can't have content in the buffer without being at eof - {Result, Handle1} = write_buffer(Handle), - put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now() }), + put({Ref, fhc_handle}, Handle1), Result; - ErrorAndState -> ErrorAndState + Error -> Error end. 
truncate(Ref) -> @@ -232,9 +240,9 @@ truncate(Ref) -> end; {Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1 #handle { last_used_at = now () }), + put({Ref, fhc_handle}, Handle1), Result; - ErrorAndState -> ErrorAndState + Error -> Error end. get_or_reopen(Ref) -> @@ -244,7 +252,7 @@ get_or_reopen(Ref) -> options = Options } -> #file { path = Path } = get({GRef, fhc_file}), open1(Path, Mode, Options, Ref, GRef); - Handle -> {ok, Handle} + Handle -> {ok, Handle #handle { last_used_at = now() }} end. open1(Path, Mode, Options, Ref, GRef) -> @@ -262,7 +270,8 @@ open1(Path, Mode, Options, Ref, GRef) -> write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), - global_key = GRef, last_used_at = now() }, + global_key = GRef, last_used_at = now(), + is_dirty = false }, put({Ref, fhc_handle}, Handle), {ok, Handle}; {error, Reason} -> @@ -284,7 +293,7 @@ maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, write_to_buffer(Data, Handle = #handle { hdl = Hdl, write_buffer_size_limit = 0 }) -> - {file:write(Hdl, Data), Handle}; + {file:write(Hdl, Data), Handle #handle { is_dirty = true }}; write_to_buffer(Data, Handle = #handle { write_buffer = WriteBuffer, write_buffer_size = Size, @@ -307,7 +316,7 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, ok -> Offset1 = Offset + DataSize, {ok, Handle #handle { offset = Offset1, write_buffer = [], - write_buffer_size = 0 }}; + write_buffer_size = 0, is_dirty = true }}; {error, _} = Error -> {Error, Handle} end. -- cgit v1.2.1 From d0110ac96b37482fedee74a0478081db42b46409 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 13:38:56 +0100 Subject: finally added support for smart syncing of the queue index on commit. Note performance is pretty bad as although the msg_store is doing coalescing, the queue index isn't yet, so it's fractionally above one call to file:sync per commit --- src/rabbit_queue_index.erl | 22 ++++++++++++++++------ src/rabbit_variable_queue.erl | 9 ++++----- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 57abfa9d..7259eaa2 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, flush_journal/1, sync_all/1, + write_delivered/2, write_acks/2, flush_journal/1, sync_seq_ids/2, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). @@ -217,11 +217,21 @@ full_flush_journal(State) -> {false, State1} -> State1 end. -sync_all(State = #qistate { seg_num_handles = SegHdls }) -> - ok = dict:fold(fun (_Key, Hdl, ok) -> - file_handle_cache:sync(Hdl) - end, ok, SegHdls), - State. +sync_seq_ids(SeqIds, State) -> + {Hdl, State1} = get_journal_handle(State), + ok = file_handle_cache:sync(Hdl), + SegNumsSet = + lists:foldl( + fun (SeqId, Set) -> + {SegNum, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + sets:add_element(SegNum, Set) + end, sets:new(), SeqIds), + sets:fold( + fun (SegNum, StateN) -> + {Hdl1, StateM} = get_seg_handle(SegNum, StateN), + ok = file_handle_cache:sync(Hdl1), + StateM + end, State1, SegNumsSet). 
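sync_seq_ids/2 above syncs the journal handle once and then each distinct segment file once, which is why it first folds the sequence ids into a set of segment numbers. The mapping it relies on, seq_id_to_seg_and_rel_seq_id/1, is not shown in this patch; on the usual div/rem assumption it would look like the following, with an illustrative segment size:

-module(seq_id_sketch).
-export([seq_id_to_seg_and_rel_seq_id/1]).

-define(SEGMENT_ENTRIES_COUNT, 16384).   %% assumed value, for illustration only

seq_id_to_seg_and_rel_seq_id(SeqId) ->
    {SeqId div ?SEGMENT_ENTRIES_COUNT,   %% which segment file
     SeqId rem ?SEGMENT_ENTRIES_COUNT}.  %% relative sequence within it

%% e.g. seq_id_to_seg_and_rel_seq_id(16385) =:= {1, 1}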
flush_journal(State = #qistate { journal_ack_count = 0 }) -> {false, State}; diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7a85b302..4dce1f6a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -379,15 +379,14 @@ tx_commit(Pubs, AckTags, From, State) -> end. do_tx_commit(Pubs, AckTags, From, State) -> - {_PubSeqIds, State1} = + State1 = ack(AckTags, State), + {PubSeqIds, State2 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg, {SeqIdsAcc, StateN}) -> {SeqId, StateN1} = publish(Msg, false, true, StateN), {[SeqId | SeqIdsAcc], StateN1} - end, {[], State}, Pubs), - %% TODO need to do something here about syncing the queue index, PubSeqIds - State2 = #vqstate { index_state = IndexState } = ack(AckTags, State1), - IndexState1 = rabbit_queue_index:sync_all(IndexState), + end, {[], State1}, Pubs), + IndexState1 = rabbit_queue_index:sync_seq_ids(PubSeqIds, IndexState), gen_server2:reply(From, ok), State2 #vqstate { index_state = IndexState1 }. -- cgit v1.2.1 From ef4c721b49ae6a84dccc202abca3a10246b14bd9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 14:28:24 +0100 Subject: mainly cosmetic renamings. Also added support to queue_index:sync so that you can indicate whether or not the ack journal should be sync'd. Use of strace shows that in the test in bug 20470 #c6, we're doing 2 fsyncs per txn, which makes sense - one for the qi, and one for the msg_store. However, only getting about 180 txns/sec, as opposed to 350 as reported in that bug. --- src/rabbit_amqqueue.erl | 11 ++++++----- src/rabbit_amqqueue_process.erl | 5 +++-- src/rabbit_queue_index.erl | 12 ++++++++---- src/rabbit_variable_queue.erl | 15 +++++++++------ 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 561e9e69..d0a5f205 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -40,7 +40,7 @@ -export([list/1, info/1, info/2, info_all/1, info_all/2]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, tx_commit_callback/4]). +-export([notify_sent/2, unblock/2, tx_commit_msg_store_callback/4]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -107,8 +107,8 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(tx_commit_callback/4 :: (pid(), [message()], [acktag()], {pid(), any()}) - -> 'ok'). +-spec(tx_commit_msg_store_callback/4 :: (pid(), [message()], [acktag()], + {pid(), any()}) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -322,8 +322,9 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 8, {unblock, ChPid}). -tx_commit_callback(QPid, Pubs, AckTags, From) -> - gen_server2:pcast(QPid, 8, {tx_commit_callback, Pubs, AckTags, From}). +tx_commit_msg_store_callback(QPid, Pubs, AckTags, From) -> + gen_server2:pcast(QPid, 8, + {tx_commit_msg_store_callback, Pubs, AckTags, From}). 
internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index e2477e98..434652a5 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -788,12 +788,13 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({tx_commit_callback, Pubs, AckTags, From}, +handle_cast({tx_commit_msg_store_callback, Pubs, AckTags, From}, State = #q{variable_queue_state = VQS}) -> noreply( run_message_queue( State#q{variable_queue_state = - rabbit_variable_queue:do_tx_commit(Pubs, AckTags, From, VQS)})); + rabbit_variable_queue:tx_commit_from_msg_store( + Pubs, AckTags, From, VQS)})); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 7259eaa2..67637ed2 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, flush_journal/1, sync_seq_ids/2, + write_delivered/2, write_acks/2, flush_journal/1, sync_seq_ids/3, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). @@ -217,9 +217,13 @@ full_flush_journal(State) -> {false, State1} -> State1 end. -sync_seq_ids(SeqIds, State) -> - {Hdl, State1} = get_journal_handle(State), - ok = file_handle_cache:sync(Hdl), +sync_seq_ids(SeqIds, SyncAckJournal, State) -> + State1 = case SyncAckJournal of + true -> {Hdl, State2} = get_journal_handle(State), + ok = file_handle_cache:sync(Hdl), + State2; + false -> State + end, SegNumsSet = lists:foldl( fun (SeqId, Set) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4dce1f6a..cb2bdca7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -34,7 +34,7 @@ -export([init/1, terminate/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, - requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, do_tx_commit/4]). + requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4]). %%---------------------------------------------------------------------------- @@ -367,26 +367,29 @@ tx_rollback(Pubs, State) -> tx_commit(Pubs, AckTags, From, State) -> case persistent_msg_ids(Pubs) of [] -> - {true, do_tx_commit(Pubs, AckTags, From, State)}; + {true, tx_commit_from_msg_store(Pubs, AckTags, From, State)}; PersistentMsgIds -> Self = self(), ok = rabbit_msg_store:sync( PersistentMsgIds, - fun () -> ok = rabbit_amqqueue:tx_commit_callback( + fun () -> ok = rabbit_amqqueue:tx_commit_msg_store_callback( Self, Pubs, AckTags, From) end), {false, State} end. 
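The boolean tx_commit now returns alongside the state says whether the commit completed inline: with no persistent messages there is nothing to fsync, so the work happens at once; otherwise the msg_store is handed a closure and the queue process is re-entered later via the callback cast. persistent_msg_ids/1 itself is not in this hunk; a plausible sketch, where the choice of record field is an assumption:

%% assumes -include("rabbit.hrl") for #basic_message{}; the use of
%% persistent_key as the store key is a guess, for illustration only
persistent_msg_ids(Pubs) ->
    [MsgId || #basic_message { is_persistent = true,
                               persistent_key = MsgId } <- Pubs].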
-do_tx_commit(Pubs, AckTags, From, State) -> - State1 = ack(AckTags, State), +tx_commit_from_msg_store(Pubs, AckTags, From, State) -> + DiskAcks = + lists:filter(fun (AckTag) -> AckTag /= ack_not_on_disk end, AckTags), + State1 = ack(DiskAcks, State), {PubSeqIds, State2 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg, {SeqIdsAcc, StateN}) -> {SeqId, StateN1} = publish(Msg, false, true, StateN), {[SeqId | SeqIdsAcc], StateN1} end, {[], State1}, Pubs), - IndexState1 = rabbit_queue_index:sync_seq_ids(PubSeqIds, IndexState), + IndexState1 = + rabbit_queue_index:sync_seq_ids(PubSeqIds, [] /= DiskAcks, IndexState), gen_server2:reply(From, ok), State2 #vqstate { index_state = IndexState1 }. -- cgit v1.2.1 From b15cd73c908aa6978ebea90fce14918bc36ac9ac Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 14:39:55 +0100 Subject: Yes, turns out it's a good idea to observe the is_persistent flag... --- src/rabbit_variable_queue.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cb2bdca7..d9520d00 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -384,9 +384,14 @@ tx_commit_from_msg_store(Pubs, AckTags, From, State) -> State1 = ack(DiskAcks, State), {PubSeqIds, State2 = #vqstate { index_state = IndexState }} = lists:foldl( - fun (Msg, {SeqIdsAcc, StateN}) -> + fun (Msg = #basic_message { is_persistent = IsPersistent }, + {SeqIdsAcc, StateN}) -> {SeqId, StateN1} = publish(Msg, false, true, StateN), - {[SeqId | SeqIdsAcc], StateN1} + SeqIdsAcc1 = case IsPersistent of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, + {SeqIdsAcc1, StateN1} end, {[], State1}, Pubs), IndexState1 = rabbit_queue_index:sync_seq_ids(PubSeqIds, [] /= DiskAcks, IndexState), -- cgit v1.2.1 From 9effab3edd8b3b9bc8f70dab0b9e25d88a3b228a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 14:48:27 +0100 Subject: should definitely sync after truncating the ack journal --- src/rabbit_queue_index.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 67637ed2..d9302dde 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -252,6 +252,7 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, {Hdl, State3} = get_journal_handle(State2), ok = file_handle_cache:position(Hdl, bof), ok = file_handle_cache:truncate(Hdl), + ok = file_handle_cache:sync(Hdl), {false, State3}; JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> flush_journal(State2); -- cgit v1.2.1 From c6ceacd274f52ccbb40d1e4ace14e17b1939de7e Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Fri, 23 Oct 2009 10:59:29 -0400 Subject: dict->ets, and a refactoring --- src/rabbit_amqqueue_process.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a5400254..d402ef97 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -826,11 +826,11 @@ handle_cast(send_memory_monitor_update, State) -> rabbit_memory_monitor:push_queue_duration(self(), BufSec), noreply(State#q{drain_ratio = DrainRatio1}); -handle_cast({set_bufsec_limit, BufSec}, State) -> +handle_cast({set_queue_duration, QueueDuration}, State) -> DrainRatio = State#q.drain_ratio, - DesiredQueueLength = case BufSec of + DesiredQueueLength = case QueueDuration of infinity -> infinity; - _ -> BufSec * DrainRatio#ratio.ratio * 1000000 + _ 
-> QueueDuration * DrainRatio#ratio.ratio * 1000000 end, %% Just to proove that something is happening. ?LOGDEBUG("Queue size is ~8p, should be ~p~n", -- cgit v1.2.1 From 9379dcca35ed11fc16c2954e832f66f3f9475346 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 17:17:28 +0100 Subject: removal of dumb code - need to find a better way of dealing with this problem --- src/rabbit_queue_index.erl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d9302dde..e3057f9c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -381,13 +381,8 @@ get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls } end. new_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> - State1 = #qistate { seg_num_handles = SegHdls1 } = - case dict:size(SegHdls) > 100 of - true -> close_all_handles(State); - false -> State - end, {ok, Hdl} = file_handle_cache:open(Path, Mode, [{write_buffer, infinity}]), - {Hdl, State1 #qistate { seg_num_handles = dict:store(Key, Hdl, SegHdls1) }}. + {Hdl, State #qistate { seg_num_handles = dict:store(Key, Hdl, SegHdls) }}. close_handle(Key, State = #qistate { seg_num_handles = SegHdls }) -> case dict:find(Key, SegHdls) of -- cgit v1.2.1 From 33821e63e59386760d49667ee2f368e7f463d97a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 23 Oct 2009 18:13:16 +0100 Subject: in queue tx coalescing is in. It works too - doubling the number of producers does not halve the tx commit rate for each producer. It does go down slightly, on each doubling, but appears more log like. Also, debugging shows that the coalescing really is working --- src/rabbit_amqqueue.erl | 6 ++++- src/rabbit_amqqueue_process.erl | 49 ++++++++++++++++++++++++++++++++++++----- src/rabbit_variable_queue.erl | 32 +++++++++++++++++++-------- 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index d0a5f205..f421d6aa 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -40,7 +40,8 @@ -export([list/1, info/1, info/2, info_all/1, info_all/2]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, tx_commit_msg_store_callback/4]). +-export([notify_sent/2, unblock/2, tx_commit_msg_store_callback/4, + tx_commit_vq_callback/1]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -326,6 +327,9 @@ tx_commit_msg_store_callback(QPid, Pubs, AckTags, From) -> gen_server2:pcast(QPid, 8, {tx_commit_msg_store_callback, Pubs, AckTags, From}). +tx_commit_vq_callback(QPid) -> + gen_server2:pcast(QPid, 8, tx_commit_vq_callback). + internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( fun () -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 434652a5..1e37a98f 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -38,6 +38,7 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). +-define(SYNC_INTERVAL, 5). %% milliseconds -export([start_link/1]). @@ -56,7 +57,8 @@ variable_queue_state, next_msg_id, active_consumers, - blocked_consumers + blocked_consumers, + sync_timer_ref }). -record(consumer, {tag, ack_required}). 
@@ -109,7 +111,8 @@ init(Q = #amqqueue { name = QName }) -> variable_queue_state = VQS, next_msg_id = 1, active_consumers = queue:new(), - blocked_consumers = queue:new() + blocked_consumers = queue:new(), + sync_timer_ref = undefined }, {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -142,11 +145,34 @@ code_change(_OldVsn, State, _Extra) -> reply(Reply, NewState) -> assert_invariant(NewState), - {reply, Reply, NewState, hibernate}. + {NewState1, Timeout} = next_state(NewState), + {reply, Reply, NewState1, Timeout}. noreply(NewState) -> assert_invariant(NewState), - {noreply, NewState, hibernate}. + {NewState1, Timeout} = next_state(NewState), + {noreply, NewState1, Timeout}. + +next_state(State = #q { variable_queue_state = VQS }) -> + next_state1(State, rabbit_variable_queue:needs_sync(VQS)). + +next_state1(State = #q { sync_timer_ref = undefined }, true) -> + {start_sync_timer(State), 0}; +next_state1(State, true) -> + {State, 0}; +next_state1(State = #q { sync_timer_ref = undefined }, false) -> + {State, hibernate}; +next_state1(State, false) -> + {stop_sync_timer(State), 0}. + +start_sync_timer(State = #q { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, rabbit_amqqueue, + tx_commit_vq_callback, [self()]), + State #q { sync_timer_ref = TRef }. + +stop_sync_timer(State = #q { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #q { sync_timer_ref = undefined }. assert_invariant(#q { active_consumers = AC, variable_queue_state = VQS }) -> true = (queue:is_empty(AC) orelse rabbit_variable_queue:is_empty(VQS)). @@ -790,11 +816,16 @@ handle_cast({notify_sent, ChPid}, State) -> handle_cast({tx_commit_msg_store_callback, Pubs, AckTags, From}, State = #q{variable_queue_state = VQS}) -> + noreply( + State#q{variable_queue_state = + rabbit_variable_queue:tx_commit_from_msg_store( + Pubs, AckTags, From, VQS)}); + +handle_cast(tx_commit_vq_callback, State = #q{variable_queue_state = VQS}) -> noreply( run_message_queue( State#q{variable_queue_state = - rabbit_variable_queue:tx_commit_from_msg_store( - Pubs, AckTags, From, VQS)})); + rabbit_variable_queue:tx_commit_from_vq(VQS)})); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( @@ -828,6 +859,12 @@ handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_ch_down(DownPid, State); +handle_info(timeout, State = #q{variable_queue_state = VQS}) -> + noreply( + run_message_queue( + State#q{variable_queue_state = + rabbit_variable_queue:tx_commit_from_vq(VQS)})); + handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d9520d00..33e09c11 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -34,7 +34,8 @@ -export([init/1, terminate/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, - requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4]). + requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, + tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1]). %%---------------------------------------------------------------------------- @@ -54,7 +55,8 @@ avg_egress_rate, egress_rate_timestamp, prefetcher, - len + len, + on_sync }). 
-record(alpha, @@ -136,7 +138,8 @@ init(QueueName) -> avg_egress_rate = 0, egress_rate_timestamp = now(), prefetcher = undefined, - len = GammaCount + len = GammaCount, + on_sync = {[], [], []} }, maybe_load_next_segment(State). @@ -378,10 +381,16 @@ tx_commit(Pubs, AckTags, From, State) -> {false, State} end. -tx_commit_from_msg_store(Pubs, AckTags, From, State) -> +tx_commit_from_msg_store(Pubs, AckTags, From, + State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) -> DiskAcks = lists:filter(fun (AckTag) -> AckTag /= ack_not_on_disk end, AckTags), - State1 = ack(DiskAcks, State), + State #vqstate { on_sync = { [DiskAcks | SAcks], + [Pubs | SPubs], + [From | SFroms] }}. + +tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) -> + State1 = ack(lists:flatten(SAcks), State), {PubSeqIds, State2 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, @@ -392,11 +401,16 @@ tx_commit_from_msg_store(Pubs, AckTags, From, State) -> false -> SeqIdsAcc end, {SeqIdsAcc1, StateN1} - end, {[], State1}, Pubs), + end, {[], State1}, lists:flatten(lists:reverse(SPubs))), IndexState1 = - rabbit_queue_index:sync_seq_ids(PubSeqIds, [] /= DiskAcks, IndexState), - gen_server2:reply(From, ok), - State2 #vqstate { index_state = IndexState1 }. + rabbit_queue_index:sync_seq_ids(PubSeqIds, [] /= SAcks, IndexState), + [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ], + State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. + +needs_sync(#vqstate { on_sync = {_, _, []} }) -> + false; +needs_sync(_) -> + true. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 9eab724403f60b0a82708327bff72686b2119bc4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 26 Oct 2009 15:29:56 +0000 Subject: Mainly a load of cosmetics, and some minor API changes, but also manage to hook in getting the queue to flush the journal out if the queue is idle and has no work to do. This takes advantage of the ability to incrementally flush out the ack journal. --- src/rabbit_amqqueue_process.erl | 29 ++++++++++++++++++++--------- src/rabbit_queue_index.erl | 39 +++++++++++++++++++++++---------------- src/rabbit_tests.erl | 32 +++++++++++++++++--------------- src/rabbit_variable_queue.erl | 10 +++++++++- 4 files changed, 69 insertions(+), 41 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index cd94c6e4..180a9f8a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -153,28 +153,32 @@ noreply(NewState) -> {NewState1, Timeout} = next_state(NewState), {noreply, NewState1, Timeout}. -next_state(State = #q { variable_queue_state = VQS }) -> +next_state(State = #q{variable_queue_state = VQS}) -> next_state1(State, rabbit_variable_queue:needs_sync(VQS)). -next_state1(State = #q { sync_timer_ref = undefined }, true) -> +next_state1(State = #q{sync_timer_ref = undefined}, true) -> {start_sync_timer(State), 0}; next_state1(State, true) -> {State, 0}; -next_state1(State = #q { sync_timer_ref = undefined }, false) -> - {State, hibernate}; +next_state1(State = #q{sync_timer_ref = undefined, + variable_queue_state = VQS}, false) -> + {State, case rabbit_variable_queue:can_flush_journal(VQS) of + true -> 0; + false -> hibernate + end}; next_state1(State, false) -> {stop_sync_timer(State), 0}. 
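The on_sync triple introduced in the coalescing patch above is the heart of the scheme: tx_commit_from_msg_store only accumulates acks, publishes and the waiting callers' From tags, and tx_commit_from_vq later drains the whole batch behind a single queue-index sync, replying to every committer at once. The accumulate/drain pair in miniature (a sketch; the real drain also publishes the messages and syncs the index):

accumulate(Acks, Pubs, From, {SAcks, SPubs, SFroms}) ->
    {[Acks | SAcks], [Pubs | SPubs], [From | SFroms]}.

drain({SAcks, SPubs, SFroms}) ->
    AllAcks = lists:flatten(SAcks),
    AllPubs = lists:flatten(lists:reverse(SPubs)),  %% restore publish order
    %% ... ack AllAcks, publish AllPubs, one sync covers the lot ...
    [gen_server2:reply(From, ok) || From <- lists:reverse(SFroms)],
    {AllAcks, AllPubs, {[], [], []}}.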
-start_sync_timer(State = #q { sync_timer_ref = undefined }) -> +start_sync_timer(State = #q{sync_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, rabbit_amqqueue, tx_commit_vq_callback, [self()]), - State #q { sync_timer_ref = TRef }. + State#q{sync_timer_ref = TRef}. -stop_sync_timer(State = #q { sync_timer_ref = TRef }) -> +stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), - State #q { sync_timer_ref = undefined }. + State#q{sync_timer_ref = undefined}. -assert_invariant(#q { active_consumers = AC, variable_queue_state = VQS }) -> +assert_invariant(#q{active_consumers = AC, variable_queue_state = VQS}) -> true = (queue:is_empty(AC) orelse rabbit_variable_queue:is_empty(VQS)). lookup_ch(ChPid) -> @@ -868,6 +872,13 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {stop, NewState} -> {stop, normal, NewState} end; +handle_info(timeout, State = #q{variable_queue_state = VQS, + sync_timer_ref = undefined}) -> + %% if sync_timer_ref is undefined then we must have set the + %% timeout to zero because we thought we could flush the journal + noreply(State#q{variable_queue_state = + rabbit_variable_queue:flush_journal(VQS)}); + handle_info(timeout, State = #q{variable_queue_state = VQS}) -> noreply( run_message_queue( diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e3057f9c..39116b0d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, flush_journal/1, sync_seq_ids/3, + write_delivered/2, write_acks/2, can_flush_journal/1, flush_journal/1, sync_seq_ids/3, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). @@ -136,7 +136,8 @@ -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). --spec(flush_journal/1 :: (qistate()) -> {boolean(), qistate()}). +-spec(flush_journal/1 :: (qistate()) -> qistate()). +-spec(can_flush_journal/1 :: (qistate()) -> boolean()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> {( [{msg_id(), seq_id(), boolean(), boolean()}] | 'not_found'), qistate()}). @@ -211,12 +212,6 @@ write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict, false -> State2 end. -full_flush_journal(State) -> - case flush_journal(State) of - {true, State1} -> full_flush_journal(State1); - {false, State1} -> State1 - end. - sync_seq_ids(SeqIds, SyncAckJournal, State) -> State1 = case SyncAckJournal of true -> {Hdl, State2} = get_journal_handle(State), @@ -237,8 +232,13 @@ sync_seq_ids(SeqIds, SyncAckJournal, State) -> StateM end, State1, SegNumsSet). +can_flush_journal(#qistate { journal_ack_count = 0 }) -> + false; +can_flush_journal(_) -> + true. + flush_journal(State = #qistate { journal_ack_count = 0 }) -> - {false, State}; + State; flush_journal(State = #qistate { journal_ack_dict = JAckDict, journal_ack_count = JAckCount }) -> [SegNum|_] = dict:fetch_keys(JAckDict), @@ -253,11 +253,11 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, ok = file_handle_cache:position(Hdl, bof), ok = file_handle_cache:truncate(Hdl), ok = file_handle_cache:sync(Hdl), - {false, State3}; + State3; JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> flush_journal(State2); true -> - {true, State2} + State2 end. 
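The {State, 0} returns in next_state1 above lean on a standard gen_server idiom: a timeout of 0 delivers a timeout message immediately unless another message is already waiting, so the batched sync and the journal flush run only when the queue process is otherwise idle. The idiom in isolation (callback clauses only; do_idle_work is a placeholder):

handle_call(ping, _From, State) ->
    {reply, pong, State, 0};             %% request a timeout once the mailbox drains

handle_info(timeout, State) ->
    %% reached only when nothing else was queued for us
    {noreply, do_idle_work(State), hibernate}.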
read_segment_entries(InitSeqId, State) -> @@ -348,6 +348,13 @@ start_msg_store(DurableQueues) -> %% Minor Helpers %%---------------------------------------------------------------------------- +full_flush_journal(State) -> + case can_flush_journal(State) of + true -> State1 = flush_journal(State), + full_flush_journal(State1); + false -> State + end. + queue_name_to_dir_name(Name = #resource { kind = queue }) -> Bin = term_to_binary(Name), Size = 8*size(Bin), @@ -421,6 +428,11 @@ add_ack_to_ack_dict(SeqId, ADict) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). +all_segment_nums(Dir) -> + [list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) + || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. + %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function @@ -454,11 +466,6 @@ queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Entries], %% Startup Functions %%---------------------------------------------------------------------------- -all_segment_nums(Dir) -> - [list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) - || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. - find_ack_counts_and_deliver_transient_msgs(State = #qistate { dir = Dir }) -> SegNums = all_segment_nums(Dir), {TotalMsgCount, State1} = diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 81142476..ebd8432a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1040,31 +1040,33 @@ test_queue_index() -> ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0, as all the msgs were transient {0, Qi6} = rabbit_queue_index:init(test_queue()), - {false, Qi7} = rabbit_queue_index:flush_journal(Qi6), - {0, 10001, Qi8} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi7), - {Qi9, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi8), - {0, 20001, Qi10} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi9), - {ReadB, Qi11} = rabbit_queue_index:read_segment_entries(0, Qi10), + false = rabbit_queue_index:can_flush_journal(Qi6), + {0, 10001, Qi7} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), + {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, 20001, Qi9} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8), + {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsMsgIdsB)), - _Qi12 = rabbit_queue_index:terminate(Qi11), + _Qi11 = rabbit_queue_index:terminate(Qi10), ok = rabbit_msg_store:stop(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 10000 LenB = length(SeqIdsB), - {LenB, Qi13} = rabbit_queue_index:init(test_queue()), - {0, 20001, Qi14} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi13), - Qi15 = lists:foldl( + {LenB, Qi12} = rabbit_queue_index:init(test_queue()), + {0, 20001, Qi13} = + rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), + Qi14 = lists:foldl( fun (SeqId, QiN) -> rabbit_queue_index:write_delivered(SeqId, QiN) - end, Qi14, SeqIdsB), - {ReadC, Qi16} = rabbit_queue_index:read_segment_entries(0, Qi15), + end, Qi13, SeqIdsB), + {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsMsgIdsB)), - Qi17 = 
rabbit_queue_index:write_acks(SeqIdsB, Qi16), + Qi16 = rabbit_queue_index:write_acks(SeqIdsB, Qi15), + true = rabbit_queue_index:can_flush_journal(Qi16), + Qi17 = rabbit_queue_index:flush_journal(Qi16), {0, 20001, Qi18} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), _Qi19 = rabbit_queue_index:terminate(Qi18), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 33e09c11..da56487e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -35,7 +35,8 @@ set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, - tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1]). + tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, + can_flush_journal/1, flush_journal/1]). %%---------------------------------------------------------------------------- @@ -412,6 +413,13 @@ needs_sync(#vqstate { on_sync = {_, _, []} }) -> needs_sync(_) -> true. +can_flush_journal(#vqstate { index_state = IndexState }) -> + rabbit_queue_index:can_flush_journal(IndexState). + +flush_journal(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = + rabbit_queue_index:flush_journal(IndexState) }. + %%---------------------------------------------------------------------------- persistent_msg_ids(Pubs) -> -- cgit v1.2.1 From e8fe13c67bcbcf44beb619b6130114a3731ad6a7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 26 Oct 2009 16:05:17 +0000 Subject: correction of ordering of exports, specs and functions. And correction of specs. All just for qi though --- src/rabbit_queue_index.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 39116b0d..dd62a7ed 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,9 +32,10 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, can_flush_journal/1, flush_journal/1, sync_seq_ids/3, - read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). + write_delivered/2, write_acks/2, sync_seq_ids/3, can_flush_journal/1, + flush_journal/1, read_segment_entries/2, next_segment_boundary/1, + segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, + start_msg_store/1]). %%---------------------------------------------------------------------------- %% The queue disk index @@ -136,11 +137,11 @@ -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). --spec(flush_journal/1 :: (qistate()) -> qistate()). +-spec(sync_seq_ids/3 :: ([seq_id()], boolean(), qistate()) -> qistate()). -spec(can_flush_journal/1 :: (qistate()) -> boolean()). +-spec(flush_journal/1 :: (qistate()) -> qistate()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> - {( [{msg_id(), seq_id(), boolean(), boolean()}] - | 'not_found'), qistate()}). + {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(segment_size/0 :: () -> non_neg_integer()). 
-spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> -- cgit v1.2.1 From dc2c9f175f0cb5938a53bee14fada1b301df6882 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 26 Oct 2009 16:28:53 +0000 Subject: rewrote the qi populating the msg_store (delta function), sidestepping queue scattering. Seems to work fine. --- src/rabbit_queue_index.erl | 55 +++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index dd62a7ed..0870acc5 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -156,21 +156,14 @@ %%---------------------------------------------------------------------------- init(Name) -> - StrName = queue_name_to_dir_name(Name), - Dir = filename:join(queues_dir(), StrName), - ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - State = #qistate { dir = Dir, - seg_num_handles = dict:new(), - journal_ack_count = 0, - journal_ack_dict = dict:new(), - seg_ack_counts = dict:new() }, + State = blank_state(Name), {TotalMsgCount, State1} = find_ack_counts_and_deliver_transient_msgs(State), scatter_journal(TotalMsgCount, State1). terminate(State = #qistate { seg_num_handles = SegHdls }) -> case 0 == dict:size(SegHdls) of true -> State; - false -> close_all_handles(full_flush_journal(State)) + false -> close_all_handles(State) end. terminate_and_erase(State) -> @@ -434,6 +427,16 @@ all_segment_nums(Dir) -> lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. +blank_state(QueueName) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + #qistate { dir = Dir, + seg_num_handles = dict:new(), + journal_ack_count = 0, + journal_ack_dict = dict:new(), + seg_ack_counts = dict:new() }. 
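The rewritten walker in the next hunk is a demand-driven iterator: each call either reports finished, advances its own state, or yields one {MsgId, Weight, Continuation} triple for the msg_store to consume, so no queue index is ever loaded wholesale during the startup scan. The same shape over a plain list, for illustration:

-module(walker_sketch).
-export([walker/1, consume/1]).

walker([]) ->
    finished;
walker([{MsgId, Weight} | Rest]) ->
    {MsgId, Weight, Rest}.               %% Rest is the continuation token

consume(Cont) ->
    case walker(Cont) of
        finished               -> ok;
        {MsgId, Weight, Cont1} -> io:format("~p x ~p~n", [MsgId, Weight]),
                                  consume(Cont1)
    end.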
+ %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function @@ -442,25 +445,27 @@ all_segment_nums(Dir) -> queue_index_walker([]) -> finished; queue_index_walker([QueueName|QueueNames]) -> - {TotalMsgCount, State} = init(QueueName), - {LowSeqIdSeg, _NextSeqId, State1} = - find_lowest_seq_id_seg_and_next_seq_id(State), - queue_index_walker({TotalMsgCount, LowSeqIdSeg, State1, QueueNames}); + State = blank_state(QueueName), + {Hdl, State1} = get_journal_handle(State), + JAckDict = load_journal(Hdl, dict:new()), + State2 = #qistate { dir = Dir } = + close_handle(journal, State1 #qistate { journal_ack_dict = JAckDict }), + SegNums = all_segment_nums(Dir), + queue_index_walker({SegNums, State2, QueueNames}); -queue_index_walker({0, _LowSeqIdSeg, State, QueueNames}) -> - terminate(State), +queue_index_walker({[], State, QueueNames}) -> + _State = terminate(State), queue_index_walker(QueueNames); -queue_index_walker({N, LowSeqIdSeg, State, QueueNames}) -> - {Entries, State1} = read_segment_entries(LowSeqIdSeg, State), - LowSeqIdSeg1 = LowSeqIdSeg + segment_size(), - queue_index_walker({Entries, N, LowSeqIdSeg1, State1, QueueNames}); - -queue_index_walker({[], N, LowSeqIdSeg, State, QueueNames}) -> - queue_index_walker({N, LowSeqIdSeg, State, QueueNames}); -queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Entries], - N, LowSeqIdSeg, State, QueueNames}) -> +queue_index_walker({[SegNum | SegNums], State, QueueNames}) -> + {SDict, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), + queue_index_walker({dict:to_list(SDict), State1, SegNums, QueueNames}); + +queue_index_walker({[], State, SegNums, QueueNames}) -> + queue_index_walker({SegNums, State, QueueNames}); +queue_index_walker({[{_RelSeq, {MsgId, _IsDelivered, IsPersistent}} | Msgs], + State, SegNums, QueueNames}) -> {MsgId, bool_to_int(IsPersistent), - {Entries, N - 1, LowSeqIdSeg, State, QueueNames}}. + {Msgs, State, SegNums, QueueNames}}. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From a1e8ba425ab8cf0a3c882ce14a059f3bcd5f09cd Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Mon, 26 Oct 2009 12:54:48 -0400 Subject: Major refactoring including ets, saving last-send-value and call instead of cast. 
--- src/rabbit_amqqueue_process.erl | 18 +++-- src/rabbit_memory_monitor.erl | 169 ++++++++++++++++++++++++++-------------- 2 files changed, 120 insertions(+), 67 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index d402ef97..3cedfd20 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -819,22 +819,26 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> handle_cast(send_memory_monitor_update, State) -> DrainRatio1 = update_ratio(State#q.drain_ratio, State#q.next_msg_id), MsgSec = DrainRatio1#ratio.ratio * 1000000, % msg/sec - BufSec = case MsgSec < 0.016 of %% less than 1 msg/1 minute + QueueDuration = case MsgSec < 0.016 of %% less than 1 msg/1 minute true -> infinity; false -> queue:len(State#q.message_buffer) / MsgSec end, - rabbit_memory_monitor:push_queue_duration(self(), BufSec), + DesiredQueueDuration = rabbit_memory_monitor:push_queue_duration( + self(), QueueDuration), + ?LOGDEBUG("~p Queue duration current/desired ~p/~p~n", + [(State#q.q)#amqqueue.name, QueueDuration, DesiredQueueDuration]), noreply(State#q{drain_ratio = DrainRatio1}); -handle_cast({set_queue_duration, QueueDuration}, State) -> +handle_cast({set_queue_duration, DesiredQueueDuration}, State) -> DrainRatio = State#q.drain_ratio, - DesiredQueueLength = case QueueDuration of + DesiredBufLength = case DesiredQueueDuration of infinity -> infinity; - _ -> QueueDuration * DrainRatio#ratio.ratio * 1000000 + _ -> DesiredQueueDuration * DrainRatio#ratio.ratio * 1000000 end, %% Just to proove that something is happening. - ?LOGDEBUG("Queue size is ~8p, should be ~p~n", - [queue:len(State#q.message_buffer), DesiredQueueLength]), + ?LOGDEBUG("~p Queue length is~8p, should be ~p~n", + [(State#q.q)#amqqueue.name, queue:len(State#q.message_buffer), + DesiredBufLength]), noreply(State). diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 8c1db615..db4949e4 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -41,13 +41,13 @@ %% Timer | | %% v v %% Queue -----+--------+-----<***hibernated***>-------------> -%% | ^ | ^ ^ +%% | ^ | ^ ^ %% v | v | | %% Monitor X--*-+--X---*-+--X------X----X-----X+-----------> %% %% Or to put it in words. Queue periodically sends (casts) 'push_queue_duration' %% message to the Monitor (cases 1 and 2 on the asciiart above). Monitor -%% _always_ replies with a 'set_bufsec_limit' cast. This way, +%% _always_ replies with a 'set_queue_duration' cast. This way, %% we're pretty sure that the Queue is not hibernated. %% Monitor periodically recounts numbers ('X' on asciiart). If, during this %% update we notice that a queue was using too much memory, we send a message @@ -84,29 +84,37 @@ -export([register/1, push_queue_duration/2]). -record(state, {timer, %% 'internal_update' timer - queue_duration_dict, %% dict, qpid:seconds_till_queue_is_empty - queue_duration_avg, %% global, the desired queue depth (in sec) - memory_limit %% how much memory we intend to use + queue_durations, %% ets, (qpid, seconds_till_queue_is_empty) + queue_duration_sum, %% sum of all queue_durations + queue_duration_items,%% number of elements in sum + memory_limit, %% how much memory we intend to use + memory_ratio %% how much more memory we can use }). -define(SERVER, ?MODULE). -define(DEFAULT_UPDATE_INTERVAL_MS, 2500). - +-define(TABLE_NAME, ?MODULE). +-define(MAX_QUEUE_DURATION_ALLOWED, 60*60*24). 
% 1 day %%---------------------------------------------------------------------------- -ifdef(use_specs). - --spec(start_link/0 :: () -> 'ignore' | {'error',_} | {'ok',pid()}). +-type(state() :: #state{timer :: timer:tref(), + queue_durations :: tid(), + queue_duration_sum :: float(), + queue_duration_items:: non_neg_integer(), + memory_limit :: pos_integer(), + memory_ratio :: float() }). + +-spec(start_link/0 :: () -> ignore | {error, _} | {ok, pid()}). -spec(register/1 :: (pid()) -> ok). -spec(push_queue_duration/2 :: (pid(), float() | infinity) -> ok). --spec(init/1 :: ([]) -> {ok, #state{}}). +-spec(init/1 :: ([]) -> {ok, state()}). -ifdef(debug). -spec(ftoa/1 :: (any()) -> string()). -endif. --spec(count_average/1 :: (list()) -> float() | infinity ). --spec(internal_update/1 :: (#state{}) -> #state{}). +-spec(internal_update/1 :: (state()) -> state()). -endif. %%---------------------------------------------------------------------------- @@ -122,8 +130,9 @@ update() -> register(Pid) -> gen_server2:cast(?SERVER, {register, Pid}). -push_queue_duration(Pid, BufSec) -> - gen_server2:cast(rabbit_memory_monitor, {push_queue_duration, Pid, BufSec}). +push_queue_duration(Pid, QueueDuration) -> + gen_server2:call(rabbit_memory_monitor, + {push_queue_duration, Pid, QueueDuration}). %%---------------------------------------------------------------------------- @@ -141,20 +150,58 @@ get_user_memory_limit() -> end. -init([]) -> +init([]) -> %% We should never use more memory than user requested. As the memory %% manager doesn't really know how much memory queues are using, we shall %% try to remain safe distance from real limit. MemoryLimit = trunc(get_user_memory_limit() * 0.6), rabbit_log:warning("Memory monitor limit: ~pMB~n", - [erlang:trunc(MemoryLimit/1024/1024)]), - + [erlang:trunc(MemoryLimit/1048576)]), + {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS, ?SERVER, update, []), {ok, #state{timer = TRef, - queue_duration_dict = dict:new(), - queue_duration_avg = infinity, - memory_limit = MemoryLimit}}. + queue_durations = ets:new(?TABLE_NAME, [set, private]), + queue_duration_sum = 0.0, + queue_duration_items = 0, + memory_limit = MemoryLimit, + memory_ratio = 1.0}}. + +get_avg_duration(#state{queue_duration_sum = Sum, + queue_duration_items = Items}) -> + case Items of + 0 -> infinity; + _ -> Sum / Items + end. + +get_desired_duration(State) -> + case get_avg_duration(State) of + infinity -> infinity; + AvgQueueDuration -> AvgQueueDuration * State#state.memory_ratio + end. + +handle_call({push_queue_duration, Pid, QueueDuration0}, From, State) -> + SendDuration = get_desired_duration(State), + gen_server2:reply(From, SendDuration), + + QueueDuration = case QueueDuration0 > ?MAX_QUEUE_DURATION_ALLOWED of + true -> infinity; + false -> QueueDuration0 + end, + + {Sum, Items} = {State#state.queue_duration_sum, + State#state.queue_duration_items}, + [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(State#state.queue_durations, Pid), + {Sum1, Items1} = + case {PrevQueueDuration == infinity, QueueDuration == infinity} of + {true, true} -> {Sum, Items}; + {true, false} -> {Sum + QueueDuration, Items + 1}; + {false, true} -> {Sum - PrevQueueDuration, Items - 1}; + {false, false} -> {Sum - PrevQueueDuration + QueueDuration, Items} + end, + ets:insert(State#state.queue_durations, {Pid, QueueDuration, SendDuration}), + {noreply, State#state{queue_duration_sum = Sum1, + queue_duration_items = Items1}}; handle_call(_Request, _From, State) -> {noreply, State}. 
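Keeping queue_duration_sum and queue_duration_items alongside the ets table lets the monitor recompute the average in constant time on every report rather than folding over all queues; infinity durations are simply excluded from both counters, which is what the four-way case in handle_call above implements. The update rule, isolated:

-module(running_avg_sketch).
-export([update_avg/3, avg/1]).

update_avg(Prev, New, {Sum, Items}) ->
    case {Prev =:= infinity, New =:= infinity} of
        {true,  true}  -> {Sum, Items};
        {true,  false} -> {Sum + New, Items + 1};
        {false, true}  -> {Sum - Prev, Items - 1};
        {false, false} -> {Sum - Prev + New, Items}
    end.

avg({_Sum, 0})    -> infinity;           %% no finite durations reported yet
avg({Sum, Items}) -> Sum / Items.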
@@ -165,20 +212,24 @@ handle_cast(update, State) -> handle_cast({register, Pid}, State) -> _MRef = erlang:monitor(process, Pid), + ets:insert(State#state.queue_durations, {Pid, infinity, infinity}), {noreply, State}; -handle_cast({push_queue_duration, Pid, DrainRatio}, State) -> - gen_server2:cast(Pid, {set_bufsec_limit, State#state.queue_duration_avg}), - {noreply, State#state{queue_duration_dict = - dict:store(Pid, DrainRatio, State#state.queue_duration_dict)}}; - handle_cast(_Request, State) -> {noreply, State}. handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> - {noreply, State#state{queue_duration_dict = - dict:erase(Pid, State#state.queue_duration_dict)}}; + {Sum, Items} = {State#state.queue_duration_sum, + State#state.queue_duration_items}, + [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(State#state.queue_durations, Pid), + Sum1 = case PrevQueueDuration == infinity of + true -> Sum; + false -> Sum - PrevQueueDuration + end, + ets:delete(State#state.queue_durations, Pid), + {noreply, State#state{queue_duration_sum = Sum1, + queue_duration_items = Items-1}}; handle_info(_Info, State) -> {noreply, State}. @@ -190,7 +241,11 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. --ifdef(debug). + +set_queue_duration(Pid, QueueDuration) -> + gen_server2:pcast(Pid, 7, {set_queue_duration, QueueDuration}). + +-ifdef(debug). ftoa(Float) -> Str = case is_float(Float) of true -> io_lib:format("~11.3f",[Float]); @@ -199,41 +254,35 @@ ftoa(Float) -> lists:flatten(Str). -endif. -%% Count average from numbers, excluding atoms in the list. -count_average(List) -> - List1 = [V || V <- List, is_number(V) or is_float(V)], - case length(List1) of - 0 -> infinity; - Len -> lists:sum(List1) / Len - end. -internal_update(State) -> +%% Update memory ratio. Count new DesiredQueueDuration. +%% Get queues that are using more than that, and send +%% pessimistic information back to them. +internal_update(State0) -> %% available memory / used memory - UsedMemory = erlang:memory(total), - MemoryOvercommit = State#state.memory_limit / UsedMemory, - RealDrainAvg = count_average([V || {_K, V} <- - dict:to_list(State#state.queue_duration_dict)]), - %% In case of no active queues, feel free to grow. We can't make any - %% decisionswe have no clue what is the average ram_usage/second. - %% Not does the queue. - DesiredDrainAvg = case RealDrainAvg of - infinity -> infinity; - 0.0 -> infinity; - _ -> RealDrainAvg * MemoryOvercommit - end, - ?LOGDEBUG("DrainAvg Real/Desired:~s/~s MemoryOvercommit:~s~n", - [ftoa(RealDrainAvg), ftoa(DesiredDrainAvg), - ftoa(MemoryOvercommit)]), - %% Inform the queue to reduce it's memory usage when needed. - %% This can sometimes wake the queue from hibernation. Well, we don't care. - ReduceMemory = fun ({Pid, QueueDrain}) -> - case QueueDrain > DesiredDrainAvg of - true -> - gen_server2:cast(Pid, {set_bufsec_limit, DesiredDrainAvg}); - _ -> ok - end + MemoryRatio = State0#state.memory_limit / erlang:memory(total), + State = State0#state{memory_ratio = MemoryRatio}, + + DesiredDurationAvg = get_desired_duration(State), + + ?LOGDEBUG("Avg duration: real/desired:~s/~s Memory ratio:~s Queues:~p~n", + [ftoa(get_avg_duration(State)), ftoa(DesiredDurationAvg), + ftoa(MemoryRatio), + ets:foldl(fun (_, Acc) -> Acc+1 end, + 0, State#state.queue_durations)] ), + + %% If we have pessimistic information, we need to inform queues + %% to reduce it's memory usage when needed. + %% This sometimes wakes up queues from hibernation. Well, we don't care. 
+ PromptReduceDuraton = fun ({Pid, QueueDuration, PrevSendDuration}, Acc) -> + case (PrevSendDuration > DesiredDurationAvg) and (QueueDuration > DesiredDurationAvg) of + true -> set_queue_duration(Pid, DesiredDurationAvg), + ets:insert(State#state.queue_durations, {Pid, QueueDuration, DesiredDurationAvg}), + Acc + 1; + _ -> Acc + end end, - lists:map(ReduceMemory, dict:to_list(State#state.queue_duration_dict)), - State#state{queue_duration_avg = DesiredDrainAvg}. + ets:foldl(PromptReduceDuraton, 0, State#state.queue_durations), + State. -- cgit v1.2.1 From c23627fc1512aab016da2cc0a791e2056b9b7829 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 26 Oct 2009 17:39:55 +0000 Subject: Added check in the qi startup that msgs must be in the msg_store. This affected some of the tests. All seems to work. --- src/rabbit_queue_index.erl | 17 +++++++++++++---- src/rabbit_tests.erl | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 0870acc5..db738857 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -464,8 +464,10 @@ queue_index_walker({[], State, SegNums, QueueNames}) -> queue_index_walker({SegNums, State, QueueNames}); queue_index_walker({[{_RelSeq, {MsgId, _IsDelivered, IsPersistent}} | Msgs], State, SegNums, QueueNames}) -> - {MsgId, bool_to_int(IsPersistent), - {Msgs, State, SegNums, QueueNames}}. + case IsPersistent of + true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}}; + false -> queue_index_walker({Msgs, State, SegNums, QueueNames}) + end. %%---------------------------------------------------------------------------- @@ -544,8 +546,15 @@ replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> deliver_transient(SegNum, SDict, State) -> {AckMe, DeliverMe} = dict:fold( - fun (_RelSeq, {_MsgId, _IsDelivered, true}, Acc) -> - Acc; + fun (RelSeq, {MsgId, IsDelivered, true}, {AckMeAcc, DeliverMeAcc}) -> + case {IsDelivered, rabbit_msg_store:contains(MsgId)} of + {_, true} -> + {AckMeAcc, DeliverMeAcc}; + {true, false} -> + {[RelSeq | AckMeAcc], DeliverMeAcc}; + {false, false} -> + {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]} + end; (RelSeq, {_MsgId, false, false}, {AckMeAcc, DeliverMeAcc}) -> {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]}; (RelSeq, {_MsgId, true, false}, {AckMeAcc, DeliverMeAcc}) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ebd8432a..7bf480d7 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1008,6 +1008,7 @@ queue_index_publish(SeqIds, Persistent, Qi) -> MsgId = rabbit_guid:guid(), QiM = rabbit_queue_index:write_published(MsgId, SeqId, Persistent, QiN), + ok = rabbit_msg_store:write(MsgId, MsgId), {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} end, {Qi, []}, SeqIds). -- cgit v1.2.1 From a96409aa48869b3e6bd43932d0b7811fe4ad5160 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 27 Oct 2009 11:31:57 +0000 Subject: added to qi so that it will store that it was cleanly shutdown, and on startup, if it wasn't, then it marks all msgs as delivered. --- src/rabbit_queue_index.erl | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index db738857..4ed22cec 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -74,6 +74,8 @@ %% %%---------------------------------------------------------------------------- +-define(CLEAN_FILENAME, "cl.ean"). + -define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). 
-define(ACK_JOURNAL_FILENAME, "ack_journal.jif"). -define(SEQ_BYTES, 8). @@ -157,13 +159,15 @@ init(Name) -> State = blank_state(Name), - {TotalMsgCount, State1} = find_ack_counts_and_deliver_transient_msgs(State), + {TotalMsgCount, State1} = read_and_prune_segments(State), scatter_journal(TotalMsgCount, State1). terminate(State = #qistate { seg_num_handles = SegHdls }) -> case 0 == dict:size(SegHdls) of true -> State; - false -> close_all_handles(State) + false -> State1 = #qistate { dir = Dir } = close_all_handles(State), + store_clean_shutdown(Dir), + State1 end. terminate_and_erase(State) -> @@ -436,6 +440,18 @@ blank_state(QueueName) -> journal_ack_count = 0, journal_ack_dict = dict:new(), seg_ack_counts = dict:new() }. + +detect_clean_shutdown(Dir) -> + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> true; + {error, enoent} -> false + end. + +store_clean_shutdown(Dir) -> + {ok, Hdl} = file_handle_cache:open(filename:join(Dir, ?CLEAN_FILENAME), + [write, raw, binary], + [{write_buffer, unbuffered}]), + ok = file_handle_cache:close(Hdl). %%---------------------------------------------------------------------------- @@ -474,8 +490,9 @@ queue_index_walker({[{_RelSeq, {MsgId, _IsDelivered, IsPersistent}} | Msgs], %% Startup Functions %%---------------------------------------------------------------------------- -find_ack_counts_and_deliver_transient_msgs(State = #qistate { dir = Dir }) -> +read_and_prune_segments(State = #qistate { dir = Dir }) -> SegNums = all_segment_nums(Dir), + CleanShutdown = detect_clean_shutdown(Dir), {TotalMsgCount, State1} = lists:foldl( fun (SegNum, {TotalMsgCount1, StateN}) -> @@ -484,7 +501,7 @@ find_ack_counts_and_deliver_transient_msgs(State = #qistate { dir = Dir }) -> {TransientMsgsAcks, StateL = #qistate { seg_ack_counts = AckCounts, journal_ack_dict = JAckDict }} = - deliver_transient(SegNum, SDict, StateM), + drop_and_deliver(SegNum, SDict, CleanShutdown, StateM), %% ignore TransientMsgsAcks in AckCounts and %% JAckDict1 because the TransientMsgsAcks fall %% through into scatter_journal at which point the @@ -543,11 +560,15 @@ replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> {TotalMsgCount - length(ValidAcks), append_acks_to_segment(SegNum, ValidAcks, State2)}. 
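detect_clean_shutdown/1 above is the classic marker-file idiom: an orderly terminate writes an empty marker, and at startup deleting the marker doubles as the existence test, so a crash (which never wrote it) is noticed exactly once and the surviving messages can be marked as delivered. Reduced to plain file operations, with the real code writing the marker through file_handle_cache instead:

-module(clean_shutdown_sketch).
-export([detect_clean_shutdown/1, store_clean_shutdown/1]).

-define(CLEAN_FILENAME, "cl.ean").       %% renamed to "clean.dot" later in this series

detect_clean_shutdown(Dir) ->
    case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
        ok              -> true;         %% marker present: last shutdown was clean
        {error, enoent} -> false         %% no marker: we crashed last time
    end.

store_clean_shutdown(Dir) ->
    ok = file:write_file(filename:join(Dir, ?CLEAN_FILENAME), <<>>).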
-deliver_transient(SegNum, SDict, State) -> +drop_and_deliver(SegNum, SDict, CleanShutdown, State) -> {AckMe, DeliverMe} = dict:fold( fun (RelSeq, {MsgId, IsDelivered, true}, {AckMeAcc, DeliverMeAcc}) -> + %% msg is persistent, keep only if the msg_store has it case {IsDelivered, rabbit_msg_store:contains(MsgId)} of + {false, true} when not CleanShutdown -> + %% not delivered, but dirty shutdown => mark delivered + {AckMeAcc, [RelSeq | DeliverMeAcc]}; {_, true} -> {AckMeAcc, DeliverMeAcc}; {true, false} -> @@ -556,8 +577,10 @@ deliver_transient(SegNum, SDict, State) -> {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]} end; (RelSeq, {_MsgId, false, false}, {AckMeAcc, DeliverMeAcc}) -> + %% not persistent and not delivered => deliver and ack it {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]}; (RelSeq, {_MsgId, true, false}, {AckMeAcc, DeliverMeAcc}) -> + %% not persistent but delivered => ack it {[RelSeq | AckMeAcc], DeliverMeAcc} end, {[], []}, SDict), {Hdl, State1} = get_seg_handle(SegNum, State), -- cgit v1.2.1 From 6789edada0300d45c826dbcd9ce36c36c1c0b8af Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 27 Oct 2009 12:44:43 +0000 Subject: make --- src/rabbit.erl | 9 ++++++--- src/rabbit_queue_index.erl | 6 +++--- src/rabbit_tests.erl | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 215c1bc4..016a461a 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -37,7 +37,7 @@ -export([start/2, stop/1]). --export([log_location/1]). +-export([log_location/1, start_child/2]). -import(application). -import(mnesia). @@ -67,6 +67,7 @@ {nodes, [erlang_node()]} | {running_nodes, [erlang_node()]}]). -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()). +-spec(start_child/2 :: (atom(), [any()]) -> 'ok'). -endif. @@ -155,7 +156,6 @@ start(normal, []) -> fun () -> ok = maybe_insert_default_data(), ok = rabbit_exchange:recover(), - %% TODO - this should probably use start_child somehow too DurableQueues = rabbit_amqqueue:find_durable_queues(), ok = rabbit_queue_index:start_msg_store(DurableQueues), {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues) @@ -267,8 +267,11 @@ print_banner() -> io:nl(). start_child(Mod) -> + start_child(Mod, []). + +start_child(Mod, Args) -> {ok,_} = supervisor:start_child(rabbit_sup, - {Mod, {Mod, start_link, []}, + {Mod, {Mod, start_link, Args}, transient, 5000, worker, [Mod]}), ok. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4ed22cec..5eed90b0 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -332,9 +332,9 @@ start_msg_store(DurableQueues) -> end end, {[], []}, Directories), MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), - {ok, _Pid} = rabbit_msg_store:start_link(MsgStoreDir, - fun queue_index_walker/1, - DurableQueueNames), + ok = rabbit:start_child(rabbit_msg_store, [MsgStoreDir, + fun queue_index_walker/1, + DurableQueueNames]), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), ok = delete_queue_directory(Dir) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7bf480d7..390ff2ef 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1022,7 +1022,7 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. 
test_queue_index() -> - rabbit_msg_store:stop(), + io:format("~p~n", [rabbit_msg_store:stop()]), ok = empty_test_queue(), SeqIdsA = lists:seq(1,10000), SeqIdsB = lists:seq(10001,20000), -- cgit v1.2.1 From e8c7ef0c40572a79b468aa3da1b5335529d42c37 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 27 Oct 2009 13:07:24 +0000 Subject: Apologies for the previous commit message. What this commit and the previous do is to move the msg_store under the rabbit_sup. This turned out to require some changes to the tests too. --- src/rabbit_tests.erl | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 390ff2ef..2034cd54 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -830,8 +830,14 @@ start_msg_store_empty() -> start_msg_store(fun (ok) -> finished end, ok). start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> - {ok, _Pid} = rabbit_msg_store:start_link(msg_store_dir(), MsgRefDeltaGen, - MsgRefDeltaGenInit). + rabbit:start_child(rabbit_msg_store, [msg_store_dir(), MsgRefDeltaGen, + MsgRefDeltaGenInit]). + +stop_msg_store() -> + case supervisor:terminate_child(rabbit_sup, rabbit_msg_store) of + ok -> supervisor:delete_child(rabbit_sup, rabbit_msg_store); + E -> E + end. msg_store_contains(Atom, MsgIds) -> Atom = lists:foldl( @@ -863,8 +869,8 @@ msg_store_write(MsgIds) -> ok, MsgIds). test_msg_store() -> - rabbit_msg_store:stop(), - {ok, _Pid} = start_msg_store_empty(), + stop_msg_store(), + ok = start_msg_store_empty(), Self = self(), MsgIds = [term_to_binary(M) || M <- lists:seq(1,100)], {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), @@ -938,23 +944,22 @@ test_msg_store() -> end end, ok, MsgIds2ndHalf), %% stop and restart, preserving every other msg in 2nd half - ok = rabbit_msg_store:stop(), - {ok, _Pid1} = - start_msg_store(fun ([]) -> finished; - ([MsgId|MsgIdsTail]) - when length(MsgIdsTail) rem 2 == 0 -> - {MsgId, 1, MsgIdsTail}; - ([MsgId|MsgIdsTail]) -> - {MsgId, 0, MsgIdsTail} - end, MsgIds2ndHalf), + ok = stop_msg_store(), + ok = start_msg_store(fun ([]) -> finished; + ([MsgId|MsgIdsTail]) + when length(MsgIdsTail) rem 2 == 0 -> + {MsgId, 1, MsgIdsTail}; + ([MsgId|MsgIdsTail]) -> + {MsgId, 0, MsgIdsTail} + end, MsgIds2ndHalf), %% check we have the right msgs left lists:foldl( fun (MsgId, Bool) -> not(Bool = rabbit_msg_store:contains(MsgId)) end, false, MsgIds2ndHalf), %% restart empty - ok = rabbit_msg_store:stop(), - {ok, _Pid2} = start_msg_store_empty(), + ok = stop_msg_store(), + ok = start_msg_store_empty(), %% check we don't contain any of the msgs false = msg_store_contains(false, MsgIds), %% push a lot of msgs in... @@ -979,8 +984,8 @@ test_msg_store() -> false = msg_store_contains(false, lists:map(fun term_to_binary/1, MsgIdsBig)), %% restart empty - ok = rabbit_msg_store:stop(), - {ok, _Pid3} = start_msg_store_empty(), + ok = stop_msg_store(), + ok = start_msg_store_empty(), passed. queue_name(Name) -> @@ -1022,7 +1027,7 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. 
test_queue_index() -> - io:format("~p~n", [rabbit_msg_store:stop()]), + stop_msg_store(), ok = empty_test_queue(), SeqIdsA = lists:seq(1,10000), SeqIdsB = lists:seq(10001,20000), @@ -1037,7 +1042,7 @@ test_queue_index() -> lists:reverse(SeqIdsMsgIdsA)), %% call terminate twice to prove it's idempotent _Qi5 = rabbit_queue_index:terminate(rabbit_queue_index:terminate(Qi4)), - ok = rabbit_msg_store:stop(), + ok = stop_msg_store(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0, as all the msgs were transient {0, Qi6} = rabbit_queue_index:init(test_queue()), @@ -1051,7 +1056,7 @@ test_queue_index() -> ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsMsgIdsB)), _Qi11 = rabbit_queue_index:terminate(Qi10), - ok = rabbit_msg_store:stop(), + ok = stop_msg_store(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 10000 LenB = length(SeqIdsB), @@ -1071,10 +1076,10 @@ test_queue_index() -> {0, 20001, Qi18} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), _Qi19 = rabbit_queue_index:terminate(Qi18), - ok = rabbit_msg_store:stop(), + ok = stop_msg_store(), ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0 because all persistent msgs have been acked {0, Qi20} = rabbit_queue_index:init(test_queue()), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), - ok = rabbit_msg_store:stop(), + ok = stop_msg_store(), passed. -- cgit v1.2.1 From d326749ccbcf9e071e7a4106510730409fdc78bb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 27 Oct 2009 15:38:52 +0000 Subject: changed the queue supervisor from simple_one_for_one to one_for_one. This then means that children can take as long as they need (well, up to 2^32-1 millis) to shutdown (flush files out to disk), which turns out to be important. This change has impact on queue creation and deletion too. Also, cl.ean => clean.dot --- src/rabbit_amqqueue.erl | 36 ++++++++++++++++++++++++++++++++---- src/rabbit_amqqueue_process.erl | 5 +---- src/rabbit_amqqueue_sup.erl | 4 +--- src/rabbit_queue_index.erl | 2 +- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index f421d6aa..d18c5a2c 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -177,6 +177,10 @@ declare(QueueName, Durable, AutoDelete, Args) -> internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) -> case rabbit_misc:execute_mnesia_transaction( fun () -> + %% we could still find that mnesia has another + %% entry here because the queue may exist on + %% another node, beyond the knowledge of our own + %% local queue_sup. case mnesia:wread({rabbit_queue, QueueName}) of [] -> ok = store_queue(Q), case WantDefaultBinding of @@ -200,9 +204,30 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -start_queue_process(Q) -> - {ok, Pid} = supervisor:start_child(rabbit_amqqueue_sup, [Q]), - Q#amqqueue{pid = Pid}. 
+start_queue_process(Q = #amqqueue{name = QueueName}) -> + case supervisor:start_child( + rabbit_amqqueue_sup, + {QueueName, {rabbit_amqqueue_process, start_link, [Q]}, + %% 4294967295 is 2^32 - 1, which is the highest value allowed + temporary, 4294967295, worker, [rabbit_amqqueue_process]}) of + {ok, Pid} -> + Q#amqqueue{pid = Pid}; + {error, already_present} -> + supervisor:delete_child(rabbit_amqqueue_sup, QueueName), + start_queue_process(Q); + {error, {already_started, _QPid}} -> + case rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:wread({rabbit_queue, QueueName}) of + %% it's vanished in the mean time, try again + [] -> try_again; + [ExistingQ] -> ExistingQ + end + end) of + try_again -> start_queue_process(Q); + ExistingQ -> ExistingQ + end + end. add_default_binding(#amqqueue{name = QueueName}) -> Exchange = rabbit_misc:r(QueueName, exchange, <<>>), @@ -250,7 +275,10 @@ stat_all() -> lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)). delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> - gen_server2:call(QPid, {delete, IfUnused, IfEmpty}, infinity). + case gen_server2:call(QPid, {delete, IfUnused, IfEmpty}, infinity) of + ok -> supervisor:delete_child(rabbit_amqqueue_sup, QPid); + E -> E + end. purge(#amqqueue{ pid = QPid }) -> gen_server2:call(QPid, purge, infinity). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 180a9f8a..9d27fd0f 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -121,10 +121,7 @@ terminate(shutdown, #q{variable_queue_state = VQS}) -> _VQS = rabbit_variable_queue:terminate(VQS); terminate(_Reason, State = #q{variable_queue_state = VQS}) -> %% FIXME: How do we cancel active subscriptions? - %% Ensure that any persisted tx messages are removed; - %% mixed_queue:delete_queue cannot do that for us since neither - %% mixed_queue nor disk_queue keep a record of uncommitted tx - %% messages. + %% Ensure that any persisted tx messages are removed. %% TODO: wait for all in flight tx_commits to complete VQS1 = rabbit_variable_queue:tx_rollback( lists:concat([PM || #tx { pending_messages = PM } <- diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl index 46d23a40..f06e4c53 100644 --- a/src/rabbit_amqqueue_sup.erl +++ b/src/rabbit_amqqueue_sup.erl @@ -43,6 +43,4 @@ start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []). init([]) -> - {ok, {{simple_one_for_one, 10, 10}, - [{rabbit_amqqueue, {rabbit_amqqueue_process, start_link, []}, - temporary, brutal_kill, worker, [rabbit_amqqueue_process]}]}}. + {ok, {{one_for_one, 10, 10}, []}}. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5eed90b0..7c317b30 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -74,7 +74,7 @@ %% %%---------------------------------------------------------------------------- --define(CLEAN_FILENAME, "cl.ean"). +-define(CLEAN_FILENAME, "clean.dot"). -define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). -define(ACK_JOURNAL_FILENAME, "ack_journal.jif"). -- cgit v1.2.1 From 622d192eeeb97f6b51c38fc03eee89ce01ef96e0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 27 Oct 2009 16:49:53 +0000 Subject: Firstly, delete does not return ok, but {ok, Len}. Secondly, whilst we could delete the child spec on an explicit delete, given the issues raised by auto delete, there's not much point; so Thirdly, add prune_queue_childspecs which is called whenever we delete or declare a queue. 
This means there will be at most one unused child spec under the queue_sup at any time, so we still need the already_present case in start_queue_process, in case we're unlucky and come across the unused spec (actually not that unlikely). --- src/rabbit_amqqueue.erl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index d18c5a2c..833dada4 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -167,6 +167,7 @@ find_durable_queues() -> end). declare(QueueName, Durable, AutoDelete, Args) -> + prune_queue_childspecs(), Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, auto_delete = AutoDelete, @@ -275,10 +276,8 @@ stat_all() -> lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)). delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> - case gen_server2:call(QPid, {delete, IfUnused, IfEmpty}, infinity) of - ok -> supervisor:delete_child(rabbit_amqqueue_sup, QPid); - E -> E - end. + prune_queue_childspecs(), + gen_server2:call(QPid, {delete, IfUnused, IfEmpty}, infinity). purge(#amqqueue{ pid = QPid }) -> gen_server2:call(QPid, purge, infinity). @@ -371,6 +370,14 @@ internal_delete(QueueName) -> end end). +prune_queue_childspecs() -> + lists:foreach( + fun ({Name, undefined, _Type, _Mods}) -> + supervisor:delete_child(rabbit_amqqueue_sup, Name); + (_) -> ok + end, supervisor:which_children(rabbit_amqqueue_sup)), + ok. + on_node_down(Node) -> rabbit_misc:execute_mnesia_transaction( fun () -> -- cgit v1.2.1 From 7a9a800ea493ed1cd384e0b80bf12993ff8a5b99 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 28 Oct 2009 13:31:50 +0000 Subject: Work around in scanning in msg_store files to ensure we don't explode memory. Also drop read_ahead when scanning files because there's just no point given the way erlang works --- src/rabbit_msg_file.erl | 5 ++++- src/rabbit_msg_store.erl | 15 ++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 94525d84..1b9587fb 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -118,8 +118,9 @@ read_next(FileHdl, Offset) -> KO -> KO end; true -> %% all good, let's continue + MsgIdSizeBits = MsgIdSize * 8, case file:read(FileHdl, MsgIdSize) of - {ok, <>} -> + {ok, <>} -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Offset + TotalSize - 1, case file:position( @@ -129,6 +130,8 @@ read_next(FileHdl, Offset) -> case file:read(FileHdl, 1) of {ok, <>} -> + <> = + <>, {ok, {MsgId, TotalSize, NextOffset}}; {ok, _SomeOtherData} -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 3f2f4cfe..a492a024 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -102,9 +102,10 @@ -define(FILE_EXTENSION_TMP, ".rdt"). -define(CACHE_ETS_NAME, rabbit_disk_queue_cache). --define(BINARY_MODE, [raw, binary]). --define(READ_MODE, [read, read_ahead]). --define(WRITE_MODE, [write, delayed_write]). +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write, delayed_write]). %% The components: %% @@ -741,7 +742,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% move. 
If we run out of disk space, this truncate could %% fail, but we still aren't risking losing data ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), - {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_MODE), + {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE), {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), ok = file:sync(MainHdl), ok = file:close(MainHdl), @@ -959,9 +960,9 @@ combine_files(#file_summary { file = Source, SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), State1 = close_file(SourceName, close_file(DestinationName, State)), - {ok, SourceHdl} = open_file(Dir, SourceName, ?READ_MODE), + {ok, SourceHdl} = open_file(Dir, SourceName, ?READ_AHEAD_MODE), {ok, DestinationHdl} = open_file(Dir, DestinationName, - ?READ_MODE ++ ?WRITE_MODE), + ?READ_AHEAD_MODE ++ ?WRITE_MODE), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't %% need a tmp file @@ -974,7 +975,7 @@ combine_files(#file_summary { file = Source, DestinationValid, ExpectedSize); true -> Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_MODE ++ ?WRITE_MODE), + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), Worklist = lists:dropwhile( fun (#msg_location { offset = Offset }) -- cgit v1.2.1 From 14a19f42e85282f9ad7867c15606fd29b9754837 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 28 Oct 2009 13:43:35 +0000 Subject: Added comment explaining delightful code --- src/rabbit_msg_file.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 1b9587fb..ea50cc64 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -118,6 +118,11 @@ read_next(FileHdl, Offset) -> KO -> KO end; true -> %% all good, let's continue + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 + %% in which we read the MsgId as a number, and + %% then convert it back to a binary in order to + %% work around bugs in Erlang's GC. MsgIdSizeBits = MsgIdSize * 8, case file:read(FileHdl, MsgIdSize) of {ok, <>} -> -- cgit v1.2.1 From a20cd3fe6ba74247b73a07da0fb28e4a3ac3d3f1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 28 Oct 2009 14:36:17 +0000 Subject: Statically configurable fixed width msg id size in msg_store. Associated changes in tests. All tests pass. --- src/rabbit_msg_file.erl | 95 +++++++++++++++++++++---------------------------- src/rabbit_tests.erl | 16 +++++---- 2 files changed, 49 insertions(+), 62 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index ea50cc64..84dce90e 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -39,7 +39,10 @@ -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(WRITE_OK_SIZE_BITS, 8). -define(WRITE_OK_MARKER, 255). --define(FILE_PACKING_ADJUSTMENT, (1 + (2 * (?INTEGER_SIZE_BYTES)))). +-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). +-define(MSG_ID_SIZE_BYTES, 16). +-define(MSG_ID_SIZE_BITS, (8 * ?MSG_ID_SIZE_BYTES)). +-define(SIZE_AND_MSG_ID_BYTES, (?MSG_ID_SIZE_BYTES + ?INTEGER_SIZE_BYTES)). 
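%% Frame layout implied by the new defines, in bytes: 8 (Size integer)
%% + 16 (fixed-width MsgId) + body + 1 (write-ok marker), which is why
%% ?FILE_PACKING_ADJUSTMENT is now 1 + ?INTEGER_SIZE_BYTES. A sketch of
%% building one frame, assuming a full-width 16-byte msg id such as the
%% md5 ids the tests below switch to (frame/2 is illustrative, not part
%% of the module):
frame(MsgId, MsgBodyBin) when size(MsgId) =:= ?MSG_ID_SIZE_BYTES ->
    Size = ?MSG_ID_SIZE_BYTES + size(MsgBodyBin),
    <<Size:?INTEGER_SIZE_BITS,
      MsgId:?MSG_ID_SIZE_BYTES/binary,
      MsgBodyBin/binary,
      ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>.
%% size(frame(Id, Body)) =:= Size + ?FILE_PACKING_ADJUSTMENT, matching
%% the {ok, Size + ?FILE_PACKING_ADJUSTMENT} that append/3 returns.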
%%---------------------------------------------------------------------------- @@ -62,13 +65,13 @@ %%---------------------------------------------------------------------------- -append(FileHdl, MsgId, MsgBody) when is_binary(MsgId) -> +append(FileHdl, MsgId, MsgBody) + when is_binary(MsgId) andalso size(MsgId) =< ?MSG_ID_SIZE_BYTES -> MsgBodyBin = term_to_binary(MsgBody), - [MsgIdSize, MsgBodyBinSize] = Sizes = [size(B) || B <- [MsgId, MsgBodyBin]], - Size = lists:sum(Sizes), + MsgBodyBinSize = size(MsgBodyBin), + Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES, case file:write(FileHdl, <>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; @@ -77,15 +80,12 @@ append(FileHdl, MsgId, MsgBody) when is_binary(MsgId) -> read(FileHdl, TotalSize) -> Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, - SizeWriteOkBytes = Size + 1, + BodyBinSize = Size - ?MSG_ID_SIZE_BYTES, case file:read(FileHdl, TotalSize) of {ok, <>} -> - BodyBinSize = Size - MsgIdSize, - <> = Rest, + MsgId:?MSG_ID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> {ok, {MsgId, binary_to_term(MsgBodyBin)}}; KO -> KO end. @@ -105,50 +105,35 @@ scan(FileHdl, Offset, Acc) -> end. read_next(FileHdl, Offset) -> - TwoIntegers = 2 * ?INTEGER_SIZE_BYTES, - case file:read(FileHdl, TwoIntegers) of - {ok, <>} -> - if Size == 0 -> eof; %% Nothing we can do other than stop - MsgIdSize == 0 -> - %% current message corrupted, try skipping past it - ExpectedAbsPos = Offset + Size + ?FILE_PACKING_ADJUSTMENT, - case file:position(FileHdl, {cur, Size + 1}) of - {ok, ExpectedAbsPos} -> {corrupted, ExpectedAbsPos}; - {ok, _SomeOtherPos} -> eof; %% seek failed, so give up - KO -> KO - end; - true -> %% all good, let's continue - %% Here we take option 5 from - %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 - %% in which we read the MsgId as a number, and - %% then convert it back to a binary in order to - %% work around bugs in Erlang's GC. - MsgIdSizeBits = MsgIdSize * 8, - case file:read(FileHdl, MsgIdSize) of - {ok, <>} -> - TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, - ExpectedAbsPos = Offset + TotalSize - 1, - case file:position( - FileHdl, {cur, Size - MsgIdSize}) of - {ok, ExpectedAbsPos} -> - NextOffset = ExpectedAbsPos + 1, - case file:read(FileHdl, 1) of - {ok, <>} -> - <> = - <>, - {ok, {MsgId, - TotalSize, NextOffset}}; - {ok, _SomeOtherData} -> - {corrupted, NextOffset}; - KO -> KO - end; - {ok, _SomeOtherPos} -> - %% seek failed, so give up - eof; + case file:read(FileHdl, ?SIZE_AND_MSG_ID_BYTES) of + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in which + %% we read the MsgId as a number, and then convert it back to + %% a binary in order to work around bugs in Erlang's GC. + {ok, <>} -> + case Size of + 0 -> eof; %% Nothing we can do other than stop + _ -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + ExpectedAbsPos = Offset + TotalSize - 1, + case file:position( + FileHdl, {cur, Size - ?MSG_ID_SIZE_BYTES}) of + {ok, ExpectedAbsPos} -> + NextOffset = ExpectedAbsPos + 1, + case file:read(FileHdl, 1) of + {ok, + <>} -> + <> = + <>, + {ok, {MsgId, TotalSize, NextOffset}}; + {ok, _SomeOtherData} -> + {corrupted, NextOffset}; KO -> KO end; - Other -> Other + {ok, _SomeOtherPos} -> + %% seek failed, so give up + eof; + KO -> KO end end; Other -> Other diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 2034cd54..56dd3483 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -839,6 +839,9 @@ stop_msg_store() -> E -> E end. 
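%% supervisor:terminate_child/2 stops the process but leaves the child
%% spec registered, so a later start_child with the same id would get
%% {error, already_present} unless delete_child/2 is also called, which
%% is what stop_msg_store/0 above does. The tests then drive full
%% restart cycles with the pair, e.g. (hypothetical helper, using the
%% generator arguments from this file):
restart_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) ->
    ok = stop_msg_store(),
    ok = start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit).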
+msg_id_bin(X) -> + erlang:md5(term_to_binary(X)). + msg_store_contains(Atom, MsgIds) -> Atom = lists:foldl( fun (MsgId, Atom1) when Atom1 =:= Atom -> @@ -872,7 +875,7 @@ test_msg_store() -> stop_msg_store(), ok = start_msg_store_empty(), Self = self(), - MsgIds = [term_to_binary(M) || M <- lists:seq(1,100)], + MsgIds = [msg_id_bin(M) || M <- lists:seq(1,100)], {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), %% check we don't contain any of the msgs we're about to publish false = msg_store_contains(false, MsgIds), @@ -967,22 +970,21 @@ test_msg_store() -> MsgIdsBig = lists:seq(1, BigCount), Payload = << 0:65536 >>, ok = lists:foldl( - fun (MsgId, ok) -> rabbit_msg_store:write(term_to_binary(MsgId), - Payload) end, - ok, MsgIdsBig), + fun (MsgId, ok) -> + rabbit_msg_store:write(msg_id_bin(MsgId), Payload) + end, ok, MsgIdsBig), %% .., then remove even numbers ascending, and odd numbers %% descending. This hits the GC. ok = lists:foldl( fun (MsgId, ok) -> - rabbit_msg_store:remove([term_to_binary( + rabbit_msg_store:remove([msg_id_bin( case MsgId rem 2 of 0 -> MsgId; 1 -> BigCount - MsgId end)]) end, ok, MsgIdsBig), %% ensure empty - false = - msg_store_contains(false, lists:map(fun term_to_binary/1, MsgIdsBig)), + false = msg_store_contains(false, [msg_id_bin(M) || M <- MsgIdsBig]), %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), -- cgit v1.2.1 From c45f4e8b04ccea5014c9526ec67d096b82c7dc3d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 28 Oct 2009 15:25:14 +0000 Subject: Some minor cosmetics in qi, but mainly extend the qi tests so to cover one other code path that is pretty easy to hit and deserves testing (auto deletion of full segment files). --- src/rabbit_queue_index.erl | 20 ++++++++++---------- src/rabbit_tests.erl | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 7c317b30..e0634bee 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -524,7 +524,6 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> {TotalMsgCount, State1}. scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> - JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), {Hdl, State1 = #qistate { journal_ack_dict = JAckDict }} = get_journal_handle(State), %% ADict may well contain duplicates. However, this is ok, due to @@ -533,9 +532,15 @@ scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> State2 = close_handle(journal, State1), {TotalMsgCount1, State3} = dict:fold(fun replay_journal_acks_to_segment/3, - {TotalMsgCount, State2}, ADict), + {TotalMsgCount, + %% supply empty dict so that when + %% replay_journal_acks_to_segment loads segments, + %% it gets all msgs, and ignores anything we've + %% found in the journal. + State2 #qistate { journal_ack_dict = dict:new() }}, ADict), + JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), ok = file:delete(JournalPath), - {TotalMsgCount1, State3 #qistate { journal_ack_dict = dict:new() }}. + {TotalMsgCount1, State3}. 
load_journal(Hdl, ADict) -> case file_handle_cache:read(Hdl, ?SEQ_BYTES) of @@ -547,18 +552,13 @@ load_journal(Hdl, ADict) -> replay_journal_acks_to_segment(_, [], Acc) -> Acc; replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> - %% supply empty dict so that we get all msgs in SDict that have - %% not been acked in the segment file itself - {SDict, _AckCount, _HighRelSeq, State1} = - load_segment(SegNum, State #qistate { journal_ack_dict = dict:new() }), + {SDict, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:to_list(sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks))), %% ValidAcks will not contain any duplicates at this point. - State2 = - State1 #qistate { journal_ack_dict = State #qistate.journal_ack_dict }, {TotalMsgCount - length(ValidAcks), - append_acks_to_segment(SegNum, ValidAcks, State2)}. + append_acks_to_segment(SegNum, ValidAcks, State1)}. drop_and_deliver(SegNum, SDict, CleanShutdown, State) -> {AckMe, DeliverMe} = diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 56dd3483..3bf6dd36 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1084,4 +1084,24 @@ test_queue_index() -> {0, Qi20} = rabbit_queue_index:init(test_queue()), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = stop_msg_store(), + ok = empty_test_queue(), + %% this next bit is just to hit the auto deletion of segment files + SeqIdsC = lists:seq(1,65536), + {0, Qi22} = rabbit_queue_index:init(test_queue()), + {Qi23, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi22), + Qi24 = lists:foldl( + fun (SeqId, QiN) -> + rabbit_queue_index:write_delivered(SeqId, QiN) + end, Qi23, SeqIdsC), + Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), + {_Oks, {false, Qi26}} = + rabbit_misc:unfold( + fun ({true, QiN}) -> + QiM = rabbit_queue_index:flush_journal(QiN), + {true, ok, {rabbit_queue_index:can_flush_journal(QiM), QiM}}; + ({false, _QiN}) -> + false + end, {true, Qi25}), + _Qi27 = rabbit_queue_index:terminate_and_erase(Qi26), + ok = stop_msg_store(), passed. -- cgit v1.2.1 From e125d96bc87c3f69eeef03a02a17a02b98002a74 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Oct 2009 12:28:19 +0000 Subject: cosmetics to fhc - just moving and tidying --- src/file_handle_cache.erl | 56 ++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index e45156dc..d2b0ba2e 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -31,8 +31,10 @@ -module(file_handle_cache). --export([open/3, close/1, release/1, read/2, append/2, sync/1, - position/2, truncate/1, last_sync_offset/1]). +-export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, + last_sync_offset/1]). + +%%---------------------------------------------------------------------------- -record(file, { reader_count, @@ -57,6 +59,9 @@ last_used_at }). +%%---------------------------------------------------------------------------- +%% Public API + open(Path, Mode, Options) -> case is_appender(Mode) of true -> {error, append_not_supported}; @@ -132,9 +137,6 @@ close(Ref) -> end end. -release(_Ref) -> %% noop just for now - ok. - read(Ref, Count) -> case get_or_reopen(Ref) of {ok, #handle { is_read = false }} -> @@ -175,26 +177,6 @@ append(Ref, Data) -> Error -> Error end. 
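%% A small usage sketch of the module's public API after the move; the
%% open modes, the empty options list, and the return conventions are
%% assumptions consistent with the specs added later in this series,
%% not taken from this patch:
fhc_example(Path) ->
    {ok, Ref} = file_handle_cache:open(Path, [read, write, raw, binary],
                                       []),
    ok = file_handle_cache:append(Ref, <<"hello">>),
    ok = file_handle_cache:sync(Ref),
    ok = file_handle_cache:position(Ref, bof),
    {ok, <<"hello">>} = file_handle_cache:read(Ref, 5),
    ok = file_handle_cache:close(Ref).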
-last_sync_offset(Ref) -> - case get_or_reopen(Ref) of - {ok, #handle { trusted_offset = TrustedOffset }} -> - {ok, TrustedOffset}; - Error -> Error - end. - -position(Ref, NewOffset) -> - case get_or_reopen(Ref) of - {ok, Handle} -> - {Result, Handle1} = - case write_buffer(Handle) of - {ok, Handle2} -> maybe_seek(NewOffset, Handle2); - {Error, Handle2} -> {Error, Handle2} - end, - put({Ref, fhc_handle}, Handle1), - Result; - Error -> Error - end. - sync(Ref) -> case get_or_reopen(Ref) of {ok, #handle { is_dirty = false, write_buffer = [] }} -> @@ -218,6 +200,19 @@ sync(Ref) -> Error -> Error end. +position(Ref, NewOffset) -> + case get_or_reopen(Ref) of + {ok, Handle} -> + {Result, Handle1} = + case write_buffer(Handle) of + {ok, Handle2} -> maybe_seek(NewOffset, Handle2); + {Error, Handle2} -> {Error, Handle2} + end, + put({Ref, fhc_handle}, Handle1), + Result; + Error -> Error + end. + truncate(Ref) -> case get_or_reopen(Ref) of {ok, #handle { is_write = false }} -> @@ -245,6 +240,17 @@ truncate(Ref) -> Error -> Error end. +last_sync_offset(Ref) -> + case get_or_reopen(Ref) of + {ok, #handle { trusted_offset = TrustedOffset }} -> + {ok, TrustedOffset}; + Error -> Error + end. + +%%---------------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------------- + get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of undefined -> {error, not_open}; -- cgit v1.2.1 From 9377560954edc65a7af2b32e3b6a65f26c52331b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Oct 2009 12:36:19 +0000 Subject: bool() => boolean() --- src/rabbit_basic.erl | 2 +- src/rabbit_channel.erl | 2 +- src/rabbit_memory_manager.erl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 63260669..14c655a6 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -52,7 +52,7 @@ -spec(message/5 :: (exchange_name(), routing_key(), properties_input(), binary(), guid()) -> message()). -spec(message/6 :: (exchange_name(), routing_key(), properties_input(), - binary(), guid(), bool()) -> message()). + binary(), guid(), boolean()) -> message()). -spec(properties/1 :: (properties_input()) -> amqp_properties()). -spec(publish/4 :: (exchange_name(), routing_key(), properties_input(), binary()) -> publish_result()). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 42097739..6afd0bc9 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -55,7 +55,7 @@ -ifdef(use_specs). -type(msg_id() :: non_neg_integer()). --type(msg() :: {queue_name(), pid(), msg_id(), bool(), message()}). +-type(msg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). -spec(start_link/5 :: (channel_number(), pid(), pid(), username(), vhost()) -> pid()). diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl index aaacaef7..a73f03e2 100644 --- a/src/rabbit_memory_manager.erl +++ b/src/rabbit_memory_manager.erl @@ -55,7 +55,7 @@ -spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). -spec(report_memory/3 :: (pid(), non_neg_integer(), boolean()) -> 'ok'). -spec(info/0 :: () -> [{atom(), any()}]). --spec(conserve_memory/2 :: (pid(), bool()) -> 'ok'). +-spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -endif. 
-- cgit v1.2.1 From 66bc12c1e037152ce96b6c3e96ebb11ab80d7fff Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Oct 2009 13:39:08 +0000 Subject: Added specs to fhc --- src/file_handle_cache.erl | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index d2b0ba2e..fe4e9077 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -32,7 +32,7 @@ -module(file_handle_cache). -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, - last_sync_offset/1]). + last_sync_offset/1, append_write_buffer/1]). %%---------------------------------------------------------------------------- @@ -59,6 +59,30 @@ last_used_at }). +%%---------------------------------------------------------------------------- +%% Specs + +-ifdef(use_specs). + +-type(ref() :: any()). +-type(error() :: {'error', any()}). +-type(ok_or_error() :: ('ok' | error())). +-type(position() :: ('bof' | 'eof' | {'bof',integer()} | {'eof',integer()} + | {'cur',integer()} | integer())). + +-spec(open/3 :: (string(), [any()], [any()]) -> ({'ok', ref()} | error())). +-spec(close/1 :: (ref()) -> ('ok' | error())). +-spec(read/2 :: (ref(), integer()) -> + ({'ok', ([char()]|binary())} | eof | error())). +-spec(append/2 :: (ref(), iodata()) -> ok_or_error()). +-spec(sync/1 :: (ref()) -> ok_or_error()). +-spec(position/2 :: (ref(), position()) -> ok_or_error()). +-spec(truncate/1 :: (ref()) -> ok_or_error()). +-spec(last_sync_offset/1 :: (ref()) -> ({'ok', integer()} | error())). +-spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). + +-endif. + %%---------------------------------------------------------------------------- %% Public API @@ -247,6 +271,15 @@ last_sync_offset(Ref) -> Error -> Error end. +append_write_buffer(Ref) -> + case get_or_reopen(Ref) of + {ok, Handle} -> + {Result, Handle1} = write_buffer(Handle), + put({Ref, fhc_handle}, Handle1), + Result; + Error -> Error + end. + %%---------------------------------------------------------------------------- %% Internal functions %%---------------------------------------------------------------------------- -- cgit v1.2.1 From fa0aa701eaa3006c297f02122f21cfccc528b7cc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Oct 2009 15:28:25 +0000 Subject: Made the qi keep counters for the publishes and deliveries and flush to disk (minus sync) at the right points. All tests pass. However... ...it's actually wrong. Deliveries really don't have to happen in the right order. For example, if you deliver a load of messages, then publish a load, then requeue the initial lot, then you'll be publishing those msgs with new seqids, and marking them delivered. That could be in a different segment file, thus upsetting the counters. So really deliveries need journalling too. In theory that looks straight forward because deliveries and acks look exactly the same in segment files, and so just using the very same code for acking and deliveries should be totally fine. But, it's not because of the issue of duplicates. During scattering of the journal out to the segments, there's a period in which there could be three entries (pub + del + ack) for a msg in the segment file, *and* an entry in the journal (ack). This is fine, and we make sure when scattering at startup that we don't reack msgs that have already been acked in the segment. 
However, we could now have two entries in the segment file (pub + del) and an entry in the journal, and not know whether that entry is the ack or a dup of the del. So we have a choice - either use 1 bit in the journal to indicate whether the entry is a del or an ack (bringing the space for seqids down to 2^63), or use two journals, one for acks and one for dels. Assuming Rabbit can do 50kHz, 2^63 will still last 5.8million years, or 2.9million years at 100kHz. I think this is still fine, so will take 1 bit from the seq id. --- src/rabbit_queue_index.erl | 133 +++++++++++++++++++++++++++++++----------- src/rabbit_tests.erl | 19 +++--- src/rabbit_variable_queue.erl | 7 +-- 3 files changed, 112 insertions(+), 47 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e0634bee..62c6af53 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -114,7 +114,9 @@ seg_num_handles, journal_ack_count, journal_ack_dict, - seg_ack_counts + seg_ack_counts, + publish_handle, + deliver_handle }). -include("rabbit.hrl"). @@ -123,13 +125,18 @@ -ifdef(use_specs). +-type(hdl() :: ('undefined' | any())). -type(msg_id() :: binary()). -type(seq_id() :: integer()). +-type(hdl_and_count() :: ('undefined' | + {non_neg_integer(), hdl(), non_neg_integer()})). -type(qistate() :: #qistate { dir :: file_path(), seg_num_handles :: dict(), journal_ack_count :: integer(), journal_ack_dict :: dict(), - seg_ack_counts :: dict() + seg_ack_counts :: dict(), + publish_handle :: hdl_and_count(), + deliver_handle :: hdl_and_count() }). -spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). @@ -179,7 +186,7 @@ write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - {Hdl, State1} = get_seg_handle(SegNum, State), + {Hdl, State1} = get_pub_handle(SegNum, State), ok = file_handle_cache:append(Hdl, < {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - {Hdl, State1} = get_seg_handle(SegNum, State), + {Hdl, State1} = get_del_handle(SegNum, State), ok = file_handle_cache:append( Hdl, <>), @@ -260,7 +267,8 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, read_segment_entries(InitSeqId, State) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - {SDict, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), + {SDict, _PubCount, _DelCount, _AckCount, _HighRelSeq, State1} = + load_segment(SegNum, State), %% deliberately sort the list desc, because foldl will reverse it RelSeqs = rev_sort(dict:fetch_keys(SDict)), {lists:foldl(fun (RelSeq, Acc) -> @@ -284,23 +292,24 @@ find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> %% of the lowest segment. That seq_id may not actually exist, but %% that's fine. The important thing is that the segment exists and %% the seq_id reported is on a segment boundary. + + %% SegNums is sorted, ascending. 
LowSeqIdSeg = case SegNums of [] -> 0; - _ -> reconstruct_seq_id(lists:min(SegNums), 0) + _ -> reconstruct_seq_id(hd(SegNums), 0) end, {NextSeqId, State1} = case SegNums of [] -> {0, State}; - _ -> SegNum2 = lists:max(SegNums), - {SDict, AckCount, HighRelSeq, State2} = + _ -> SegNum2 = lists:last(SegNums), + {_SDict, PubCount, _DelCount, _AckCount, HighRelSeq, State2} = load_segment(SegNum2, State), NextSeqId1 = reconstruct_seq_id(SegNum2, HighRelSeq), NextSeqId2 = - case 0 == AckCount andalso 0 == HighRelSeq andalso - 0 == dict:size(SDict) of - true -> NextSeqId1; - false -> NextSeqId1 + 1 + case PubCount of + 0 -> NextSeqId1; + _ -> NextSeqId1 + 1 end, {NextSeqId2, State2} end, @@ -374,6 +383,30 @@ get_journal_handle(State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> new_handle(journal, Path, Mode, State) end. +get_pub_handle(SegNum, State = #qistate { publish_handle = PubHandle }) -> + {State1, PubHandle1 = {_SegNum, Hdl, _Count}} = + get_counted_handle(SegNum, State, PubHandle), + {Hdl, State1 #qistate { publish_handle = PubHandle1 }}. + +get_del_handle(SegNum, State = #qistate { deliver_handle = DelHandle }) -> + {State1, DelHandle1 = {_SegNum, Hdl, _Count}} = + get_counted_handle(SegNum, State, DelHandle), + {Hdl, State1 #qistate { deliver_handle = DelHandle1 }}. + +get_counted_handle(SegNum, State, undefined) -> + {Hdl, State1} = get_seg_handle(SegNum, State), + {State1, {SegNum, Hdl, 1}}; +get_counted_handle(SegNum, State, {SegNum, undefined, Count}) -> + {Hdl, State1} = get_seg_handle(SegNum, State), + {State1, {SegNum, Hdl, Count + 1}}; +get_counted_handle(SegNum, State, {SegNum, Hdl, Count}) + when Count < ?SEGMENT_ENTRIES_COUNT -> + {State, {SegNum, Hdl, Count + 1}}; +get_counted_handle(SegNumA, State, {SegNumB, Hdl, ?SEGMENT_ENTRIES_COUNT}) + when SegNumA == SegNumB + 1 -> + ok = file_handle_cache:append_write_buffer(Hdl), + get_counted_handle(SegNumA, State, undefined). + get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> case dict:find(SegNum, SegHdls) of {ok, Hdl} -> {Hdl, State}; @@ -427,9 +460,10 @@ add_ack_to_ack_dict(SeqId, ADict) -> dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). all_segment_nums(Dir) -> - [list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) - || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]. + lists:sort( + [list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) + || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]). blank_state(QueueName) -> StrName = queue_name_to_dir_name(QueueName), @@ -439,7 +473,10 @@ blank_state(QueueName) -> seg_num_handles = dict:new(), journal_ack_count = 0, journal_ack_dict = dict:new(), - seg_ack_counts = dict:new() }. + seg_ack_counts = dict:new(), + publish_handle = undefined, + deliver_handle = undefined + }. 
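%% How the counted handles from get_counted_handle/3 above advance, as
%% a worked trace (illustrative, assuming a fresh state): the first
%% publish to segment 0 creates the counter, it saturates at
%% ?SEGMENT_ENTRIES_COUNT, and the next publish (necessarily to the
%% following segment) flushes the old segment's write buffer and rolls
%% the counter over.
publish_handle_trace(State0) ->
    {State1, {0, Hdl0, 1}} = get_counted_handle(0, State0, undefined),
    Full = {0, Hdl0, ?SEGMENT_ENTRIES_COUNT},
    {_State2, {1, _Hdl1, 1}} = get_counted_handle(1, State1, Full).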
detect_clean_shutdown(Dir) -> case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of @@ -473,7 +510,8 @@ queue_index_walker({[], State, QueueNames}) -> _State = terminate(State), queue_index_walker(QueueNames); queue_index_walker({[SegNum | SegNums], State, QueueNames}) -> - {SDict, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), + {SDict, _PubCount, _DelCount, _AckCount, _HighRelSeq, State1} = + load_segment(SegNum, State), queue_index_walker({dict:to_list(SDict), State1, SegNums, QueueNames}); queue_index_walker({[], State, SegNums, QueueNames}) -> @@ -495,8 +533,10 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> CleanShutdown = detect_clean_shutdown(Dir), {TotalMsgCount, State1} = lists:foldl( - fun (SegNum, {TotalMsgCount1, StateN}) -> - {SDict, AckCount, _HighRelSeq, StateM} = + fun (SegNum, {TotalMsgCount1, StateN = + #qistate { publish_handle = PublishHandle, + deliver_handle = DeliverHandle }}) -> + {SDict, PubCount, DelCount, AckCount, _HighRelSeq, StateM} = load_segment(SegNum, StateN), {TransientMsgsAcks, StateL = #qistate { seg_ack_counts = AckCounts, @@ -517,9 +557,27 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> [] -> JAckDict; _ -> dict:store(SegNum, TransientMsgsAcks, JAckDict) end, + %% In each of the following, there should only be + %% one segment that matches the 3rd case. All other + %% segments should either be full or empty. There + %% could be no partial segments. + PublishHandle1 = case PubCount of + ?SEGMENT_ENTRIES_COUNT -> PublishHandle; + 0 -> PublishHandle; + _ when PublishHandle == undefined -> + {SegNum, undefined, PubCount} + end, + DeliverHandle1 = case DelCount of + ?SEGMENT_ENTRIES_COUNT -> DeliverHandle; + 0 -> DeliverHandle; + _ when DeliverHandle == undefined -> + {SegNum, undefined, DelCount} + end, {TotalMsgCount2, StateL #qistate { seg_ack_counts = AckCounts1, - journal_ack_dict = JAckDict1 }} + journal_ack_dict = JAckDict1, + publish_handle = PublishHandle1, + deliver_handle = DeliverHandle1 }} end, {0, State}, SegNums), {TotalMsgCount, State1}. @@ -552,7 +610,8 @@ load_journal(Hdl, ADict) -> replay_journal_acks_to_segment(_, [], Acc) -> Acc; replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> - {SDict, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), + {SDict, _PubCount, _DelCount, _AckCount, _HighRelSeq, State1} = + load_segment(SegNum, State), ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:to_list(sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks))), @@ -605,13 +664,13 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, error -> filelib:is_file(seg_num_to_path(Dir, SegNum)) end, case SegmentExists of - false -> {dict:new(), 0, 0, State}; + false -> {dict:new(), 0, 0, 0, 0, State}; true -> {Hdl, State1 = #qistate { journal_ack_dict = JAckDict }} = get_seg_handle(SegNum, State), ok = file_handle_cache:position(Hdl, bof), - {SDict, AckCount, HighRelSeq} = - load_segment_entries(Hdl, dict:new(), 0, 0), + {SDict, PubCount, DelCount, AckCount, HighRelSeq} = + load_segment_entries(Hdl, dict:new(), 0, 0, 0, 0), RelSeqs = case dict:find(SegNum, JAckDict) of {ok, RelSeqs1} -> RelSeqs1; error -> [] @@ -620,18 +679,20 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, lists:foldl(fun (RelSeq, {SDict2, AckCount2}) -> {dict:erase(RelSeq, SDict2), AckCount2 + 1} end, {SDict, AckCount}, RelSeqs), - {SDict1, AckCount1, HighRelSeq, State1} + {SDict1, PubCount, DelCount, AckCount1, HighRelSeq, State1} end. 
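%% A REL_SEQ_ONLY record in a segment file means "delivered" the first
%% time it is met for a given RelSeq and "acked" the second time; the
%% deliver_or_ack_msg clauses below rely on exactly this. Worked
%% sequence for one entry (illustrative):
%%   publish record          -> RelSeq => {MsgId, false, IsPersistent}
%%   1st rel-seq-only record -> RelSeq => {MsgId, true, IsPersistent},
%%                              DelCount + 1
%%   2nd rel-seq-only record -> RelSeq erased from SDict, AckCount + 1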
-load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> +load_segment_entries(Hdl, SDict, PubCount, DelCount, AckCount, HighRelSeq) -> case file_handle_cache:read(Hdl, 1) of {ok, <>} -> {ok, LSB} = file_handle_cache:read( Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, - {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), - load_segment_entries(Hdl, SDict1, AckCount1, HighRelSeq); + {SDict1, DelCount1, AckCount1} = + deliver_or_ack_msg(SDict, DelCount, AckCount, RelSeq), + load_segment_entries( + Hdl, SDict1, PubCount, DelCount1, AckCount1, HighRelSeq); {ok, <>} -> %% because we specify /binary, and binaries are complete @@ -641,18 +702,20 @@ load_segment_entries(Hdl, SDict, AckCount, HighRelSeq) -> Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), - load_segment_entries(Hdl, dict:store(RelSeq, {MsgId, false, - 1 == IsPersistentNum}, - SDict), AckCount, HighRelSeq1); - _ErrOrEoF -> {SDict, AckCount, HighRelSeq} + load_segment_entries( + Hdl, dict:store(RelSeq, {MsgId, false, 1 == IsPersistentNum}, + SDict), + PubCount + 1, DelCount, AckCount, HighRelSeq1); + _ErrOrEoF -> {SDict, PubCount, DelCount, AckCount, HighRelSeq} end. -deliver_or_ack_msg(SDict, AckCount, RelSeq) -> +deliver_or_ack_msg(SDict, DelCount, AckCount, RelSeq) -> case dict:find(RelSeq, SDict) of {ok, {MsgId, false, IsPersistent}} -> - {dict:store(RelSeq, {MsgId, true, IsPersistent}, SDict), AckCount}; + {dict:store(RelSeq, {MsgId, true, IsPersistent}, SDict), + DelCount + 1, AckCount}; {ok, {_MsgId, true, _IsPersistent}} -> - {dict:erase(RelSeq, SDict), AckCount + 1} + {dict:erase(RelSeq, SDict), DelCount, AckCount + 1} end. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 3bf6dd36..e3f8ddac 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1031,13 +1031,13 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> test_queue_index() -> stop_msg_store(), ok = empty_test_queue(), - SeqIdsA = lists:seq(1,10000), - SeqIdsB = lists:seq(10001,20000), + SeqIdsA = lists:seq(0,9999), + SeqIdsB = lists:seq(10000,19999), {0, Qi0} = rabbit_queue_index:init(test_queue()), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), - {0, 10001, Qi3} = + {0, 10000, Qi3} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2), {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3), ok = verify_read_with_published(false, false, ReadA, @@ -1049,10 +1049,10 @@ test_queue_index() -> %% should get length back as 0, as all the msgs were transient {0, Qi6} = rabbit_queue_index:init(test_queue()), false = rabbit_queue_index:can_flush_journal(Qi6), - {0, 10001, Qi7} = + {0, 10000, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), - {0, 20001, Qi9} = + {0, 20000, Qi9} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8), {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9), ok = verify_read_with_published(false, true, ReadB, @@ -1063,7 +1063,7 @@ test_queue_index() -> %% should get length back as 10000 LenB = length(SeqIdsB), {LenB, Qi12} = rabbit_queue_index:init(test_queue()), - {0, 20001, Qi13} = + {0, 20000, Qi13} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), Qi14 = lists:foldl( fun (SeqId, QiN) -> @@ -1075,7 +1075,10 @@ test_queue_index() -> Qi16 = rabbit_queue_index:write_acks(SeqIdsB, Qi15), true = 
rabbit_queue_index:can_flush_journal(Qi16), Qi17 = rabbit_queue_index:flush_journal(Qi16), - {0, 20001, Qi18} = + %% the entire first segment will have gone as they were firstly + %% transient, and secondly ack'd + SegmentSize = rabbit_queue_index:segment_size(), + {SegmentSize, 20000, Qi18} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), _Qi19 = rabbit_queue_index:terminate(Qi18), ok = stop_msg_store(), @@ -1086,7 +1089,7 @@ test_queue_index() -> ok = stop_msg_store(), ok = empty_test_queue(), %% this next bit is just to hit the auto deletion of segment files - SeqIdsC = lists:seq(1,65536), + SeqIdsC = lists:seq(0,65535), {0, Qi22} = rabbit_queue_index:init(test_queue()), {Qi23, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = lists:foldl( diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index da56487e..7851d8f6 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -475,10 +475,9 @@ remove_queue_entries(Q, IndexState) -> false -> MsgIdsAcc end, {CountN + 1, MsgIdsAcc1, SeqIdsAcc1, IndexStateN1} - %% the foldl is going to reverse the result lists, so start - %% by reversing so that we maintain doing things in - %% ascending seqid order - end, {0, [], [], IndexState}, lists:reverse(queue:to_list(Q))), + %% we need to write the delivered records in order otherwise + %% we upset the qi. So don't reverse. + end, {0, [], [], IndexState}, queue:to_list(Q)), ok = case MsgIds of [] -> ok; _ -> rabbit_msg_store:remove(MsgIds) -- cgit v1.2.1 From 6d7ef18dddcdc8965a949ac1e2e9ef48f5edfdad Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Oct 2009 18:00:26 +0000 Subject: Well, the transformation is done. Hilariously it works, first time. However, whilst all tests pass, I suspect there may still be faults, eg in the counting of entries in the journal etc. It still needs further checking... --- src/rabbit_queue_index.erl | 364 +++++++++++++++++++++++++-------------------- 1 file changed, 206 insertions(+), 158 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 62c6af53..acda1636 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -77,9 +77,12 @@ -define(CLEAN_FILENAME, "clean.dot"). -define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). --define(ACK_JOURNAL_FILENAME, "ack_journal.jif"). +-define(ACK_JOURNAL_FILENAME, "journal.jif"). + +-define(DEL_BIT, 0). +-define(ACK_BIT, 1). -define(SEQ_BYTES, 8). --define(SEQ_BITS, (?SEQ_BYTES * 8)). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - 1)). -define(SEGMENT_EXTENSION, ".idx"). -define(REL_SEQ_BITS, 14). @@ -112,11 +115,11 @@ -record(qistate, { dir, seg_num_handles, - journal_ack_count, + journal_count, journal_ack_dict, + journal_del_dict, seg_ack_counts, - publish_handle, - deliver_handle + publish_handle }). -include("rabbit.hrl"). @@ -132,11 +135,11 @@ {non_neg_integer(), hdl(), non_neg_integer()})). -type(qistate() :: #qistate { dir :: file_path(), seg_num_handles :: dict(), - journal_ack_count :: integer(), + journal_count :: integer(), journal_ack_dict :: dict(), + journal_del_dict :: dict(), seg_ack_counts :: dict(), - publish_handle :: hdl_and_count(), - deliver_handle :: hdl_and_count() + publish_handle :: hdl_and_count() }). -spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). @@ -193,29 +196,17 @@ write_published(MsgId, SeqId, IsPersistent, State) RelSeq:?REL_SEQ_BITS, MsgId/binary>>), State1. 
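%% With one bit borrowed for the tag, a journal record is still exactly
%% ?SEQ_BYTES wide: one tag bit plus a 63-bit sequence id, which is the
%% shape write_delivered/2, write_acks/2 and load_journal/3 below
%% exchange. A sketch of the encoding described in the commit message
%% (encode/1 and decode/1 are illustrative names, not part of the
%% module); 2^63 seq ids at 100kHz still gives roughly 2.9 million
%% years of headroom:
encode({del, SeqId}) -> <<?DEL_BIT:1, SeqId:?SEQ_BITS>>;
encode({ack, SeqId}) -> <<?ACK_BIT:1, SeqId:?SEQ_BITS>>.

decode(<<?DEL_BIT:1, SeqId:?SEQ_BITS>>) -> {del, SeqId};
decode(<<?ACK_BIT:1, SeqId:?SEQ_BITS>>) -> {ack, SeqId}.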
-write_delivered(SeqId, State) -> - {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - {Hdl, State1} = get_del_handle(SegNum, State), - ok = file_handle_cache:append( - Hdl, <>), - State1. +write_delivered(SeqId, State = #qistate { journal_del_dict = JDelDict }) -> + {JDelDict1, State1} = + write_to_journal([<>], + [SeqId], JDelDict, State), + maybe_full_flush(State1 #qistate { journal_del_dict = JDelDict1 }). -write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict, - journal_ack_count = JAckCount }) -> - {Hdl, State1} = get_journal_handle(State), - {JAckDict1, JAckCount1} = - lists:foldl( - fun (SeqId, {JAckDict2, JAckCount2}) -> - ok = file_handle_cache:append(Hdl, <>), - {add_ack_to_ack_dict(SeqId, JAckDict2), JAckCount2 + 1} - end, {JAckDict, JAckCount}, SeqIds), - State2 = State1 #qistate { journal_ack_dict = JAckDict1, - journal_ack_count = JAckCount1 }, - case JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT of - true -> full_flush_journal(State2); - false -> State2 - end. +write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict }) -> + {JAckDict1, State1} = write_to_journal([<> || + SeqId <- SeqIds], + SeqIds, JAckDict, State), + maybe_full_flush(State1 #qistate { journal_ack_dict = JAckDict1 }). sync_seq_ids(SeqIds, SyncAckJournal, State) -> State1 = case SyncAckJournal of @@ -237,37 +228,44 @@ sync_seq_ids(SeqIds, SyncAckJournal, State) -> StateM end, State1, SegNumsSet). -can_flush_journal(#qistate { journal_ack_count = 0 }) -> +can_flush_journal(#qistate { journal_count = 0 }) -> false; can_flush_journal(_) -> true. -flush_journal(State = #qistate { journal_ack_count = 0 }) -> +flush_journal(State = #qistate { journal_count = 0 }) -> State; flush_journal(State = #qistate { journal_ack_dict = JAckDict, - journal_ack_count = JAckCount }) -> - [SegNum|_] = dict:fetch_keys(JAckDict), - Acks = dict:fetch(SegNum, JAckDict), - State1 = append_acks_to_segment(SegNum, Acks, State), - JAckCount1 = JAckCount - length(Acks), - State2 = State1 #qistate { journal_ack_dict = dict:erase(SegNum, JAckDict), - journal_ack_count = JAckCount1 }, + journal_del_dict = JDelDict, + journal_count = JCount }) -> + SegNum = case dict:fetch_keys(JAckDict) of + [] -> hd(dict:fetch_keys(JDelDict)); + [N|_] -> N + end, + Dels = seg_entries_from_dict(SegNum, JDelDict), + Acks = seg_entries_from_dict(SegNum, JAckDict), + State1 = append_dels_to_segment(SegNum, Dels, State), + State2 = append_acks_to_segment(SegNum, Acks, State1), + JCount1 = JCount - length(Dels) - length(Acks), + State3 = State2 #qistate { journal_del_dict = dict:erase(SegNum, JDelDict), + journal_ack_dict = dict:erase(SegNum, JAckDict), + journal_count = JCount1 }, if - JAckCount1 == 0 -> - {Hdl, State3} = get_journal_handle(State2), + JCount1 == 0 -> + {Hdl, State4} = get_journal_handle(State3), ok = file_handle_cache:position(Hdl, bof), ok = file_handle_cache:truncate(Hdl), ok = file_handle_cache:sync(Hdl), - State3; - JAckCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> - flush_journal(State2); + State4; + JCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> + flush_journal(State3); true -> - State2 + State3 end. 
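%% flush_journal/1 above works one segment at a time, which depends on
%% add_seqid_to_dict/2 (below) having grouped journal entries by
%% segment via seq_id_to_seg_and_rel_seq_id/1. That helper is not shown
%% in this patch; given ?REL_SEQ_BITS = 14 it is presumably plain
%% div/rem arithmetic over ?SEGMENT_ENTRIES_COUNT
%% (1 bsl ?REL_SEQ_BITS = 16384), i.e. something like:
seq_id_to_seg_and_rel_seq_id(SeqId) ->
    {SeqId div ?SEGMENT_ENTRIES_COUNT, SeqId rem ?SEGMENT_ENTRIES_COUNT}.

reconstruct_seq_id(SegNum, RelSeq) ->
    (SegNum * ?SEGMENT_ENTRIES_COUNT) + RelSeq.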
read_segment_entries(InitSeqId, State) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - {SDict, _PubCount, _DelCount, _AckCount, _HighRelSeq, State1} = + {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), %% deliberately sort the list desc, because foldl will reverse it RelSeqs = rev_sort(dict:fetch_keys(SDict)), @@ -303,7 +301,7 @@ find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> case SegNums of [] -> {0, State}; _ -> SegNum2 = lists:last(SegNums), - {_SDict, PubCount, _DelCount, _AckCount, HighRelSeq, State2} = + {_SDict, PubCount, _AckCount, HighRelSeq, State2} = load_segment(SegNum2, State), NextSeqId1 = reconstruct_seq_id(SegNum2, HighRelSeq), NextSeqId2 = @@ -355,6 +353,23 @@ start_msg_store(DurableQueues) -> %% Minor Helpers %%---------------------------------------------------------------------------- +write_to_journal(BinList, SeqIds, Dict, + State = #qistate { journal_count = JCount }) -> + {Hdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append(Hdl, BinList), + {Dict1, JCount1} = + lists:foldl( + fun (SeqId, {Dict2, JCount2}) -> + {add_seqid_to_dict(SeqId, Dict2), JCount2 + 1} + end, {Dict, JCount}, SeqIds), + {Dict1, State1 #qistate { journal_count = JCount1 }}. + +maybe_full_flush(State = #qistate { journal_count = JCount }) -> + case JCount > ?MAX_ACK_JOURNAL_ENTRY_COUNT of + true -> full_flush_journal(State); + false -> State + end. + full_flush_journal(State) -> case can_flush_journal(State) of true -> State1 = flush_journal(State), @@ -388,11 +403,6 @@ get_pub_handle(SegNum, State = #qistate { publish_handle = PubHandle }) -> get_counted_handle(SegNum, State, PubHandle), {Hdl, State1 #qistate { publish_handle = PubHandle1 }}. -get_del_handle(SegNum, State = #qistate { deliver_handle = DelHandle }) -> - {State1, DelHandle1 = {_SegNum, Hdl, _Count}} = - get_counted_handle(SegNum, State, DelHandle), - {Hdl, State1 #qistate { deliver_handle = DelHandle1 }}. - get_counted_handle(SegNum, State, undefined) -> {Hdl, State1} = get_seg_handle(SegNum, State), {State1, {SegNum, Hdl, 1}}; @@ -455,9 +465,12 @@ delete_queue_directory(Dir) -> [ filename:join(Dir, Entry) || Entry <- Entries ]), ok = file:del_dir(Dir). -add_ack_to_ack_dict(SeqId, ADict) -> +add_seqid_to_dict(SeqId, Dict) -> {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], ADict). + add_seqid_to_dict(SegNum, RelSeq, Dict). + +add_seqid_to_dict(SegNum, RelSeq, Dict) -> + dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], Dict). all_segment_nums(Dir) -> lists:sort( @@ -471,11 +484,11 @@ blank_state(QueueName) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), #qistate { dir = Dir, seg_num_handles = dict:new(), - journal_ack_count = 0, + journal_count = 0, journal_ack_dict = dict:new(), + journal_del_dict = dict:new(), seg_ack_counts = dict:new(), - publish_handle = undefined, - deliver_handle = undefined + publish_handle = undefined }. detect_clean_shutdown(Dir) -> @@ -489,6 +502,12 @@ store_clean_shutdown(Dir) -> [write, raw, binary], [{write_buffer, unbuffered}]), ok = file_handle_cache:close(Hdl). + +seg_entries_from_dict(SegNum, Dict) -> + case dict:find(SegNum, Dict) of + {ok, Entries} -> Entries; + error -> [] + end. 
%%---------------------------------------------------------------------------- @@ -500,7 +519,7 @@ queue_index_walker([]) -> queue_index_walker([QueueName|QueueNames]) -> State = blank_state(QueueName), {Hdl, State1} = get_journal_handle(State), - JAckDict = load_journal(Hdl, dict:new()), + {_JDelDict, JAckDict} = load_journal(Hdl, dict:new(), dict:new()), State2 = #qistate { dir = Dir } = close_handle(journal, State1 #qistate { journal_ack_dict = JAckDict }), SegNums = all_segment_nums(Dir), @@ -510,7 +529,7 @@ queue_index_walker({[], State, QueueNames}) -> _State = terminate(State), queue_index_walker(QueueNames); queue_index_walker({[SegNum | SegNums], State, QueueNames}) -> - {SDict, _PubCount, _DelCount, _AckCount, _HighRelSeq, State1} = + {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), queue_index_walker({dict:to_list(SDict), State1, SegNums, QueueNames}); @@ -534,31 +553,18 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> {TotalMsgCount, State1} = lists:foldl( fun (SegNum, {TotalMsgCount1, StateN = - #qistate { publish_handle = PublishHandle, - deliver_handle = DeliverHandle }}) -> - {SDict, PubCount, DelCount, AckCount, _HighRelSeq, StateM} = + #qistate { publish_handle = PublishHandle }}) -> + {SDict, PubCount, AckCount, _HighRelSeq, StateM} = load_segment(SegNum, StateN), - {TransientMsgsAcks, StateL = - #qistate { seg_ack_counts = AckCounts, - journal_ack_dict = JAckDict }} = + StateL = #qistate { seg_ack_counts = AckCounts } = drop_and_deliver(SegNum, SDict, CleanShutdown, StateM), - %% ignore TransientMsgsAcks in AckCounts and - %% JAckDict1 because the TransientMsgsAcks fall - %% through into scatter_journal at which point the - %% AckCounts and TotalMsgCount will be correctly - %% adjusted. TotalMsgCount2 = TotalMsgCount1 + dict:size(SDict), AckCounts1 = case AckCount of 0 -> AckCounts; N -> dict:store(SegNum, N, AckCounts) end, - JAckDict1 = - case TransientMsgsAcks of - [] -> JAckDict; - _ -> dict:store(SegNum, TransientMsgsAcks, JAckDict) - end, - %% In each of the following, there should only be - %% one segment that matches the 3rd case. All other + %% In the following, there should only be max one + %% segment that matches the 3rd case. All other %% segments should either be full or empty. There %% could be no partial segments. PublishHandle1 = case PubCount of @@ -567,90 +573,117 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> _ when PublishHandle == undefined -> {SegNum, undefined, PubCount} end, - DeliverHandle1 = case DelCount of - ?SEGMENT_ENTRIES_COUNT -> DeliverHandle; - 0 -> DeliverHandle; - _ when DeliverHandle == undefined -> - {SegNum, undefined, DelCount} - end, {TotalMsgCount2, StateL #qistate { seg_ack_counts = AckCounts1, - journal_ack_dict = JAckDict1, - publish_handle = PublishHandle1, - deliver_handle = DeliverHandle1 }} + publish_handle = PublishHandle1 }} end, {0, State}, SegNums), {TotalMsgCount, State1}. scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> - {Hdl, State1 = #qistate { journal_ack_dict = JAckDict }} = + {Hdl, State1 = #qistate { journal_del_dict = JDelDict, + journal_ack_dict = JAckDict }} = get_journal_handle(State), - %% ADict may well contain duplicates. However, this is ok, due to - %% the use of sets in replay_journal_acks_to_segment - ADict = load_journal(Hdl, JAckDict), + %% ADict and DDict may well contain duplicates. 
However, this is + %% ok, because we use sets to eliminate dups before writing to + %% segments + {ADict, DDict} = load_journal(Hdl, JAckDict, JDelDict), State2 = close_handle(journal, State1), - {TotalMsgCount1, State3} = - dict:fold(fun replay_journal_acks_to_segment/3, - {TotalMsgCount, + {TotalMsgCount1, ADict1, State3} = + dict:fold(fun replay_journal_to_segment/3, + {TotalMsgCount, ADict, %% supply empty dict so that when %% replay_journal_acks_to_segment loads segments, %% it gets all msgs, and ignores anything we've %% found in the journal. - State2 #qistate { journal_ack_dict = dict:new() }}, ADict), + State2 #qistate { journal_del_dict = dict:new(), + journal_ack_dict = dict:new() }}, DDict), + {TotalMsgCount2, State4} = + dict:fold(fun replay_journal_acks_to_segment/3, + {TotalMsgCount1, State3}, ADict1), JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), ok = file:delete(JournalPath), - {TotalMsgCount1, State3}. + {TotalMsgCount2, State4}. -load_journal(Hdl, ADict) -> +load_journal(Hdl, ADict, DDict) -> case file_handle_cache:read(Hdl, ?SEQ_BYTES) of - {ok, <>} -> - load_journal(Hdl, add_ack_to_ack_dict(SeqId, ADict)); - _ErrOrEoF -> ADict + {ok, <>} -> + load_journal(Hdl, ADict, add_seqid_to_dict(SeqId, DDict)); + {ok, <>} -> + load_journal(Hdl, add_seqid_to_dict(SeqId, ADict), DDict); + _ErrOrEoF -> {ADict, DDict} end. -replay_journal_acks_to_segment(_, [], Acc) -> - Acc; +replay_journal_to_segment(_SegNum, [], {TotalMsgCount, ADict, State}) -> + {TotalMsgCount, ADict, State}; +replay_journal_to_segment(SegNum, Dels, {TotalMsgCount, ADict, State}) -> + {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = + load_segment(SegNum, State), + ValidDels = sets:to_list( + sets:filter( + fun (RelSeq) -> + case dict:find(RelSeq, SDict) of + {ok, {_MsgId, false, _IsPersistent}} -> true; + _ -> false + end + end, sets:from_list(Dels))), + State2 = append_dels_to_segment(SegNum, ValidDels, State1), + Acks = seg_entries_from_dict(SegNum, ADict), + case Acks of + [] -> {TotalMsgCount, ADict, State2}; + _ -> + ADict1 = dict:erase(SegNum, ADict), + {Count, State3} = filter_acks_and_append_to_segment(SegNum, SDict, + Acks, State2), + {TotalMsgCount - Count, ADict1, State3} + end. + +replay_journal_acks_to_segment(_SegNum, [], {TotalMsgCount, State}) -> + {TotalMsgCount, State}; replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> - {SDict, _PubCount, _DelCount, _AckCount, _HighRelSeq, State1} = + {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = load_segment(SegNum, State), + {Count, State2} = + filter_acks_and_append_to_segment(SegNum, SDict, Acks, State1), + {TotalMsgCount - Count, State2}. + +filter_acks_and_append_to_segment(SegNum, SDict, Acks, State) -> ValidRelSeqIds = dict:fetch_keys(SDict), ValidAcks = sets:to_list(sets:intersection(sets:from_list(ValidRelSeqIds), sets:from_list(Acks))), - %% ValidAcks will not contain any duplicates at this point. - {TotalMsgCount - length(ValidAcks), - append_acks_to_segment(SegNum, ValidAcks, State1)}. + {length(ValidAcks), append_acks_to_segment(SegNum, ValidAcks, State)}. 
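
The reworked load_journal/3 can dispatch deliveries and acks into separate dicts because each journal record is a single fixed-width integer whose leading bit flags the entry type, with the sequence id in the remaining bits. A hypothetical sketch of such an encoding — the record width and which bit value means "delivery" are assumptions, not the module's actual macros:

    %% hypothetical 64-bit journal record: top bit 1 = delivery, 0 = ack
    encode_del(SeqId) -> <<1:1, SeqId:63>>.
    encode_ack(SeqId) -> <<0:1, SeqId:63>>.

    decode(<<1:1, SeqId:63>>) -> {del, SeqId};
    decode(<<0:1, SeqId:63>>) -> {ack, SeqId}.
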
-drop_and_deliver(SegNum, SDict, CleanShutdown, State) -> - {AckMe, DeliverMe} = +drop_and_deliver(SegNum, SDict, CleanShutdown, + State = #qistate { journal_del_dict = JDelDict, + journal_ack_dict = JAckDict }) -> + {JDelDict1, JAckDict1} = dict:fold( - fun (RelSeq, {MsgId, IsDelivered, true}, {AckMeAcc, DeliverMeAcc}) -> + fun (RelSeq, {MsgId, IsDelivered, true}, {JDelDict2, JAckDict2}) -> %% msg is persistent, keep only if the msg_store has it case {IsDelivered, rabbit_msg_store:contains(MsgId)} of {false, true} when not CleanShutdown -> %% not delivered, but dirty shutdown => mark delivered - {AckMeAcc, [RelSeq | DeliverMeAcc]}; + {add_seqid_to_dict(SegNum, RelSeq, JDelDict2), + JAckDict2}; {_, true} -> - {AckMeAcc, DeliverMeAcc}; + {JDelDict2, JAckDict2}; {true, false} -> - {[RelSeq | AckMeAcc], DeliverMeAcc}; + {JDelDict2, + add_seqid_to_dict(SegNum, RelSeq, JAckDict2)}; {false, false} -> - {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]} + {add_seqid_to_dict(SegNum, RelSeq, JDelDict2), + add_seqid_to_dict(SegNum, RelSeq, JAckDict2)} end; - (RelSeq, {_MsgId, false, false}, {AckMeAcc, DeliverMeAcc}) -> + (RelSeq, {_MsgId, false, false}, {JDelDict2, JAckDict2}) -> %% not persistent and not delivered => deliver and ack it - {[RelSeq | AckMeAcc], [RelSeq | DeliverMeAcc]}; - (RelSeq, {_MsgId, true, false}, {AckMeAcc, DeliverMeAcc}) -> + {add_seqid_to_dict(SegNum, RelSeq, JDelDict2), + add_seqid_to_dict(SegNum, RelSeq, JAckDict2)}; + (RelSeq, {_MsgId, true, false}, {JDelDict2, JAckDict2}) -> %% not persistent but delivered => ack it - {[RelSeq | AckMeAcc], DeliverMeAcc} - end, {[], []}, SDict), - {Hdl, State1} = get_seg_handle(SegNum, State), - ok = case DeliverMe of - [] -> ok; - _ -> file_handle_cache:append( - Hdl, - [ <> || RelSeq <- DeliverMe ]) - end, - {AckMe, State1}. + {JDelDict2, + add_seqid_to_dict(SegNum, RelSeq, JAckDict2)} + end, {JDelDict, JAckDict}, SDict), + State #qistate { journal_del_dict = JDelDict1, + journal_ack_dict = JAckDict1 }. %%---------------------------------------------------------------------------- @@ -664,35 +697,44 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, error -> filelib:is_file(seg_num_to_path(Dir, SegNum)) end, case SegmentExists of - false -> {dict:new(), 0, 0, 0, 0, State}; + false -> {dict:new(), 0, 0, 0, State}; true -> - {Hdl, State1 = #qistate { journal_ack_dict = JAckDict }} = + {Hdl, State1 = #qistate { journal_del_dict = JDelDict, + journal_ack_dict = JAckDict }} = get_seg_handle(SegNum, State), ok = file_handle_cache:position(Hdl, bof), - {SDict, PubCount, DelCount, AckCount, HighRelSeq} = - load_segment_entries(Hdl, dict:new(), 0, 0, 0, 0), - RelSeqs = case dict:find(SegNum, JAckDict) of - {ok, RelSeqs1} -> RelSeqs1; - error -> [] - end, + {SDict, PubCount, AckCount, HighRelSeq} = + load_segment_entries(Hdl, dict:new(), 0, 0, 0), + %% delete ack'd msgs first {SDict1, AckCount1} = lists:foldl(fun (RelSeq, {SDict2, AckCount2}) -> {dict:erase(RelSeq, SDict2), AckCount2 + 1} - end, {SDict, AckCount}, RelSeqs), - {SDict1, PubCount, DelCount, AckCount1, HighRelSeq, State1} + end, {SDict, AckCount}, + seg_entries_from_dict(SegNum, JAckDict)), + %% ensure remaining msgs are delivered as necessary + SDict3 = + lists:foldl( + fun (RelSeq, SDict4) -> + case dict:find(RelSeq, SDict4) of + {ok, {MsgId, false, IsPersistent}} -> + dict:store(RelSeq, {MsgId, true, IsPersistent}, + SDict4); + _ -> SDict4 + end + end, SDict1, seg_entries_from_dict(SegNum, JDelDict)), + + {SDict3, PubCount, AckCount1, HighRelSeq, State1} end. 
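
drop_and_deliver/4 above encodes a small decision table over (is_persistent, is_delivered, in_msg_store, clean_shutdown). Restated as a standalone sketch (not part of the module), where deliver means "add to journal_del_dict" and ack means "add to journal_ack_dict":

    action(true, IsDelivered, InStore, CleanShutdown) ->       %% persistent
        case {IsDelivered, InStore} of
            {false, true} when not CleanShutdown -> deliver;
            {_,     true}                        -> keep;
            {true,  false}                       -> ack;
            {false, false}                       -> deliver_and_ack
        end;
    action(false, false, _InStore, _Clean) -> deliver_and_ack; %% transient
    action(false, true,  _InStore, _Clean) -> ack.
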
-load_segment_entries(Hdl, SDict, PubCount, DelCount, AckCount, HighRelSeq) -> +load_segment_entries(Hdl, SDict, PubCount, AckCount, HighRelSeq) -> case file_handle_cache:read(Hdl, 1) of {ok, <>} -> {ok, LSB} = file_handle_cache:read( Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, - {SDict1, DelCount1, AckCount1} = - deliver_or_ack_msg(SDict, DelCount, AckCount, RelSeq), - load_segment_entries( - Hdl, SDict1, PubCount, DelCount1, AckCount1, HighRelSeq); + {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq), + load_segment_entries(Hdl, SDict1, PubCount, AckCount1, HighRelSeq); {ok, <>} -> %% because we specify /binary, and binaries are complete @@ -704,23 +746,21 @@ load_segment_entries(Hdl, SDict, PubCount, DelCount, AckCount, HighRelSeq) -> HighRelSeq1 = lists:max([RelSeq, HighRelSeq]), load_segment_entries( Hdl, dict:store(RelSeq, {MsgId, false, 1 == IsPersistentNum}, - SDict), - PubCount + 1, DelCount, AckCount, HighRelSeq1); - _ErrOrEoF -> {SDict, PubCount, DelCount, AckCount, HighRelSeq} + SDict), PubCount + 1, AckCount, HighRelSeq1); + _ErrOrEoF -> {SDict, PubCount, AckCount, HighRelSeq} end. -deliver_or_ack_msg(SDict, DelCount, AckCount, RelSeq) -> +deliver_or_ack_msg(SDict, AckCount, RelSeq) -> case dict:find(RelSeq, SDict) of {ok, {MsgId, false, IsPersistent}} -> - {dict:store(RelSeq, {MsgId, true, IsPersistent}, SDict), - DelCount + 1, AckCount}; + {dict:store(RelSeq, {MsgId, true, IsPersistent}, SDict), AckCount}; {ok, {_MsgId, true, _IsPersistent}} -> - {dict:erase(RelSeq, SDict), DelCount, AckCount + 1} + {dict:erase(RelSeq, SDict), AckCount + 1} end. %%---------------------------------------------------------------------------- -%% Appending Acks to Segments +%% Appending Acks or Dels to Segments %%---------------------------------------------------------------------------- append_acks_to_segment(SegNum, Acks, @@ -749,13 +789,21 @@ append_acks_to_segment(SegNum, AckCount, Acks, State = #qistate { dir = Dir }) {?SEGMENT_ENTRIES_COUNT, State1}; append_acks_to_segment(SegNum, AckCount, Acks, State) when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT -> - {Hdl, State1} = get_seg_handle(SegNum, State), - {ok, AckCount1} = - lists:foldl( - fun (RelSeq, {ok, AckCount2}) -> - {file_handle_cache:append( - Hdl, <>), AckCount2 + 1} - end, {ok, AckCount}, Acks), + {Count, Hdl, State1} = append_to_segment(SegNum, Acks, State), ok = file_handle_cache:sync(Hdl), - {AckCount1, State1}. + {AckCount + Count, State1}. + +append_dels_to_segment(SegNum, Dels, State) -> + {_Count, _Hdl, State1} = append_to_segment(SegNum, Dels, State), + State1. + +append_to_segment(SegNum, AcksOrDels, State) -> + {Hdl, State1} = get_seg_handle(SegNum, State), + {Count, List} = + lists:foldl(fun (RelSeq, {Count1, Acc}) -> + {Count1 + 1, + [<> | Acc]} + end, {0, []}, AcksOrDels), + ok = file_handle_cache:append(Hdl, List), + {Count, Hdl, State1}. 
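
A corollary of the segment format: each relative sequence id sees at most two non-publish records, and deliver_or_ack_msg/3 above interprets the first as the delivery (flipping the is_delivered flag) and the second as the ack (erasing the entry and bumping AckCount). A sketch of both steps, with MsgId assumed bound:

    SDict0 = dict:store(5, {MsgId, false, true}, dict:new()),
    {SDict1, 0} = deliver_or_ack_msg(SDict0, 0, 5),   %% first hit: delivered
    {ok, {MsgId, true, true}} = dict:find(5, SDict1),
    {SDict2, 1} = deliver_or_ack_msg(SDict1, 0, 5),   %% second hit: ack'd
    error = dict:find(5, SDict2).
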
-- cgit v1.2.1


From ce51c81b39394fada2e30190440e5009b1eba6fb Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 2 Nov 2009 11:50:13 +0000
Subject: Documentation, cosmetic and tidying up test artifacts

---
 src/rabbit_queue_index.erl | 56 ++++++++++++++++++++++------------------------
 src/rabbit_tests.erl       |  2 ++
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index acda1636..c68a3d04 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -40,37 +40,35 @@
 %%----------------------------------------------------------------------------
 %% The queue disk index
 %%
-%% The queue disk index operates over an ack journal, and a number of
+%% The queue disk index operates over a journal, and a number of
 %% segment files. Each segment is the same size, both in max number of
 %% entries, and max file size, owing to fixed sized records.
 %%
-%% Publishes and delivery notes are written directly to the segment
-%% files. The segment is found by dividing the sequence id by the the
-%% max number of entries per segment. Only the relative sequence
-%% within the segment is recorded as the sequence id within a segment
-%% file (i.e. sequence id modulo max number of entries per segment).
-%% This is keeps entries as small as possible. Publishes and
-%% deliveries are only ever going to be received in contiguous
-%% ascending order.
+%% Publishes are written directly to the segment files. The segment is
+%% found by dividing the sequence id by the max number of entries
+%% per segment. Only the relative sequence within the segment is
+%% recorded as the sequence id within a segment file (i.e. sequence id
+%% modulo max number of entries per segment). This keeps entries
+%% as small as possible. Publishes are only ever going to be received
+%% in contiguous ascending order.
 %%
-%% Acks are written to a bounded journal and are also held in memory,
-%% in a dict with the segment file as the key. Again, the records are
-%% fixed size: the entire sequence id is written and is limited to a
-%% 64-bit unsigned integer. When the journal gets too big, or
-%% flush_journal is called, the journal is (possibly incrementally)
-%% flushed out to the segment files. As acks can be received from any
-%% delivered message in any order, this journal reduces seeking, and
-%% batches writes to the segment files, keeping performance high. The
-%% flush_journal/1 function returns a boolean indicating whether there
-%% is more flushing work that can be done. This means that the process
-%% can call this whenever it has an empty mailbox, only a small amount
-%% of work is done, allowing the process to respond quickly to new
-%% messages if they arrive, or to call flush_journal/1 several times
-%% until the result indicates there is no more flushing to be done.
+%% Acks and deliveries are written to a bounded journal and are also
+%% held in memory, each in a dict with the segment as the key. Again,
+%% the records are fixed size: the entire sequence id is written and
+%% is limited to a 63-bit unsigned integer. The remaining bit
+%% indicates whether the journal entry is for a delivery or an
+%% ack. When the journal gets too big, or flush_journal is called, the
+%% journal is (possibly incrementally) flushed out to the segment
+%% files.
As acks and delivery notes can be received in any order +%% (this is not obvious for deliveries, but consider what happens when +%% eg msgs are *re*queued - you'll publish and then mark the msgs +%% delivered immediately, which may be out of order), this journal +%% reduces seeking, and batches writes to the segment files, keeping +%% performance high. %% -%% On startup, the ack journal is read along with all the segment -%% files, and the ack journal is fully flushed out to the segment -%% files. Care is taken to ensure that no message can be ack'd twice. +%% On startup, the journal is read along with all the segment files, +%% and the journal is fully flushed out to the segment files. Care is +%% taken to ensure that no message can be delivered or ack'd twice. %% %%---------------------------------------------------------------------------- @@ -203,9 +201,9 @@ write_delivered(SeqId, State = #qistate { journal_del_dict = JDelDict }) -> maybe_full_flush(State1 #qistate { journal_del_dict = JDelDict1 }). write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict }) -> - {JAckDict1, State1} = write_to_journal([<> || - SeqId <- SeqIds], - SeqIds, JAckDict, State), + {JAckDict1, State1} = + write_to_journal([<> || SeqId <- SeqIds], + SeqIds, JAckDict, State), maybe_full_flush(State1 #qistate { journal_ack_dict = JAckDict1 }). sync_seq_ids(SeqIds, SyncAckJournal, State) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index e3f8ddac..c84de421 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1107,4 +1107,6 @@ test_queue_index() -> end, {true, Qi25}), _Qi27 = rabbit_queue_index:terminate_and_erase(Qi26), ok = stop_msg_store(), + ok = rabbit_queue_index:start_msg_store([]), + ok = stop_msg_store(), passed. -- cgit v1.2.1 From 0476f5a25b18a1979499124cd08e976125cc6acb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 2 Nov 2009 15:43:04 +0000 Subject: Still haven't discovered any real bugs in the refactoring that happened to qi on Friday, but quite a lot of cleaning, extra code comments and general polish to qi --- src/rabbit_queue_index.erl | 82 +++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index c68a3d04..4b48df82 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -292,21 +292,20 @@ find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> %% SegNums is sorted, ascending. LowSeqIdSeg = case SegNums of - [] -> 0; - _ -> reconstruct_seq_id(hd(SegNums), 0) + [] -> 0; + [MinSegNum|_] -> reconstruct_seq_id(MinSegNum, 0) end, {NextSeqId, State1} = case SegNums of [] -> {0, State}; - _ -> SegNum2 = lists:last(SegNums), + _ -> MaxSegNum = lists:last(SegNums), {_SDict, PubCount, _AckCount, HighRelSeq, State2} = - load_segment(SegNum2, State), - NextSeqId1 = reconstruct_seq_id(SegNum2, HighRelSeq), - NextSeqId2 = - case PubCount of - 0 -> NextSeqId1; - _ -> NextSeqId1 + 1 - end, + load_segment(MaxSegNum, State), + NextSeqId1 = reconstruct_seq_id(MaxSegNum, HighRelSeq), + NextSeqId2 = case PubCount of + 0 -> NextSeqId1; + _ -> NextSeqId1 + 1 + end, {NextSeqId2, State2} end, {LowSeqIdSeg, NextSeqId, State1}. 
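
reconstruct_seq_id/2, used above, is not shown in this hunk; assuming it is simply the inverse of seq_id_to_seg_and_rel_seq_id/1, it would look like the sketch below. With 16384-entry segments (again an assumed value), a highest segment of 2 with HighRelSeq = 10 gives NextSeqId1 = 32778, bumped to 32779 when the segment contains any publishes:

    %% sketch only -- the real definition lives elsewhere in this module
    reconstruct_seq_id(SegNum, RelSeq) ->
        (SegNum * ?SEGMENT_ENTRIES_COUNT) + RelSeq.
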
@@ -459,8 +458,9 @@ seg_num_to_path(Dir, SegNum) ->
 
 delete_queue_directory(Dir) ->
     {ok, Entries} = file:list_dir(Dir),
-    lists:foreach(fun file:delete/1,
-                  [ filename:join(Dir, Entry) || Entry <- Entries ]),
+    ok = lists:foldl(fun (Entry, ok) ->
+                             file:delete(filename:join(Dir, Entry))
+                     end, ok, Entries),
     ok = file:del_dir(Dir).
 
 add_seqid_to_dict(SeqId, Dict) ->
@@ -556,6 +556,10 @@ read_and_prune_segments(State = #qistate { dir = Dir }) ->
                       load_segment(SegNum, StateN),
                   StateL = #qistate { seg_ack_counts = AckCounts } =
                       drop_and_deliver(SegNum, SDict, CleanShutdown, StateM),
+                  %% ignore the effect of drop_and_deliver on
+                  %% TotalMsgCount and AckCounts, as drop_and_deliver
+                  %% will add to the journal dicts, which will then
+                  %% affect TotalMsgCount when we scatter the journal
                   TotalMsgCount2 = TotalMsgCount1 + dict:size(SDict),
                   AckCounts1 = case AckCount of
                                    0 -> AckCounts;
@@ -589,12 +593,13 @@ scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) ->
     {TotalMsgCount1, ADict1, State3} =
         dict:fold(fun replay_journal_to_segment/3,
                   {TotalMsgCount, ADict,
-                   %% supply empty dict so that when
-                   %% replay_journal_acks_to_segment loads segments,
-                   %% it gets all msgs, and ignores anything we've
-                   %% found in the journal.
+                   %% supply empty dicts so that when
+                   %% replay_journal_to_segment loads segments, it
+                   %% gets all msgs, and ignores anything we've found
+                   %% in the journal.
                    State2 #qistate { journal_del_dict = dict:new(),
                                      journal_ack_dict = dict:new() }}, DDict),
+    %% replay for segments which only had acks, and no deliveries
     {TotalMsgCount2, State4} =
         dict:fold(fun replay_journal_acks_to_segment/3,
                   {TotalMsgCount1, State3}, ADict1),
@@ -767,41 +772,36 @@ append_acks_to_segment(SegNum, Acks,
                    {ok, AckCount1} -> AckCount1;
                    error -> 0
                end,
-    case append_acks_to_segment(SegNum, AckCount, Acks, State) of
-        {0, State1} -> State1;
-        {?SEGMENT_ENTRIES_COUNT,
-         State1 = #qistate { seg_ack_counts = AckCounts1 }} ->
-            State1 #qistate { seg_ack_counts = dict:erase(SegNum, AckCounts1) };
-        {AckCount2, State1 = #qistate { seg_ack_counts = AckCounts1 }} ->
-            State1 #qistate { seg_ack_counts = dict:store(SegNum, AckCount2,
-                                                          AckCounts1) }
-    end.
-
-append_acks_to_segment(SegNum, AckCount, Acks, State = #qistate { dir = Dir })
-  when length(Acks) + AckCount == ?SEGMENT_ENTRIES_COUNT ->
+    AckCount2 = AckCount + length(Acks),
+    AckCounts1 = case AckCount2 of
+                     0 -> AckCounts;
+                     ?SEGMENT_ENTRIES_COUNT -> dict:erase(SegNum, AckCounts);
+                     _ -> dict:store(SegNum, AckCount2, AckCounts)
+                 end,
+    append_acks_to_segment(SegNum, AckCount2, Acks,
+                           State #qistate { seg_ack_counts = AckCounts1 }).
+
+append_acks_to_segment(SegNum, AckCount, _Acks, State = #qistate { dir = Dir })
+  when AckCount == ?SEGMENT_ENTRIES_COUNT ->
     State1 = close_handle(SegNum, State),
     ok = case file:delete(seg_num_to_path(Dir, SegNum)) of
              ok -> ok;
             {error, enoent} -> ok
         end,
-    {?SEGMENT_ENTRIES_COUNT, State1};
+    State1;
 append_acks_to_segment(SegNum, AckCount, Acks, State)
-  when length(Acks) + AckCount < ?SEGMENT_ENTRIES_COUNT ->
-    {Count, Hdl, State1} = append_to_segment(SegNum, Acks, State),
+  when AckCount < ?SEGMENT_ENTRIES_COUNT ->
+    {Hdl, State1} = append_to_segment(SegNum, Acks, State),
     ok = file_handle_cache:sync(Hdl),
-    {AckCount + Count, State1}.
+    State1.
 
 append_dels_to_segment(SegNum, Dels, State) ->
-    {_Count, _Hdl, State1} = append_to_segment(SegNum, Dels, State),
+    {_Hdl, State1} = append_to_segment(SegNum, Dels, State),
     State1.
append_to_segment(SegNum, AcksOrDels, State) -> {Hdl, State1} = get_seg_handle(SegNum, State), - {Count, List} = - lists:foldl(fun (RelSeq, {Count1, Acc}) -> - {Count1 + 1, - [<> | Acc]} - end, {0, []}, AcksOrDels), - ok = file_handle_cache:append(Hdl, List), - {Count, Hdl, State1}. + ok = file_handle_cache:append( + Hdl, [<> || RelSeq <- AcksOrDels ]), + {Hdl, State1}. -- cgit v1.2.1 From 8d94df206d25bc16eed3d65b691498ed496403c3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 2 Nov 2009 18:11:18 +0000 Subject: started work on properly testing the vq. Caught several bugs already. Also hooked in the remeasuring of the egress rate of the variable queue, which will also eventually form the driver to inform the memory_manager. eventually. --- src/rabbit_amqqueue.erl | 11 ++++++--- src/rabbit_amqqueue_process.erl | 49 ++++++++++++++++++++++++++++++++--------- src/rabbit_tests.erl | 46 ++++++++++++++++++++++++++++++++++++++ src/rabbit_variable_queue.erl | 25 ++++++++++++++++----- 4 files changed, 111 insertions(+), 20 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 833dada4..82a0f5b4 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -33,7 +33,7 @@ -export([start/0, recover/1, find_durable_queues/0, declare/4, delete/3, purge/1]). --export([internal_declare/2, internal_delete/1]). +-export([internal_declare/2, internal_delete/1, remeasure_egress_rate/1]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, stat/1, stat_all/0, deliver/2, redeliver/2, requeue/3, ack/4]). @@ -108,10 +108,12 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(tx_commit_msg_store_callback/4 :: (pid(), [message()], [acktag()], - {pid(), any()}) -> 'ok'). +-spec(tx_commit_msg_store_callback/4 :: + (pid(), [message()], [acktag()], {pid(), any()}) -> 'ok'). +-spec(tx_commit_vq_callback/1 :: (pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). +-spec(remeasure_egress_rate/1 :: (pid()) -> 'ok'). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). @@ -370,6 +372,9 @@ internal_delete(QueueName) -> end end). +remeasure_egress_rate(QPid) -> + gen_server2:pcast(QPid, 8, remeasure_egress_rate). + prune_queue_childspecs() -> lists:foreach( fun ({Name, undefined, _Type, _Mods}) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 9d27fd0f..cd70979a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -35,10 +35,11 @@ -behaviour(gen_server2). --define(UNSENT_MESSAGE_LIMIT, 100). --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). --define(SYNC_INTERVAL, 5). %% milliseconds +-define(UNSENT_MESSAGE_LIMIT, 100). +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). +-define(SYNC_INTERVAL, 5). %% milliseconds +-define(EGRESS_REMEASURE_INTERVAL, 5000). -export([start_link/1]). @@ -58,7 +59,8 @@ next_msg_id, active_consumers, blocked_consumers, - sync_timer_ref + sync_timer_ref, + egress_rate_timer_ref }). -record(consumer, {tag, ack_required}). 
@@ -112,7 +114,8 @@ init(Q = #amqqueue { name = QName }) -> next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), - sync_timer_ref = undefined + sync_timer_ref = undefined, + egress_rate_timer_ref = undefined }, {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -151,7 +154,8 @@ noreply(NewState) -> {noreply, NewState1, Timeout}. next_state(State = #q{variable_queue_state = VQS}) -> - next_state1(State, rabbit_variable_queue:needs_sync(VQS)). + next_state1(ensure_egress_rate_timer(State), + rabbit_variable_queue:needs_sync(VQS)). next_state1(State = #q{sync_timer_ref = undefined}, true) -> {start_sync_timer(State), 0}; @@ -160,12 +164,29 @@ next_state1(State, true) -> next_state1(State = #q{sync_timer_ref = undefined, variable_queue_state = VQS}, false) -> {State, case rabbit_variable_queue:can_flush_journal(VQS) of - true -> 0; + true -> 0; false -> hibernate end}; next_state1(State, false) -> {stop_sync_timer(State), 0}. +ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = undefined}) -> + {ok, TRef} = timer:apply_after(?EGRESS_REMEASURE_INTERVAL, rabbit_amqqueue, + remeasure_egress_rate, [self()]), + State#q{egress_rate_timer_ref = TRef}; +ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = just_measured}) -> + State#q{egress_rate_timer_ref = undefined}; +ensure_egress_rate_timer(State) -> + State. + +stop_egress_rate_timer(State = #q{egress_rate_timer_ref = undefined}) -> + State; +stop_egress_rate_timer(State = #q{egress_rate_timer_ref = just_measured}) -> + State#q{egress_rate_timer_ref = undefined}; +stop_egress_rate_timer(State = #q{egress_rate_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{egress_rate_timer_ref = undefined}. + start_sync_timer(State = #q{sync_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, rabbit_amqqueue, tx_commit_vq_callback, [self()]), @@ -848,7 +869,12 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end, NewLimited = Limited andalso LimiterPid =/= undefined, C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} - end)). + end)); + +handle_cast(remeasure_egress_rate, State = #q{variable_queue_state = VQS}) -> + noreply(State#q{egress_rate_timer_ref = just_measured, + variable_queue_state = + rabbit_variable_queue:remeasure_egress_rate(VQS)}). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -886,6 +912,7 @@ handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. -handle_pre_hibernate(State = #q { variable_queue_state = VQS }) -> +handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> VQS1 = rabbit_variable_queue:maybe_start_prefetcher(VQS), - {hibernate, State #q { variable_queue_state = VQS1 }}. + {hibernate, stop_egress_rate_timer( + State#q{ variable_queue_state = VQS1 })}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c84de421..9b53334e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1110,3 +1110,49 @@ test_queue_index() -> ok = rabbit_queue_index:start_msg_store([]), ok = stop_msg_store(), passed. + +variable_queue_publish(IsPersistent, Count, VQ) -> + lists:foldl( + fun (_N, {Acc, VQ1}) -> + {SeqId, VQ2} = rabbit_variable_queue:publish( + rabbit_basic:message( + <<>>, <<>>, [], <<>>, rabbit_guid:guid(), + IsPersistent), VQ1), + {[SeqId | Acc], VQ2} + end, {[], VQ}, lists:seq(1, Count)). 
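
variable_queue_publish/3 above folds rabbit_variable_queue:publish/2 over Count fresh messages (empty exchange name, routing key and body, a new guid each time) and accumulates the sequence ids newest-first. A usage sketch:

    {SeqIds, VQ1} = variable_queue_publish(true, 2, VQ0),  %% 2 persistent msgs
    2 = length(SeqIds).
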
+ +test_variable_queue() -> + SegmentSize = rabbit_queue_index:segment_size(), + stop_msg_store(), + ok = empty_test_queue(), + VQ0 = rabbit_variable_queue:init(test_queue()), + S0 = rabbit_variable_queue:status(VQ0), + 0 = proplists:get_value(len, S0), + false = proplists:get_value(prefetching, S0), + + VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ0), + 0 = proplists:get_value(target_ram_msg_count, + rabbit_variable_queue:status(VQ1)), + + {SeqIds, VQ2} = variable_queue_publish(false, 3 * SegmentSize, VQ1), + S2 = rabbit_variable_queue:status(VQ2), + TwoSegments = 2*SegmentSize, + {gamma, SegmentSize, TwoSegments} = proplists:get_value(gamma, S2), + SegmentSize = proplists:get_value(q3, S2), + ThreeSegments = 3*SegmentSize, + ThreeSegments = proplists:get_value(len, S2), + + VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2), + io:format("~p~n", [rabbit_variable_queue:status(VQ3)]), + {{Msg, false, AckTag, Len1} = Obj, VQ4} = + rabbit_variable_queue:fetch(VQ3), + io:format("~p~n", [Obj]), + timer:sleep(1000), + VQ5 = rabbit_variable_queue:remeasure_egress_rate(VQ4), + VQ6 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ5), + io:format("~p~n", [rabbit_variable_queue:status(VQ6)]), + {{Msg1, false, AckTag1, Len11} = Obj1, VQ7} = + rabbit_variable_queue:fetch(VQ6), + io:format("~p~n", [Obj1]), + io:format("~p~n", [rabbit_variable_queue:status(VQ7)]), + passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7851d8f6..af8a4775 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -36,7 +36,7 @@ ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, - can_flush_journal/1, flush_journal/1]). + can_flush_journal/1, flush_journal/1, status/1]). %%---------------------------------------------------------------------------- @@ -189,7 +189,8 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, %% incorporates the last two values, and not the current value and %% the last average. Averaging helps smooth out spikes. Now = now(), - EgressRate = OutCount / timer:now_diff(Now, Timestamp), + %% EgressRate is in seconds, and now_diff is in microseconds + EgressRate = 1000000 * OutCount / timer:now_diff(Now, Timestamp), AvgEgressRate = (EgressRate + OldEgressRate) / 2, State #vqstate { egress_rate = EgressRate, avg_egress_rate = AvgEgressRate, @@ -420,6 +421,21 @@ flush_journal(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush_journal(IndexState) }. +status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, + len = Len, on_sync = {_, _, From}, + target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, prefetcher = Prefetcher }) -> + [ {q1, queue:len(Q1)}, + {q2, queue:len(Q2)}, + {gamma, Gamma}, + {q3, queue:len(Q3)}, + {q4, Q4}, + {len, Len}, + {outstanding_txns, length(From)}, + {target_ram_msg_count, TargetRamMsgCount}, + {ram_msg_count, RamMsgCount}, + {prefetching, Prefetcher /= undefined} ]. 
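
The remeasure_egress_rate/1 fix above matters because timer:now_diff/2 returns microseconds: without the 1000000 factor the rate would come out a million times too small. The new comment's "in seconds" is best read as "per second". Worked numbers:

    %% OutCount = 500 msgs, Now falls 2s after Timestamp:
    %%   timer:now_diff(Now, Timestamp)        =:= 2000000   (microseconds)
    %%   EgressRate  = 1000000 * 500 / 2000000 = 250.0 msgs/second
    %%   AvgEgressRate = (250.0 + OldEgressRate) / 2
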
+ %%---------------------------------------------------------------------------- persistent_msg_ids(Pubs) -> @@ -895,8 +911,5 @@ combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B; combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A; combine_gammas(#gamma { seq_id = SeqIdLow, count = CountLow }, #gamma { seq_id = SeqIdHigh, count = CountHigh}) -> - true = SeqIdLow + CountLow =< SeqIdHigh, %% ASSERTION - %% note the above assertion does not say ==. This is because acks - %% may mean that the counts are not straight multiples of - %% segment_size. + true = SeqIdLow =< SeqIdHigh, %% ASSERTION #gamma { seq_id = SeqIdLow, count = CountLow + CountHigh}. -- cgit v1.2.1 From 68f228d617db4854b438856b9437b40fd901ec54 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 3 Nov 2009 16:58:17 +0000 Subject: Fixed a few bugs in fhc, pushed fhc through to msg_store and msg_file. API change to fhc:position to match file, also extended fhc with copy. msg_store must also trap exits so that it will shut down cleanly - especially important given that data to be written is now cached more aggressively. Removal of stop from msg_store as it's part of a supervisor and so the correct way to stop it is via the supervisor. --- src/file_handle_cache.erl | 72 +++++++++++-- src/rabbit_file_handle_cache.erl | 128 ---------------------- src/rabbit_msg_file.erl | 17 +-- src/rabbit_msg_store.erl | 226 ++++++++++++++++++--------------------- src/rabbit_queue_index.erl | 4 +- 5 files changed, 180 insertions(+), 267 deletions(-) delete mode 100644 src/rabbit_file_handle_cache.erl diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index fe4e9077..38aa4820 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -32,7 +32,7 @@ -module(file_handle_cache). -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, - last_sync_offset/1, append_write_buffer/1]). + last_sync_offset/1, current_offset/1, append_write_buffer/1, copy/3]). %%---------------------------------------------------------------------------- @@ -76,10 +76,14 @@ ({'ok', ([char()]|binary())} | eof | error())). -spec(append/2 :: (ref(), iodata()) -> ok_or_error()). -spec(sync/1 :: (ref()) -> ok_or_error()). --spec(position/2 :: (ref(), position()) -> ok_or_error()). +-spec(position/2 :: (ref(), position()) -> + ({'ok', non_neg_integer()} | error())). -spec(truncate/1 :: (ref()) -> ok_or_error()). -spec(last_sync_offset/1 :: (ref()) -> ({'ok', integer()} | error())). --spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). +-spec(current_offset/1 :: (ref()) -> ({'ok', integer()} | error())). +-spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). +-spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> + ({'ok', integer()} | error())). -endif. @@ -191,7 +195,7 @@ append(Ref, Data) -> {ok, Handle} -> {Result, Handle1} = case maybe_seek(eof, Handle) of - {ok, Handle2 = #handle { at_eof = true }} -> + {{ok, _Offset}, Handle2 = #handle { at_eof = true }} -> write_to_buffer(Data, Handle2); {{error, _} = Error, Handle2} -> {Error, Handle2} @@ -266,8 +270,17 @@ truncate(Ref) -> last_sync_offset(Ref) -> case get_or_reopen(Ref) of - {ok, #handle { trusted_offset = TrustedOffset }} -> - {ok, TrustedOffset}; + {ok, #handle { trusted_offset = TrustedOffset }} -> {ok, TrustedOffset}; + Error -> Error + end. 
+ +current_offset(Ref) -> + case get_or_reopen(Ref) of + {ok, #handle { at_eof = true, is_write = true, offset = Offset, + write_buffer_size = Size }} -> + {ok, Offset + Size}; + {ok, #handle { offset = Offset }} -> + {ok, Offset}; Error -> Error end. @@ -280,6 +293,45 @@ append_write_buffer(Ref) -> Error -> Error end. +copy(Src, Dest, Count) -> + case get_or_reopen(Src) of + {ok, SHandle = #handle { is_read = true }} -> + case get_or_reopen(Dest) of + {ok, DHandle = #handle { is_write = true }} -> + {Result, SHandle1, DHandle1} = + case write_buffer(SHandle) of + {ok, SHandle2 = #handle { hdl = SHdl, + offset = SOffset }} -> + case write_buffer(DHandle) of + {ok, + DHandle2 = #handle { hdl = DHdl, + offset = DOffset }} -> + Result1 = file:copy(SHdl, DHdl, Count), + case Result1 of + {ok, Count1} -> + {Result1, + SHandle2 #handle { + offset = SOffset + Count1 }, + DHandle2 #handle { + offset = DOffset + Count1 }}; + Error -> + {Error, SHandle2, DHandle2} + end; + Error -> {Error, SHandle2, DHandle} + end; + Error -> {Error, SHandle, DHandle} + end, + put({Src, fhc_handle}, SHandle1), + put({Dest, fhc_handle}, DHandle1), + Result; + {ok, _} -> {error, destination_not_open_for_writing}; + Error -> Error + end; + {ok, _} -> {error, source_not_open_for_reading}; + Error -> Error + end. + + %%---------------------------------------------------------------------------- %% Internal functions %%---------------------------------------------------------------------------- @@ -326,13 +378,15 @@ maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, end, case Result of {ok, Offset1} -> - {ok, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; + {Result, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; {error, _} = Error -> {Error, Handle} end. -write_to_buffer(Data, Handle = #handle { hdl = Hdl, +write_to_buffer(Data, Handle = #handle { hdl = Hdl, offset = Offset, write_buffer_size_limit = 0 }) -> - {file:write(Hdl, Data), Handle #handle { is_dirty = true }}; + Offset1 = Offset + iolist_size(Data), + {file:write(Hdl, Data), + Handle #handle { is_dirty = true, offset = Offset1 }}; write_to_buffer(Data, Handle = #handle { write_buffer = WriteBuffer, write_buffer_size = Size, diff --git a/src/rabbit_file_handle_cache.erl b/src/rabbit_file_handle_cache.erl deleted file mode 100644 index 85a5d6e9..00000000 --- a/src/rabbit_file_handle_cache.erl +++ /dev/null @@ -1,128 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_file_handle_cache). - --export([init/2, close_all/1, close_file/2, with_file_handle_at/4]). - -%%---------------------------------------------------------------------------- - --include("rabbit.hrl"). - --record(hcstate, - { limit, %% how many file handles can we open? - handles, %% dict of the files to their handles, age and offset - ages, %% gb_tree of the files, keyed by age - mode %% the mode to open the files as - }). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(hcstate() :: #hcstate { limit :: non_neg_integer(), - handles :: dict(), - ages :: gb_tree(), - mode :: [file_open_mode()] - }). - --spec(init/2 :: (non_neg_integer(), [file_open_mode()]) -> hcstate()). --spec(close_all/1 :: (hcstate()) -> hcstate()). --spec(close_file/2 :: (file_path(), hcstate()) -> hcstate()). --spec(with_file_handle_at/4 :: (file_path(), non_neg_integer(), - fun ((io_device()) -> {non_neg_integer(), A}), - hcstate()) -> - {A, hcstate()}). --endif. - -%%---------------------------------------------------------------------------- - -init(Limit, OpenMode) -> - #hcstate { limit = Limit, - handles = dict:new(), - ages = gb_trees:empty(), - mode = OpenMode - }. - -close_all(State = #hcstate { handles = Handles }) -> - dict:fold(fun (_File, {Hdl, _Offset, _Then}, _Acc) -> - file:close(Hdl) - end, ok, Handles), - State #hcstate { handles = dict:new(), ages = gb_trees:empty() }. - -close_file(File, State = #hcstate { handles = Handles, - ages = Ages }) -> - case dict:find(File, Handles) of - error -> - State; - {ok, {Hdl, _Offset, Then}} -> - ok = file:close(Hdl), - State #hcstate { handles = dict:erase(File, Handles), - ages = gb_trees:delete(Then, Ages) - } - end. - -with_file_handle_at(File, Offset, Fun, State = #hcstate { handles = Handles, - ages = Ages, - limit = Limit, - mode = Mode }) -> - {FileHdl, OldOffset, Handles1, Ages1} = - case dict:find(File, Handles) of - error -> - {ok, Hdl} = file:open(File, Mode), - case dict:size(Handles) < Limit of - true -> - {Hdl, 0, Handles, Ages}; - false -> - {Then, OldFile, Ages2} = gb_trees:take_smallest(Ages), - {ok, {OldHdl, _Offset, Then}} = - dict:find(OldFile, Handles), - ok = file:close(OldHdl), - {Hdl, 0, dict:erase(OldFile, Handles), Ages2} - end; - {ok, {Hdl, OldOffset1, Then}} -> - {Hdl, OldOffset1, Handles, gb_trees:delete(Then, Ages)} - end, - SeekRes = case Offset == OldOffset of - true -> ok; - false -> case file:position(FileHdl, {bof, Offset}) of - {ok, Offset} -> ok; - KO -> KO - end - end, - {NewOffset, Result} = case SeekRes of - ok -> Fun(FileHdl); - KO1 -> {OldOffset, KO1} - end, - Now = now(), - Handles2 = dict:store(File, {FileHdl, NewOffset, Now}, Handles1), - Ages3 = gb_trees:enter(Now, File, Ages1), - {Result, State #hcstate { handles = Handles2, ages = Ages3 }}. 
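
With the standalone LRU cache module deleted, callers use file_handle_cache:copy/3 directly: the source must be open for reading and the destination for writing, both write buffers are flushed first, and both cached offsets advance by the number of bytes copied. A usage sketch (paths, modes and options are illustrative, not prescribed by the API):

    {ok, Src} = file_handle_cache:open("/tmp/a.dat", [read, raw, binary], []),
    {ok, Dst} = file_handle_cache:open("/tmp/b.dat", [write, raw, binary],
                                       [{write_buffer, 65536}]),
    {ok, 1024} = file_handle_cache:copy(Src, Dst, 1024),
    ok = file_handle_cache:sync(Dst),
    ok = file_handle_cache:close(Src),
    ok = file_handle_cache:close(Dst).
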
diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 84dce90e..c0826159 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -70,10 +70,11 @@ append(FileHdl, MsgId, MsgBody) MsgBodyBin = term_to_binary(MsgBody), MsgBodyBinSize = size(MsgBodyBin), Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES, - case file:write(FileHdl, <>) of + case file_handle_cache:append(FileHdl, + <>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; KO -> KO end. @@ -81,7 +82,7 @@ append(FileHdl, MsgId, MsgBody) read(FileHdl, TotalSize) -> Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, BodyBinSize = Size - ?MSG_ID_SIZE_BYTES, - case file:read(FileHdl, TotalSize) of + case file_handle_cache:read(FileHdl, TotalSize) of {ok, < end. read_next(FileHdl, Offset) -> - case file:read(FileHdl, ?SIZE_AND_MSG_ID_BYTES) of + case file_handle_cache:read(FileHdl, ?SIZE_AND_MSG_ID_BYTES) of %% Here we take option 5 from %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in which %% we read the MsgId as a number, and then convert it back to @@ -116,11 +117,11 @@ read_next(FileHdl, Offset) -> _ -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Offset + TotalSize - 1, - case file:position( + case file_handle_cache:position( FileHdl, {cur, Size - ?MSG_ID_SIZE_BYTES}) of {ok, ExpectedAbsPos} -> NextOffset = ExpectedAbsPos + 1, - case file:read(FileHdl, 1) of + case file_handle_cache:read(FileHdl, 1) of {ok, <>} -> <> = diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index a492a024..89f13c6f 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -34,7 +34,7 @@ -behaviour(gen_server2). -export([start_link/3, write/2, read/1, peruse/2, contains/1, remove/1, - release/1, sync/2, stop/0]). + release/1, sync/2]). -export([sync/0]). %% internal @@ -46,6 +46,7 @@ -define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). -define(SYNC_INTERVAL, 5). %% milliseconds +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB %%---------------------------------------------------------------------------- @@ -67,7 +68,6 @@ -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). -spec(sync/2 :: ([msg_id()], fun (() -> any())) -> 'ok'). --spec(stop/0 :: () -> 'ok'). -endif. @@ -79,12 +79,9 @@ file_summary, %% what's in the files? current_file, %% current file name as number current_file_handle, %% current file handle - current_offset, %% current offset within current file - current_dirty, %% has the current file been written to %% since the last fsync? file_size_limit, %% how big can our files get? - read_file_handle_cache, %% file handle cache for reading - last_sync_offset, %% current_offset at the last time we sync'd + file_handle_cache, %% file handle cache on_sync, %% pending sync requests sync_timer_ref, %% TRef for our interval timer message_cache %% ets message cache @@ -241,7 +238,6 @@ contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). -stop() -> gen_server2:call(?SERVER, stop, infinity). sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal %%---------------------------------------------------------------------------- @@ -249,15 +245,14 @@ sync() -> gen_server2:pcast(?SERVER, 9, sync). 
%% internal %%---------------------------------------------------------------------------- init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> - + process_flag(trap_exit, true), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), MsgLocations = ets:new(?MSG_LOC_NAME, [set, private, {keypos, #msg_location.msg_id}]), InitFile = 0, - HandleCache = rabbit_file_handle_cache:init(?MAX_READ_FILE_HANDLES, - ?BINARY_MODE ++ [read]), FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, [set, private, {keypos, #file_summary.file}]), MessageCache = ets:new(?CACHE_ETS_NAME, [set, private]), @@ -267,11 +262,8 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> file_summary = FileSummary, current_file = InitFile, current_file_handle = undefined, - current_offset = 0, - current_dirty = false, file_size_limit = ?FILE_SIZE_LIMIT, - read_file_handle_cache = HandleCache, - last_sync_offset = 0, + file_handle_cache = dict:new(), on_sync = [], sync_timer_ref = undefined, message_cache = MessageCache @@ -286,13 +278,14 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> %% There should be no more tmp files now, so go ahead and load the %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], - State1 = #msstate { current_file = CurFile, current_offset = Offset } = + {Offset, State1 = #msstate { current_file = CurFile }} = build_index(Files, State), %% read is only needed so that we can seek {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), - ?WRITE_MODE ++ [read]), - {ok, Offset} = file:position(FileHdl, Offset), + [read | ?WRITE_MODE]), + {ok, Offset} = file_handle_cache:position(FileHdl, Offset), + ok = file_handle_cache:truncate(FileHdl), {ok, State1 #msstate { current_file_handle = FileHdl }}. @@ -304,19 +297,16 @@ handle_call({contains, MsgId}, _From, State) -> reply(case index_lookup(MsgId, State) of not_found -> false; #msg_location {} -> true - end, State); - -handle_call(stop, _From, State) -> - {stop, normal, ok, State}. + end, State). 
handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, current_file = CurFile, - current_offset = CurOffset, file_summary = FileSummary }) -> case index_lookup(MsgId, State) of not_found -> %% New message, lots to do + {ok, CurOffset} = file_handle_cache:current_offset(CurHdl), {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { msg_id = MsgId, ref_count = 1, file = CurFile, @@ -336,10 +326,7 @@ handle_cast({write, MsgId, Msg}, valid_total_size = ValidTotalSize1, contiguous_top = ContiguousTop1 }), NextOffset = CurOffset + TotalSize, - noreply( - maybe_roll_to_new_file( - NextOffset, State #msstate {current_offset = NextOffset, - current_dirty = true})); + noreply(maybe_roll_to_new_file(NextOffset, State)); StoreEntry = #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter ok = index_update(StoreEntry #msg_location { @@ -371,15 +358,11 @@ handle_cast({release, MsgIds}, State) -> lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), noreply(State); -handle_cast({sync, _MsgIds, K}, - State = #msstate { current_dirty = false }) -> - K(), - noreply(State); - handle_cast({sync, MsgIds, K}, - State = #msstate { current_file = CurFile, - last_sync_offset = SyncOffset, - on_sync = Syncs }) -> + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl, + on_sync = Syncs }) -> + {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), case lists:any(fun (MsgId) -> #msg_location { file = File, offset = Offset } = index_lookup(MsgId, State), @@ -398,22 +381,19 @@ handle_info(timeout, State) -> terminate(_Reason, State = #msstate { msg_locations = MsgLocations, file_summary = FileSummary, - current_file_handle = FileHdl, - read_file_handle_cache = HC }) -> + current_file_handle = FileHdl }) -> State1 = case FileHdl of undefined -> State; _ -> State2 = sync(State), - file:close(FileHdl), + file_handle_cache:close(FileHdl), State2 end, - HC1 = rabbit_file_handle_cache:close_all(HC), + State3 = close_all_handles(State1), ets:delete(MsgLocations), ets:delete(FileSummary), - State1 #msstate { msg_locations = undefined, - file_summary = undefined, - current_file_handle = undefined, - current_dirty = false, - read_file_handle_cache = HC1 }. + State3 #msstate { msg_locations = undefined, + file_summary = undefined, + current_file_handle = undefined }. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -455,50 +435,28 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). -open_file(Dir, FileName, Mode) -> - file:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode). - sort_file_names(FileNames) -> lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, FileNames). preallocate(Hdl, FileSizeLimit, FinalPos) -> - {ok, FileSizeLimit} = file:position(Hdl, FileSizeLimit), - ok = file:truncate(Hdl), - {ok, FinalPos} = file:position(Hdl, FinalPos), + {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), + ok = file_handle_cache:truncate(Hdl), + {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), ok. truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> - {ok, Lowpoint} = file:position(FileHdl, Lowpoint), - ok = file:truncate(FileHdl), + {ok, Lowpoint} = file_handle_cache:position(FileHdl, Lowpoint), + ok = file_handle_cache:truncate(FileHdl), ok = preallocate(FileHdl, Highpoint, Lowpoint). 
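
truncate_and_extend_file/3 above cuts the file back to Lowpoint, then preallocate/3 grows it to Highpoint via position-and-truncate and seeks back, so the handle is left at Lowpoint ready for the data about to be copied in. For instance (sketch):

    %% a 100-byte file: shrink to 40 bytes, preallocate out to 4096
    ok = truncate_and_extend_file(Hdl, 40, 4096),
    %% file size is now 4096; the next write lands at offset 40
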
-sync(State = #msstate { current_dirty = false }) -> - State; sync(State = #msstate { current_file_handle = CurHdl, - current_offset = CurOffset, on_sync = Syncs }) -> State1 = stop_sync_timer(State), - ok = file:sync(CurHdl), + %% we depend on this really calling sync, even if [] == Syncs + ok = file_handle_cache:sync(CurHdl), lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), - State1 #msstate { current_dirty = false, - last_sync_offset = CurOffset, - on_sync = [] }. - -with_read_handle_at(File, Offset, Fun, - State = #msstate { dir = Dir, - read_file_handle_cache = HC, - current_file = CurFile, - current_dirty = IsDirty, - last_sync_offset = SyncOffset }) -> - State1 = if CurFile == File andalso IsDirty andalso Offset >= SyncOffset -> - sync(State); - true -> State - end, - FilePath = form_filename(Dir, filenum_to_name(File)), - {Result, HC1} = - rabbit_file_handle_cache:with_file_handle_at(FilePath, Offset, Fun, HC), - {Result, State1 #msstate { read_file_handle_cache = HC1 }}. + State1 #msstate { on_sync = [] }. remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> StoreEntry = #msg_location { ref_count = RefCount, file = File, @@ -524,7 +482,9 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> no_compact end. -internal_read_message(MsgId, State) -> +internal_read_message(MsgId, + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl }) -> case index_lookup(MsgId, State) of not_found -> {not_found, State}; #msg_location { ref_count = RefCount, @@ -533,37 +493,70 @@ internal_read_message(MsgId, State) -> total_size = TotalSize } -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {{ok, {MsgId, Msg}}, State1} = - with_read_handle_at( - File, Offset, - fun(Hdl) -> - Res = case rabbit_msg_file:read( - Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> Obj; - {ok, Rest} -> - throw({error, - {misread, - [{old_state, State}, + {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), + State1 = + case CurFile =:= File andalso Offset >= SyncOffset of + true -> sync(State); + false -> State + end, + {Hdl, State2} = get_read_handle(File, State1), + {ok, Offset} = file_handle_cache:position(Hdl, Offset), + {ok, {MsgId, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {MsgId, _}} = Obj -> Obj; + Rest -> + throw({error, {misread, [{old_state, State}, {file_num, File}, {offset, Offset}, {read, Rest}]}}) - end, - {Offset + TotalSize, Res} - end, State), + end, ok = if RefCount > 1 -> - insert_into_cache(MsgId, Msg, State1); + insert_into_cache(MsgId, Msg, State2); true -> ok %% it's not in the cache and we %% only have one reference to the %% message. So don't bother %% putting it in the cache. end, - {{ok, Msg}, State1}; + {{ok, Msg}, State2}; {Msg, _RefCount} -> {{ok, Msg}, State} end end. +close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> + case dict:find(Key, FHC) of + {ok, Hdl} -> + ok = close_file(Hdl), + State #msstate { file_handle_cache = dict:erase(Key, FHC) }; + error -> State + end. + +close_all_handles(State = #msstate { file_handle_cache = FHC }) -> + ok = dict:fold(fun (_Key, Hdl, ok) -> + file_handle_cache:close(Hdl) + end, ok, FHC), + State #msstate { file_handle_cache = dict:new() }. + +get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC }) -> + case dict:find(FileNum, FHC) of + {ok, Hdl} -> {Hdl, State}; + error -> new_handle(FileNum, filenum_to_name(FileNum), + [read | ?BINARY_MODE], State) + end. 
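
Read handles are now cached in a plain dict inside #msstate (replacing the old rabbit_file_handle_cache LRU): get_read_handle/2 opens and caches on first use, and close_handle/2 is the only eviction. Behaviour sketch:

    {Hdl, State1} = get_read_handle(3, State0),  %% opens file 3, caches handle
    {Hdl, State1} = get_read_handle(3, State1),  %% cache hit: same handle back
    State2 = close_handle(3, State1),            %% closes and evicts
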
+ +new_handle(Key, FileName, Mode, State = #msstate { file_handle_cache = FHC, + dir = Dir }) -> + {ok, Hdl} = open_file(Dir, FileName, Mode), + {Hdl, State #msstate { file_handle_cache = dict:store(Key, Hdl, FHC) }}. + +open_file(Dir, FileName, Mode) -> + file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, + [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). + +close_file(Hdl) -> + file_handle_cache:close(Hdl). + %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- @@ -732,7 +725,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> true = is_disjoint(MsgIds1, MsgIdsTmp), %% must open with read flag, otherwise will stomp over contents {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName, - ?WRITE_MODE ++ [read]), + [read | ?WRITE_MODE]), %% Wipe out any rubbish at the end of the file. Remember %% the head of the list will be the highest entry in the %% file. @@ -743,10 +736,9 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% fail, but we still aren't risking losing data ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE), - {ok, TmpSize} = file:copy(TmpHdl, MainHdl, TmpSize), - ok = file:sync(MainHdl), - ok = file:close(MainHdl), - ok = file:close(TmpHdl), + {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize), + ok = file_handle_cache:close(MainHdl), + ok = file_handle_cache:close(TmpHdl), ok = file:delete(TmpPath), {ok, _MainMessages, MsgIdsMain} = @@ -775,7 +767,7 @@ scan_file_for_valid_messages(Dir, FileName) -> Valid = rabbit_msg_file:scan(Hdl), %% if something really bad's happened, the close could fail, %% but ignore - file:close(Hdl), + file_handle_cache:close(Hdl), Valid; {error, enoent} -> {ok, []}; {error, Reason} -> throw({error, @@ -812,8 +804,8 @@ build_index(Left, [], FilesToCompact, State) -> total_size = TotalSize } | _] -> MaxOffset + TotalSize end, - compact(FilesToCompact, %% this never includes the current file - State #msstate { current_file = Left, current_offset = Offset }); + {Offset, compact(FilesToCompact, %% this never includes the current file + State #msstate { current_file = Left })}; build_index(Left, [File|Files], FilesToCompact, State = #msstate { dir = Dir, file_summary = FileSummary }) -> {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), @@ -860,7 +852,7 @@ maybe_roll_to_new_file(Offset, file_summary = FileSummary }) when Offset >= FileSizeLimit -> State1 = sync(State), - ok = file:close(CurHdl), + ok = close_file(CurHdl), NextFile = CurFile + 1, {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), true = ets:update_element(FileSummary, CurFile, @@ -870,9 +862,7 @@ maybe_roll_to_new_file(Offset, file = NextFile, valid_total_size = 0, contiguous_top = 0, left = CurFile, right = undefined }), State2 = State1 #msstate { current_file_handle = NextHdl, - current_file = NextFile, - current_offset = 0, - last_sync_offset = 0 }, + current_file = NextFile }, compact([CurFile], State2); maybe_roll_to_new_file(_, State) -> State. 
@@ -957,9 +947,9 @@ combine_files(#file_summary { file = Source, contiguous_top = DestinationContiguousTop, right = Source }, State = #msstate { dir = Dir }) -> + State1 = close_handle(Source, close_handle(Destination, State)), SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), - State1 = close_file(SourceName, close_file(DestinationName, State)), {ok, SourceHdl} = open_file(Dir, SourceName, ?READ_AHEAD_MODE), {ok, DestinationHdl} = open_file(Dir, DestinationName, ?READ_AHEAD_MODE ++ ?WRITE_MODE), @@ -998,21 +988,22 @@ combine_files(#file_summary { file = Source, %% Destination, and MsgLocationDets has been updated to %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end - {ok, 0} = file:position(TmpHdl, 0), + {ok, 0} = file_handle_cache:position(TmpHdl, 0), ok = truncate_and_extend_file( DestinationHdl, DestinationContiguousTop, ExpectedSize), - {ok, TmpSize} = file:copy(TmpHdl, DestinationHdl, TmpSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), %% position in DestinationHdl should now be DestinationValid - ok = file:sync(DestinationHdl), - ok = file:close(TmpHdl), + ok = file_handle_cache:sync(DestinationHdl), + ok = close_file(TmpHdl), ok = file:delete(form_filename(Dir, Tmp)) end, SourceWorkList = index_search_by_file(Source, State1), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up - ok = file:close(SourceHdl), - ok = file:close(DestinationHdl), + ok = close_file(SourceHdl), + ok = close_file(DestinationHdl), ok = file:delete(form_filename(Dir, SourceName)), State1. @@ -1042,24 +1033,19 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, %% the previous block BSize = BlockEnd - BlockStart, {ok, BlockStart} = - file:position(SourceHdl, BlockStart), - {ok, BSize} = - file:copy(SourceHdl, DestinationHdl, BSize), + file_handle_cache:position(SourceHdl, BlockStart), + {ok, BSize} = file_handle_cache:copy( + SourceHdl, DestinationHdl, BSize), {NextOffset, Offset, Offset + TotalSize} end end, {InitOffset, undefined, undefined}, WorkList), %% do the last remaining block BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = file:position(SourceHdl, BlockStart1), - {ok, BSize1} = file:copy(SourceHdl, DestinationHdl, BSize1), - ok = file:sync(DestinationHdl), + {ok, BlockStart1} = file_handle_cache:position(SourceHdl, BlockStart1), + {ok, BSize1} = file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), + ok = file_handle_cache:sync(DestinationHdl), ok. -close_file(FileName, - State = #msstate { dir = Dir, read_file_handle_cache = HC }) -> - HC1 = rabbit_file_handle_cache:close_file(form_filename(Dir, FileName), HC), - State #msstate { read_file_handle_cache = HC1 }. 
- delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary }) -> [#file_summary { valid_total_size = ValidData, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4b48df82..febf3217 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -251,7 +251,7 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, if JCount1 == 0 -> {Hdl, State4} = get_journal_handle(State3), - ok = file_handle_cache:position(Hdl, bof), + {ok, 0} = file_handle_cache:position(Hdl, bof), ok = file_handle_cache:truncate(Hdl), ok = file_handle_cache:sync(Hdl), State4; @@ -705,7 +705,7 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, {Hdl, State1 = #qistate { journal_del_dict = JDelDict, journal_ack_dict = JAckDict }} = get_seg_handle(SegNum, State), - ok = file_handle_cache:position(Hdl, bof), + {ok, 0} = file_handle_cache:position(Hdl, bof), {SDict, PubCount, AckCount, HighRelSeq} = load_segment_entries(Hdl, dict:new(), 0, 0, 0), %% delete ack'd msgs first -- cgit v1.2.1 From 8bedab94680d0f6b6f634ebb32be1d8d80ff9955 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 3 Nov 2009 18:23:18 +0000 Subject: subtle bug in queue_index in that there's a sort of race because scattering the journal out to the segments can cause the publish file handle to be closed, should the segment become full. Fixed. --- src/file_handle_cache.erl | 2 +- src/rabbit_queue_index.erl | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 38aa4820..ae9133b8 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -338,7 +338,7 @@ copy(Src, Dest, Count) -> get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of - undefined -> {error, not_open}; + undefined -> {error, not_open, Ref}; #handle { hdl = closed, mode = Mode, global_key = GRef, options = Options } -> #file { path = Path } = get({GRef, fhc_file}), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index febf3217..f21f9e17 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -175,7 +175,7 @@ terminate(State = #qistate { seg_num_handles = SegHdls }) -> true -> State; false -> State1 = #qistate { dir = Dir } = close_all_handles(State), store_clean_shutdown(Dir), - State1 + State1 #qistate { publish_handle = undefined } end. terminate_and_erase(State) -> @@ -781,9 +781,15 @@ append_acks_to_segment(SegNum, Acks, append_acks_to_segment(SegNum, AckCount2, Acks, State #qistate { seg_ack_counts = AckCounts1 }). -append_acks_to_segment(SegNum, AckCount, _Acks, State = #qistate { dir = Dir }) +append_acks_to_segment(SegNum, AckCount, _Acks, + State = #qistate { dir = Dir, publish_handle = PubHdl }) when AckCount == ?SEGMENT_ENTRIES_COUNT -> - State1 = close_handle(SegNum, State), + PubHdl1 = case PubHdl of + {SegNum, Hdl, ?SEGMENT_ENTRIES_COUNT} when Hdl /= undefined -> + {SegNum + 1, undefined, 0}; + _ -> PubHdl + end, + State1 = close_handle(SegNum, State #qistate { publish_handle = PubHdl1 }), ok = case file:delete(seg_num_to_path(Dir, SegNum)) of ok -> ok; {error, enoent} -> ok -- cgit v1.2.1 From 45b10e946c2e362b475102445386254db21a486a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 6 Nov 2009 14:18:51 +0000 Subject: man 2 write states: POSIX requires that a read(2) which can be proved to occur after a write() has returned returns the new data. 
This means that in the msg_store, when we're reading a msg, we only need to make sure that write has really been called on the msg. We do not need to sync here. So, add a test, explicitly to hit this condition. We were using both buffering in the fhc, and also delayed_write. This is dangerous, because we can think that we've written out from the fhc, but even though we're writing > the 64 kbyte default limit in delayed_write, we may not be writing a whole number of 64kb blocks, thus there may be something left in the delayed_write buffer, so remove that. Performance doesn't suffer because we're writing blocks of 1MB anyway! Also, the current_offset/1 fun in fhc was returning the virtual address - i.e. where we would be if we flushed out the write buffer. We actually need both variants - getting the real raw offset of the handle, and getting the virtual offset. Finally, as a result of all of this, the sync fun in msg_store now has no need to do any work unless there are outstanding transactions. --- src/file_handle_cache.erl | 23 +++++++++++++++-------- src/rabbit_msg_store.erl | 38 ++++++++++++++++++++++---------------- src/rabbit_tests.erl | 5 +++++ 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index ae9133b8..5c1c5a83 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -32,7 +32,8 @@ -module(file_handle_cache). -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, - last_sync_offset/1, current_offset/1, append_write_buffer/1, copy/3]). + last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, + append_write_buffer/1, copy/3]). %%---------------------------------------------------------------------------- @@ -80,7 +81,8 @@ ({'ok', non_neg_integer()} | error())). -spec(truncate/1 :: (ref()) -> ok_or_error()). -spec(last_sync_offset/1 :: (ref()) -> ({'ok', integer()} | error())). --spec(current_offset/1 :: (ref()) -> ({'ok', integer()} | error())). +-spec(current_virtual_offset/1 :: (ref()) -> ({'ok', integer()} | error())). +-spec(current_raw_offset/1 :: (ref()) -> ({'ok', integer()} | error())). -spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> ({'ok', integer()} | error())). @@ -109,7 +111,7 @@ open(Path, Mode, Options) -> true -> RCount + 1; false -> RCount end, - put({Path1, fhc_file}, + put({GRef, fhc_file}, File #file { reader_count = RCount1, has_writer = HasWriter orelse IsWriter }), @@ -149,9 +151,9 @@ close(Ref) -> RCount1 = case IsReader of true -> RCount - 1; false -> RCount - end, + end, HasWriter1 = HasWriter andalso not IsWriter, - case RCount1 == 0 andalso not HasWriter1 of + case RCount1 =:= 0 andalso not HasWriter1 of true -> erase({GRef, fhc_file}), erase({Path, fhc_path}); false -> put({GRef, fhc_file}, @@ -274,16 +276,21 @@ last_sync_offset(Ref) -> Error -> Error end. -current_offset(Ref) -> +current_virtual_offset(Ref) -> case get_or_reopen(Ref) of {ok, #handle { at_eof = true, is_write = true, offset = Offset, write_buffer_size = Size }} -> {ok, Offset + Size}; - {ok, #handle { offset = Offset }} -> - {ok, Offset}; + {ok, #handle { offset = Offset }} -> {ok, Offset}; Error -> Error end. +current_raw_offset(Ref) -> + case get_or_reopen(Ref) of + {ok, #handle { offset = Offset }} -> {ok, Offset}; + Error -> Error + end. 
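%% A sketch of how the two offsets relate (Ref is assumed to be a writable
%% file_handle_cache ref with a write buffer; ok/tuple return shapes are
%% assumed from the API above):
offset_demo(Ref) ->
    ok = file_handle_cache:append(Ref, <<"payload">>),
    %% the virtual offset includes bytes still sitting in the write buffer
    {ok, Virtual} = file_handle_cache:current_virtual_offset(Ref),
    %% the raw offset only reflects what has reached the underlying handle
    {ok, Raw} = file_handle_cache:current_raw_offset(Ref),
    true = Virtual >= Raw,
    %% flushing the write buffer reconciles the two
    ok = file_handle_cache:append_write_buffer(Ref),
    {ok, Virtual} = file_handle_cache:current_raw_offset(Ref),
    ok.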
+ append_write_buffer(Ref) -> case get_or_reopen(Ref) of {ok, Handle} -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 89f13c6f..591435ba 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -102,7 +102,7 @@ -define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read]). -define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). --define(WRITE_MODE, [write, delayed_write]). +-define(WRITE_MODE, [write]). %% The components: %% @@ -306,7 +306,7 @@ handle_cast({write, MsgId, Msg}, case index_lookup(MsgId, State) of not_found -> %% New message, lots to do - {ok, CurOffset} = file_handle_cache:current_offset(CurHdl), + {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { msg_id = MsgId, ref_count = 1, file = CurFile, @@ -453,10 +453,13 @@ truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> sync(State = #msstate { current_file_handle = CurHdl, on_sync = Syncs }) -> State1 = stop_sync_timer(State), - %% we depend on this really calling sync, even if [] == Syncs - ok = file_handle_cache:sync(CurHdl), - lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), - State1 #msstate { on_sync = [] }. + case Syncs of + [] -> State1; + _ -> + ok = file_handle_cache:sync(CurHdl), + lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), + State1 #msstate { on_sync = [] } + end. remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> StoreEntry = #msg_location { ref_count = RefCount, file = File, @@ -493,13 +496,15 @@ internal_read_message(MsgId, total_size = TotalSize } -> case fetch_and_increment_cache(MsgId, State) of not_found -> - {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), - State1 = - case CurFile =:= File andalso Offset >= SyncOffset of - true -> sync(State); - false -> State - end, - {Hdl, State2} = get_read_handle(File, State1), + {ok, CurOffset} = + file_handle_cache:current_raw_offset(CurHdl), + ok = case CurFile =:= File andalso Offset >= CurOffset of + true -> + file_handle_cache:append_write_buffer(CurHdl); + false -> + ok + end, + {Hdl, State1} = get_read_handle(File, State), {ok, Offset} = file_handle_cache:position(Hdl, Offset), {ok, {MsgId, Msg}} = case rabbit_msg_file:read(Hdl, TotalSize) of @@ -508,17 +513,18 @@ internal_read_message(MsgId, throw({error, {misread, [{old_state, State}, {file_num, File}, {offset, Offset}, - {read, Rest}]}}) + {read, Rest}, + {proc_dict, get()}]}}) end, ok = if RefCount > 1 -> - insert_into_cache(MsgId, Msg, State2); + insert_into_cache(MsgId, Msg, State1); true -> ok %% it's not in the cache and we %% only have one reference to the %% message. So don't bother %% putting it in the cache. end, - {{ok, Msg}, State2}; + {{ok, Msg}, State1}; {Msg, _RefCount} -> {{ok, Msg}, State} end diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 9b53334e..d618d3e0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -965,6 +965,11 @@ test_msg_store() -> ok = start_msg_store_empty(), %% check we don't contain any of the msgs false = msg_store_contains(false, MsgIds), + %% publish the first half again + ok = msg_store_write(MsgIds1stHalf), + %% this should force some sort of sync internally otherwise misread + ok = msg_store_read(MsgIds1stHalf), + ok = rabbit_msg_store:remove(MsgIds1stHalf), %% push a lot of msgs in... 
BigCount = 100000, MsgIdsBig = lists:seq(1, BigCount), -- cgit v1.2.1 From 76b0c9b26b9eb62163455a4b831a4c0c73fd0faf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 6 Nov 2009 18:20:24 +0000 Subject: Making progress with vq testing and debugging - up to 75% code coverage overall, and ironed out several bugs --- src/rabbit_tests.erl | 103 +++++++++++++++++++++++++++++++++------- src/rabbit_variable_queue.erl | 108 ++++++++++++++++++++++++++---------------- 2 files changed, 152 insertions(+), 59 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index d618d3e0..15b9161b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -52,6 +52,7 @@ test_content_prop_roundtrip(Datum, Binary) -> all_tests() -> passed = test_msg_store(), passed = test_queue_index(), + passed = test_variable_queue(), passed = test_priority_queue(), passed = test_unfold(), passed = test_parsing(), @@ -1126,38 +1127,104 @@ variable_queue_publish(IsPersistent, Count, VQ) -> {[SeqId | Acc], VQ2} end, {[], VQ}, lists:seq(1, Count)). +variable_queue_fetch(Count, IsPersistent, Len, VQ) -> + lists:foldl(fun (N, {VQN, AckTagsAcc}) -> + Rem = Len - N, + {{_MsgN, IsPersistent, AckTagN, Rem}, VQM} = + rabbit_variable_queue:fetch(VQN), + {VQM, [AckTagN | AckTagsAcc]} + end, {VQ, []}, lists:seq(1, Count)). + +assert_prop(List, Prop, Value) -> + Value = proplists:get_value(Prop, List). + test_variable_queue() -> SegmentSize = rabbit_queue_index:segment_size(), stop_msg_store(), ok = empty_test_queue(), VQ0 = rabbit_variable_queue:init(test_queue()), S0 = rabbit_variable_queue:status(VQ0), - 0 = proplists:get_value(len, S0), - false = proplists:get_value(prefetching, S0), + assert_prop(S0, len, 0), + assert_prop(S0, prefetching, false), VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ0), - 0 = proplists:get_value(target_ram_msg_count, - rabbit_variable_queue:status(VQ1)), + assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0), {SeqIds, VQ2} = variable_queue_publish(false, 3 * SegmentSize, VQ1), S2 = rabbit_variable_queue:status(VQ2), - TwoSegments = 2*SegmentSize, - {gamma, SegmentSize, TwoSegments} = proplists:get_value(gamma, S2), - SegmentSize = proplists:get_value(q3, S2), - ThreeSegments = 3*SegmentSize, - ThreeSegments = proplists:get_value(len, S2), + assert_prop(S2, gamma, {gamma, SegmentSize, 2*SegmentSize}), + assert_prop(S2, q3, SegmentSize), + assert_prop(S2, len, 3*SegmentSize), VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2), - io:format("~p~n", [rabbit_variable_queue:status(VQ3)]), - {{Msg, false, AckTag, Len1} = Obj, VQ4} = - rabbit_variable_queue:fetch(VQ3), - io:format("~p~n", [Obj]), + Len1 = 3*SegmentSize - 1, + {{_Msg, false, AckTag, Len1}, VQ4} = rabbit_variable_queue:fetch(VQ3), timer:sleep(1000), VQ5 = rabbit_variable_queue:remeasure_egress_rate(VQ4), VQ6 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ5), - io:format("~p~n", [rabbit_variable_queue:status(VQ6)]), - {{Msg1, false, AckTag1, Len11} = Obj1, VQ7} = - rabbit_variable_queue:fetch(VQ6), - io:format("~p~n", [Obj1]), - io:format("~p~n", [rabbit_variable_queue:status(VQ7)]), + timer:sleep(1000), %% let the prefetcher run and grab enough - about 4 msgs + S6 = rabbit_variable_queue:status(VQ6), + RamCount = proplists:get_value(target_ram_msg_count, S6), + assert_prop(S6, prefetching, true), + assert_prop(S6, q4, 0), + assert_prop(S6, q3, (SegmentSize - 1 - RamCount)), + + Len2 = Len1 - 1, + %% this should be enough to stop + drain the prefetcher + {{_Msg1, false, 
AckTag1, Len2}, VQ7} = rabbit_variable_queue:fetch(VQ6), + S7 = rabbit_variable_queue:status(VQ7), + assert_prop(S7, prefetching, false), + assert_prop(S7, q4, (RamCount - 1)), + assert_prop(S7, q3, (SegmentSize - 1 - RamCount)), + + %% now fetch SegmentSize - 1 which will exhaust q4 and q3, + %% bringing in a segment from gamma: + {VQ8, AckTags} = variable_queue_fetch(SegmentSize-1, false, Len2, VQ7), + S8 = rabbit_variable_queue:status(VQ8), + assert_prop(S8, prefetching, false), + assert_prop(S8, q4, 0), + assert_prop(S8, q3, (SegmentSize - 1)), + assert_prop(S8, gamma, {gamma, (2*SegmentSize), SegmentSize}), + + VQ9 = rabbit_variable_queue:remeasure_egress_rate(VQ8), + VQ10 = rabbit_variable_queue:ack(AckTags, VQ9), + + S10 = rabbit_variable_queue:status(VQ10), + assert_prop(S10, prefetching, true), + %% egress rate should be really high, so it's likely if we wait a + %% little bit, the next segment should be brought in + timer:sleep(2000), + Len3 = (2*SegmentSize) - 2, + {{_Msg2, false, AckTag2, Len3}, VQ11} = rabbit_variable_queue:fetch(VQ10), + S11 = rabbit_variable_queue:status(VQ11), + assert_prop(S11, prefetching, false), + assert_prop(S11, q4, (SegmentSize - 2)), + assert_prop(S11, q3, SegmentSize), + assert_prop(S11, gamma, {gamma, undefined, 0}), + assert_prop(S11, q2, 0), + assert_prop(S11, q1, 0), + + VQ12 = rabbit_variable_queue:maybe_start_prefetcher(VQ11), + S12 = rabbit_variable_queue:status(VQ12), + assert_prop(S12, prefetching, true), + PrefetchCount = lists:min([proplists:get_value(target_ram_msg_count, S12) - + proplists:get_value(ram_msg_count, S12), + SegmentSize]), + timer:sleep(2000), + %% we have to fetch all of q4 before the prefetcher will be drained + {VQ13, AckTags1} = variable_queue_fetch(SegmentSize-2, false, Len3, VQ12), + Len4 = SegmentSize - 1, + {{_Msg3, false, AckTag3, Len4}, VQ14} = rabbit_variable_queue:fetch(VQ13), + S14 = rabbit_variable_queue:status(VQ14), + assert_prop(S14, prefetching, false), + assert_prop(S14, q4, (PrefetchCount - 1)), + assert_prop(S14, q3, (Len4 - (PrefetchCount - 1))), + + VQ15 = rabbit_variable_queue:ack([AckTag3, AckTag2, AckTag1, AckTag], VQ14), + VQ16 = rabbit_variable_queue:ack(AckTags1, VQ15), + + {VQ17, AckTags2} = variable_queue_fetch(Len4, false, Len4, VQ16), + VQ18 = rabbit_variable_queue:ack(AckTags2, VQ17), + + rabbit_variable_queue:terminate(VQ18), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index af8a4775..b967e4a2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -46,6 +46,7 @@ gamma, q3, q4, + duration_target, target_ram_msg_count, ram_msg_count, queue, @@ -130,6 +131,7 @@ init(QueueName) -> gamma = Gamma, q3 = queue:new(), q4 = queue:new(), target_ram_msg_count = undefined, + duration_target = undefined, ram_msg_count = 0, queue = QueueName, index_state = IndexState1, @@ -166,12 +168,15 @@ publish_delivered(Msg = #basic_message { guid = MsgId, {ack_not_on_disk, State} end. 
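%% Note on the assert_prop/3 helper introduced in the tests above: it uses
%% Erlang's match-as-assertion idiom, so a wrong value crashes the test
%% with badmatch instead of returning false. Illustrative values:
%%
%%   assert_prop([{len, 3}], len, 3)  %% matches, evaluates to 3
%%   assert_prop([{len, 3}], len, 4)  %% exits with {badmatch, 3}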
+set_queue_ram_duration_target(undefined, State) -> + State; set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, target_ram_msg_count = TargetRamMsgCount }) -> TargetRamMsgCount1 = trunc(DurationTarget * EgressRate), %% msgs = sec * msgs/sec - State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1 }, + State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, + duration_target = DurationTarget }, if TargetRamMsgCount == TargetRamMsgCount1 -> State1; TargetRamMsgCount == undefined orelse @@ -183,7 +188,8 @@ set_queue_ram_duration_target( remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, egress_rate_timestamp = Timestamp, - out_counter = OutCount }) -> + out_counter = OutCount, + duration_target = DurationTarget }) -> %% We do an average over the last two values, but also hold the %% current value separately so that the average always only %% incorporates the last two values, and not the current value and @@ -192,13 +198,15 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, %% EgressRate is in seconds, and now_diff is in microseconds EgressRate = 1000000 * OutCount / timer:now_diff(Now, Timestamp), AvgEgressRate = (EgressRate + OldEgressRate) / 2, - State #vqstate { egress_rate = EgressRate, - avg_egress_rate = AvgEgressRate, - egress_rate_timestamp = Now, - out_counter = 0 }. + set_queue_ram_duration_target( + DurationTarget, + State #vqstate { egress_rate = EgressRate, + avg_egress_rate = AvgEgressRate, + egress_rate_timestamp = Now, + out_counter = 0 }). fetch(State = - #vqstate { q4 = Q4, + #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, prefetcher = Prefetcher, index_state = IndexState, len = Len }) -> case queue:out(Q4) of @@ -246,6 +254,7 @@ fetch(State = Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, + ram_msg_count = RamMsgCount - 1, index_state = IndexState1, len = Len1 }} end. @@ -258,21 +267,29 @@ is_empty(State) -> maybe_start_prefetcher(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount, - q1 = Q1, q3 = Q3, prefetcher = undefined + q1 = Q1, q3 = Q3, prefetcher = undefined, + gamma = #gamma { count = GammaCount } }) -> - %% prefetched content takes priority over q1 - AvailableSpace = case TargetRamMsgCount of - undefined -> queue:len(Q3); - _ -> (TargetRamMsgCount - RamMsgCount) + queue:len(Q1) - end, - PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), - if PrefetchCount =< 0 -> State; - true -> - {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3), - {ok, Prefetcher} = - rabbit_queue_prefetcher:start_link(PrefetchQueue), - maybe_load_next_segment(State #vqstate { q3 = Q3a, - prefetcher = Prefetcher }) + case queue:is_empty(Q3) andalso GammaCount > 0 of + true -> + maybe_start_prefetcher(maybe_load_next_segment(State)); + false -> + %% prefetched content takes priority over q1 + AvailableSpace = + case TargetRamMsgCount of + undefined -> queue:len(Q3); + _ -> (TargetRamMsgCount - RamMsgCount) + queue:len(Q1) + end, + PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), + case PrefetchCount =< 0 of + true -> State; + false -> + {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3), + {ok, Prefetcher} = + rabbit_queue_prefetcher:start_link(PrefetchQueue), + maybe_load_next_segment( + State #vqstate { q3 = Q3a, prefetcher = Prefetcher }) + end end; maybe_start_prefetcher(State) -> State. 
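%% Worked example of the rate arithmetic above (numbers invented): if 50
%% msgs left the queue and the last remeasure was 2s ago, then EgressRate
%% = 1000000 * 50 / 2000000 = 25 msg/s. Averaged with a previous rate of
%% 15 msg/s, AvgEgressRate = (25 + 15) / 2 = 20 msg/s, so a duration
%% target of 10s yields target_ram_msg_count = trunc(10 * 20) = 200 msgs.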
@@ -429,7 +446,7 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, {q2, queue:len(Q2)}, {gamma, Gamma}, {q3, queue:len(Q3)}, - {q4, Q4}, + {q4, queue:len(Q4)}, {len, Len}, {outstanding_txns, length(From)}, {target_ram_msg_count, TargetRamMsgCount}, @@ -570,9 +587,9 @@ publish(neither, Msg = #basic_message { guid = MsgId, State #vqstate { index_state = IndexState1, gamma = combine_gammas(Gamma, Gamma1) }. -fetch_from_q3_or_gamma(State = #vqstate { q1 = Q1, q2 = Q2, - gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4 }) -> +fetch_from_q3_or_gamma(State = #vqstate { + q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, + q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount }) -> case queue:out(Q3) of {empty, _Q3} -> 0 = GammaCount, %% ASSERTION @@ -590,7 +607,8 @@ fetch_from_q3_or_gamma(State = #vqstate { q1 = Q1, q2 = Q2, #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = true, index_on_disk = IndexOnDisk }, Q4), - State1 = State #vqstate { q3 = Q3a, q4 = Q4a }, + State1 = State #vqstate { q3 = Q3a, q4 = Q4a, + ram_msg_count = RamMsgCount + 1 }, State2 = case {queue:is_empty(Q3a), 0 == GammaCount} of {true, true} -> @@ -615,23 +633,31 @@ maybe_load_next_segment(State = #vqstate { gamma = #gamma { count = 0 }} ) -> State; maybe_load_next_segment(State = #vqstate { index_state = IndexState, q2 = Q2, + q3 = Q3, gamma = #gamma { seq_id = GammaSeqId, count = GammaCount }}) -> - {List, IndexState1, Gamma1SeqId} = - read_index_segment(GammaSeqId, IndexState), - State1 = State #vqstate { index_state = IndexState1 }, - %% length(List) may be < segment_size because of acks. But it - %% can't be [] - Q3a = betas_from_segment_entries(List), - case GammaCount - length(List) of - 0 -> - %% gamma is now empty, but it wasn't before, so can now - %% join q2 onto q3 - State1 #vqstate { gamma = #gamma { seq_id = undefined, count = 0 }, - q2 = queue:new(), q3 = queue:join(Q3a, Q2) }; - N when N > 0 -> - State1 #vqstate { gamma = #gamma { seq_id = Gamma1SeqId, - count = N }, q3 = Q3a } + case queue:is_empty(Q3) of + false -> + State; + true -> + {List, IndexState1, Gamma1SeqId} = + read_index_segment(GammaSeqId, IndexState), + State1 = State #vqstate { index_state = IndexState1 }, + %% length(List) may be < segment_size because of acks. But + %% it can't be [] + Q3a = betas_from_segment_entries(List), + case GammaCount - length(List) of + 0 -> + %% gamma is now empty, but it wasn't before, so + %% can now join q2 onto q3 + State1 #vqstate { gamma = #gamma { seq_id = undefined, + count = 0 }, + q2 = queue:new(), + q3 = queue:join(Q3a, Q2) }; + N when N > 0 -> + State1 #vqstate { gamma = #gamma { seq_id = Gamma1SeqId, + count = N }, q3 = Q3a } + end end. betas_from_segment_entries(List) -> -- cgit v1.2.1 From 385bd93debe0775acf0bcd4881d2e2639292f776 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 9 Nov 2009 09:08:42 +0000 Subject: modified vq such that when we're bringing entries back in from gamma to beta (q3), we bring all entries in, unless we know that our target_ram count is 0, in which case we only bring one segment in. This massively helps the prefetcher as it means the prefetcher is finally allowed to prefetch more than one segment. However, it has had a negative impact on the tests, which I'm still working through to correct. 
--- src/rabbit_tests.erl | 43 ++++++++++++++++--------------- src/rabbit_variable_queue.erl | 59 +++++++++++++++++++++++-------------------- 2 files changed, 55 insertions(+), 47 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 15b9161b..bef2264c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1167,7 +1167,8 @@ test_variable_queue() -> RamCount = proplists:get_value(target_ram_msg_count, S6), assert_prop(S6, prefetching, true), assert_prop(S6, q4, 0), - assert_prop(S6, q3, (SegmentSize - 1 - RamCount)), + assert_prop(S6, q3, (Len1 - RamCount)), + assert_prop(S6, gamma, {gamma, undefined, 0}), Len2 = Len1 - 1, %% this should be enough to stop + drain the prefetcher @@ -1175,16 +1176,17 @@ test_variable_queue() -> S7 = rabbit_variable_queue:status(VQ7), assert_prop(S7, prefetching, false), assert_prop(S7, q4, (RamCount - 1)), - assert_prop(S7, q3, (SegmentSize - 1 - RamCount)), + assert_prop(S7, q3, (Len1 - RamCount)), - %% now fetch SegmentSize - 1 which will exhaust q4 and q3, + %% now fetch SegmentSize - 1 which will exhaust q4 and work through a bit of q3 %% bringing in a segment from gamma: {VQ8, AckTags} = variable_queue_fetch(SegmentSize-1, false, Len2, VQ7), + Len3 = Len2 - (SegmentSize - 1), S8 = rabbit_variable_queue:status(VQ8), assert_prop(S8, prefetching, false), assert_prop(S8, q4, 0), - assert_prop(S8, q3, (SegmentSize - 1)), - assert_prop(S8, gamma, {gamma, (2*SegmentSize), SegmentSize}), + assert_prop(S8, q3, Len3), + assert_prop(S8, len, Len3), VQ9 = rabbit_variable_queue:remeasure_egress_rate(VQ8), VQ10 = rabbit_variable_queue:ack(AckTags, VQ9), @@ -1192,38 +1194,39 @@ test_variable_queue() -> S10 = rabbit_variable_queue:status(VQ10), assert_prop(S10, prefetching, true), %% egress rate should be really high, so it's likely if we wait a - %% little bit, the next segment should be brought in + %% little bit, lots of msgs will be brought in timer:sleep(2000), - Len3 = (2*SegmentSize) - 2, - {{_Msg2, false, AckTag2, Len3}, VQ11} = rabbit_variable_queue:fetch(VQ10), + PrefetchCount = lists:min([proplists:get_value(target_ram_msg_count, S10) - + proplists:get_value(ram_msg_count, S10), + Len3]), + Len4 = Len3 - 1, + {{_Msg2, false, AckTag2, Len4}, VQ11} = rabbit_variable_queue:fetch(VQ10), S11 = rabbit_variable_queue:status(VQ11), + %% prefetcher will stop if it's fast enough and has completed by now, or may still be running if PrefetchCount > 1 assert_prop(S11, prefetching, false), - assert_prop(S11, q4, (SegmentSize - 2)), - assert_prop(S11, q3, SegmentSize), + Prefetched = proplists:get_value(q4, S11), + true = (PrefetchCount - 1) >= Prefetched, + assert_prop(S11, q3, Len4 - Prefetched), assert_prop(S11, gamma, {gamma, undefined, 0}), assert_prop(S11, q2, 0), assert_prop(S11, q1, 0), VQ12 = rabbit_variable_queue:maybe_start_prefetcher(VQ11), S12 = rabbit_variable_queue:status(VQ12), - assert_prop(S12, prefetching, true), - PrefetchCount = lists:min([proplists:get_value(target_ram_msg_count, S12) - - proplists:get_value(ram_msg_count, S12), - SegmentSize]), + assert_prop(S12, prefetching, (Len4 - Prefetched) > 0), timer:sleep(2000), %% we have to fetch all of q4 before the prefetcher will be drained - {VQ13, AckTags1} = variable_queue_fetch(SegmentSize-2, false, Len3, VQ12), - Len4 = SegmentSize - 1, - {{_Msg3, false, AckTag3, Len4}, VQ14} = rabbit_variable_queue:fetch(VQ13), + {VQ13, AckTags1} = + variable_queue_fetch(Prefetched, false, Len4, VQ12), + Len5 = Len4 - Prefetched - 1, + {{_Msg3, false, AckTag3, Len5}, VQ14} = 
rabbit_variable_queue:fetch(VQ13), S14 = rabbit_variable_queue:status(VQ14), assert_prop(S14, prefetching, false), - assert_prop(S14, q4, (PrefetchCount - 1)), - assert_prop(S14, q3, (Len4 - (PrefetchCount - 1))), VQ15 = rabbit_variable_queue:ack([AckTag3, AckTag2, AckTag1, AckTag], VQ14), VQ16 = rabbit_variable_queue:ack(AckTags1, VQ15), - {VQ17, AckTags2} = variable_queue_fetch(Len4, false, Len4, VQ16), + {VQ17, AckTags2} = variable_queue_fetch(Len5, false, Len5, VQ16), VQ18 = rabbit_variable_queue:ack(AckTags2, VQ17), rabbit_variable_queue:terminate(VQ18), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b967e4a2..64c9d199 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -144,7 +144,7 @@ init(QueueName) -> len = GammaCount, on_sync = {[], [], []} }, - maybe_load_next_segment(State). + maybe_gammas_to_betas(State). terminate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:terminate(IndexState) }. @@ -264,15 +264,16 @@ len(#vqstate { len = Len }) -> is_empty(State) -> 0 == len(State). -maybe_start_prefetcher(State = #vqstate { - ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount, - q1 = Q1, q3 = Q3, prefetcher = undefined, - gamma = #gamma { count = GammaCount } - }) -> - case queue:is_empty(Q3) andalso GammaCount > 0 of - true -> - maybe_start_prefetcher(maybe_load_next_segment(State)); +maybe_start_prefetcher(State = #vqstate { target_ram_msg_count = 0 }) -> + State; +maybe_start_prefetcher(State = #vqstate { prefetcher = undefined }) -> + %% ensure we have as much index in RAM as we can + State1 = #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount, + q1 = Q1, q3 = Q3 } = maybe_gammas_to_betas(State), + case queue:is_empty(Q3) of + true -> %% nothing to do + State1; false -> %% prefetched content takes priority over q1 AvailableSpace = @@ -282,13 +283,12 @@ maybe_start_prefetcher(State = #vqstate { end, PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), case PrefetchCount =< 0 of - true -> State; + true -> State1; false -> {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3), {ok, Prefetcher} = rabbit_queue_prefetcher:start_link(PrefetchQueue), - maybe_load_next_segment( - State #vqstate { q3 = Q3a, prefetcher = Prefetcher }) + State1 #vqstate { q3 = Q3a, prefetcher = Prefetcher } end end; maybe_start_prefetcher(State) -> @@ -483,7 +483,7 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> false -> {Q3Count, IndexState1} = remove_queue_entries(Q3, IndexState), purge1(Count + Q3Count, - maybe_load_next_segment( + maybe_gammas_to_betas( State #vqstate { index_state = IndexState1, q3 = queue:new() })) end. @@ -619,7 +619,7 @@ fetch_from_q3_or_gamma(State = #vqstate { State1 #vqstate { q1 = queue:new(), q4 = queue:join(Q4a, Q1) }; {true, false} -> - maybe_load_next_segment(State1); + maybe_gammas_to_betas(State1); {false, _} -> %% q3 still isn't empty, we've not touched %% gamma, so the invariants between q1, q2, @@ -629,23 +629,26 @@ fetch_from_q3_or_gamma(State = #vqstate { fetch(State2) end. 
-maybe_load_next_segment(State = #vqstate { gamma = #gamma { count = 0 }} ) -> +maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 }} ) -> State; -maybe_load_next_segment(State = - #vqstate { index_state = IndexState, q2 = Q2, - q3 = Q3, - gamma = #gamma { seq_id = GammaSeqId, - count = GammaCount }}) -> - case queue:is_empty(Q3) of - false -> - State; +maybe_gammas_to_betas(State = + #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, + target_ram_msg_count = TargetRamMsgCount, + gamma = #gamma { seq_id = GammaSeqId, + count = GammaCount }}) -> + case (not queue:is_empty(Q3)) andalso 0 == TargetRamMsgCount of true -> + State; + false -> + %% either q3 is empty, in which case we load at least one + %% segment, or TargetRamMsgCount > 0, meaning we should + %% really be holding all the betas in memory. {List, IndexState1, Gamma1SeqId} = read_index_segment(GammaSeqId, IndexState), State1 = State #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because of acks. But %% it can't be [] - Q3a = betas_from_segment_entries(List), + Q3a = queue:join(Q3, betas_from_segment_entries(List)), case GammaCount - length(List) of 0 -> %% gamma is now empty, but it wasn't before, so @@ -655,8 +658,10 @@ maybe_load_next_segment(State = q2 = queue:new(), q3 = queue:join(Q3a, Q2) }; N when N > 0 -> - State1 #vqstate { gamma = #gamma { seq_id = Gamma1SeqId, - count = N }, q3 = Q3a } + maybe_gammas_to_betas( + State1 #vqstate { q3 = Q3a, + gamma = #gamma { seq_id = Gamma1SeqId, + count = N } }) end end. -- cgit v1.2.1 From 7289f895d293adad9f320a0ee6caa82c4b731e1c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 9 Nov 2009 13:46:19 +0000 Subject: Made the tests work again. Also one tiny cosmetic in vq. However, uncovered major mistake in VQ which is that currently, when the prefetcher starts, that may empty q3. A subsequent publish may then go straight to Q4, thus overtaking the previous msgs. Even worse, when the prefetcher is drained, there is no attempt to join it into the existing q4, it just replaces it. What should happen is that the existence of the prefetcher is treated as if both q3 and q4 are non empty. This makes sense, because there are some ways in which the prefetcher can exit, returning entries for both q4 and q3. Thus pubs that happen after the prefetcher is started must go to q1/q2/?, and so we know that entries already in q4 when the prefetcher is drained must have got there before the prefetcher was started. Finally, when the prefetcher is drained, if ? is empty, q2 and q3 can be joined, and if q2, ? and q3 are empty, q1 and q4 can be joined. Or something like that. --- src/rabbit_tests.erl | 52 ++++++++++++++++++++++++++++--------------- src/rabbit_variable_queue.erl | 2 +- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bef2264c..13d3cd1b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1127,10 +1127,11 @@ variable_queue_publish(IsPersistent, Count, VQ) -> {[SeqId | Acc], VQ2} end, {[], VQ}, lists:seq(1, Count)). -variable_queue_fetch(Count, IsPersistent, Len, VQ) -> +variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> lists:foldl(fun (N, {VQN, AckTagsAcc}) -> Rem = Len - N, - {{_MsgN, IsPersistent, AckTagN, Rem}, VQM} = + {{#basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTagN, Rem}, VQM} = rabbit_variable_queue:fetch(VQN), {VQM, [AckTagN | AckTagsAcc]} end, {VQ, []}, lists:seq(1, Count)). 
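%% Typical use of the reworked helper above, as in the tests that follow
%% (Count, Len and VQ0 are assumed to come from earlier test steps): fetch
%% Count undelivered, non-persistent msgs, then ack them all in one go:
%%
%%   {VQ1, AckTags} = variable_queue_fetch(Count, false, false, Len, VQ0),
%%   VQ2 = rabbit_variable_queue:ack(AckTags, VQ1),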
@@ -1180,7 +1181,7 @@ test_variable_queue() -> %% now fetch SegmentSize - 1 which will exhaust q4 and work through a bit of q3 %% bringing in a segment from gamma: - {VQ8, AckTags} = variable_queue_fetch(SegmentSize-1, false, Len2, VQ7), + {VQ8, AckTags} = variable_queue_fetch(SegmentSize-1, false, false, Len2, VQ7), Len3 = Len2 - (SegmentSize - 1), S8 = rabbit_variable_queue:status(VQ8), assert_prop(S8, prefetching, false), @@ -1202,11 +1203,19 @@ test_variable_queue() -> Len4 = Len3 - 1, {{_Msg2, false, AckTag2, Len4}, VQ11} = rabbit_variable_queue:fetch(VQ10), S11 = rabbit_variable_queue:status(VQ11), - %% prefetcher will stop if it's fast enough and has completed by now, or may still be running if PrefetchCount > 1 - assert_prop(S11, prefetching, false), + %% prefetcher will stop if it's fast enough and has completed by + %% now, or may still be running if PrefetchCount > 1 Prefetched = proplists:get_value(q4, S11), - true = (PrefetchCount - 1) >= Prefetched, - assert_prop(S11, q3, Len4 - Prefetched), + true = PrefetchCount > Prefetched, %% already fetched 1, thus >, not >= + %% q3 will contain whatever the prefetcher was not allowed to + %% prefetch, due to memory constraints. If the prefetcher is still + %% running, this will be less than (Len4 - Prefetched) because + %% Prefetched will not reflect the true number of msgs that it's + %% trying to prefetch. + case proplists:get_value(prefetching, S11) of + true -> true = (Len4 - Prefetched) > proplists:get_value(q3, S11); + false -> assert_prop(S11, q3, Len4 - Prefetched) + end, assert_prop(S11, gamma, {gamma, undefined, 0}), assert_prop(S11, q2, 0), assert_prop(S11, q1, 0), @@ -1217,17 +1226,24 @@ test_variable_queue() -> timer:sleep(2000), %% we have to fetch all of q4 before the prefetcher will be drained {VQ13, AckTags1} = - variable_queue_fetch(Prefetched, false, Len4, VQ12), - Len5 = Len4 - Prefetched - 1, - {{_Msg3, false, AckTag3, Len5}, VQ14} = rabbit_variable_queue:fetch(VQ13), - S14 = rabbit_variable_queue:status(VQ14), - assert_prop(S14, prefetching, false), - - VQ15 = rabbit_variable_queue:ack([AckTag3, AckTag2, AckTag1, AckTag], VQ14), - VQ16 = rabbit_variable_queue:ack(AckTags1, VQ15), - - {VQ17, AckTags2} = variable_queue_fetch(Len5, false, Len5, VQ16), - VQ18 = rabbit_variable_queue:ack(AckTags2, VQ17), + variable_queue_fetch(Prefetched, false, false, Len4, VQ12), + {VQ16, Acks} = + case Len4 == Prefetched of + true -> + {VQ13, [AckTag2, AckTag1, AckTag, AckTags1]}; + false -> + Len5 = Len4 - Prefetched - 1, + {{_Msg3, false, AckTag3, Len5}, VQ14} = + rabbit_variable_queue:fetch(VQ13), + assert_prop(rabbit_variable_queue:status(VQ14), + prefetching, false), + {VQ15, AckTags2} = + variable_queue_fetch(Len5, false, false, Len5, VQ14), + {VQ15, [AckTag3, AckTag2, AckTag1, AckTag, AckTags1, AckTags2]} + end, + VQ17 = rabbit_variable_queue:ack(lists:flatten(Acks), VQ16), + + {empty, VQ18} = rabbit_variable_queue:fetch(VQ17), rabbit_variable_queue:terminate(VQ18), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 64c9d199..2624a9fb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -629,7 +629,7 @@ fetch_from_q3_or_gamma(State = #vqstate { fetch(State2) end. 
-maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 }} ) -> +maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 } }) -> State; maybe_gammas_to_betas(State = #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, -- cgit v1.2.1 From 3fa441167d0501b7c2cbec4d68f84c1f898f5b9d Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Mon, 9 Nov 2009 09:57:28 -0500 Subject: Using vm_memory_manager. --- src/rabbit_memory_monitor.erl | 26 +++++++++++--------------- src/vm_memory_monitor.erl | 14 +++++++++++++- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index db4949e4..0629591a 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -95,6 +95,10 @@ -define(DEFAULT_UPDATE_INTERVAL_MS, 2500). -define(TABLE_NAME, ?MODULE). -define(MAX_QUEUE_DURATION_ALLOWED, 60*60*24). % 1 day + +%% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. +-define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). + %%---------------------------------------------------------------------------- -ifdef(use_specs). -type(state() :: #state{timer :: timer:tref(), @@ -136,26 +140,18 @@ push_queue_duration(Pid, QueueDuration) -> %%---------------------------------------------------------------------------- -get_user_memory_limit() -> - %% TODO: References to os_mon and rabbit_memsup_linux - %% should go away as bug 21457 removes it. - %% BTW: memsup:get_system_memory_data() doesn't work. - {state, TotalMemory, _Allocated} = rabbit_memsup_linux:update({state, 0,0}), - MemoryHighWatermark = os_mon:get_env(memsup, system_memory_high_watermark), - Limit = erlang:trunc(TotalMemory * MemoryHighWatermark), - %% no more than two gigs on 32 bits. - case (Limit > 2*1024*1024*1024) and (erlang:system_info(wordsize) == 4) of - true -> 2*1024*1024*1024; - false -> Limit +get_memory_limit() -> + RabbitMemoryLimit = case vm_memory_monitor:get_memory_limit() of + undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; + A -> A end. - init([]) -> %% We should never use more memory than user requested. As the memory %% manager doesn't really know how much memory queues are using, we shall - %% try to remain safe distance from real limit. - MemoryLimit = trunc(get_user_memory_limit() * 0.6), - rabbit_log:warning("Memory monitor limit: ~pMB~n", + %% try to remain safe distance from real throttle limit. + MemoryLimit = trunc(get_memory_limit() * 0.6), + rabbit_log:warning("Queues go to disk when memory is above: ~pMB~n", [erlang:trunc(MemoryLimit/1048576)]), {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS, diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index 6da47933..6da6704d 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -51,7 +51,8 @@ -export([update/0, get_total_memory/0, get_check_interval/0, set_check_interval/1, - get_vm_memory_high_watermark/0, set_vm_memory_high_watermark/1]). + get_vm_memory_high_watermark/0, set_vm_memory_high_watermark/1, + get_memory_limit/0]). -define(SERVER, ?MODULE). @@ -76,6 +77,7 @@ -spec(start_link/1 :: (float()) -> ('ignore' | {error, any()} | {'ok', pid()})). -spec(update/0 :: () -> 'ok'). -spec(get_total_memory/0 :: () -> (non_neg_integer() | unknown)). +-spec(get_memory_limit/0 :: () -> (non_neg_integer() | undefined)). -spec(get_check_interval/0 :: () -> non_neg_integer()). -spec(set_check_interval/1 :: (non_neg_integer()) -> 'ok'). -spec(get_vm_memory_high_watermark/0 :: () -> float()). 
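%% Worked example of the 0.6 safety margin applied in rabbit_memory_monitor
%% earlier in this commit: with a detected limit of 2GB, MemoryLimit =
%% trunc(2147483648 * 0.6) = 1288490188 bytes, and the warning logs
%% trunc(1288490188 / 1048576) = 1228 MB as the queues-go-to-disk threshold.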
@@ -128,6 +130,9 @@ handle_call({set_check_interval, Timeout}, _From, State) ->
     {ok, cancel} = timer:cancel(State#state.timer),
     {reply, ok, State#state{timeout = Timeout, timer = start_timer(Timeout)}};
 
+handle_call(get_memory_limit, _From, State) ->
+    {reply, State#state.memory_limit, State};
+
 handle_call(_Request, _From, State) ->
     {noreply, State}.
 
@@ -168,6 +173,13 @@ get_vm_memory_high_watermark() ->
 set_vm_memory_high_watermark(Fraction) ->
     gen_server2:call(?MODULE, {set_vm_memory_high_watermark, Fraction}).
 
+get_memory_limit() ->
+    try
+        gen_server2:call(?MODULE, get_memory_limit)
+    catch
+        exit:{noproc, _} -> undefined
+    end.
+
 %%----------------------------------------------------------------------------
 %% Server Internals
 %%----------------------------------------------------------------------------
-- 
cgit v1.2.1


From af54f182a282e2c578463aff2c833e190353afb9 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 9 Nov 2009 17:10:28 +0000
Subject: Wrote specific test of the case mentioned in previous commit, and fixed code accordingly. Also added some other tests. With the overall test suite, up to 92% code coverage of vq, and 70% on prefetcher. Also factored out common records between those two modules to rabbit_queue.hrl

---
 include/rabbit_queue.hrl        |  51 +++++++++++
 src/rabbit_queue_prefetcher.erl |  17 +---
 src/rabbit_tests.erl            | 194 ++++++++++++++++++++++++++++++++++++++--
 src/rabbit_variable_queue.erl   |  53 +++++------
 4 files changed, 264 insertions(+), 51 deletions(-)
 create mode 100644 include/rabbit_queue.hrl

diff --git a/include/rabbit_queue.hrl b/include/rabbit_queue.hrl
new file mode 100644
index 00000000..5833b056
--- /dev/null
+++ b/include/rabbit_queue.hrl
@@ -0,0 +1,51 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%% Ltd. Portions created by Cohesive Financial Technologies LLC are
+%% Copyright (C) 2007-2009 Cohesive Financial Technologies
+%% LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%% (C) 2007-2009 Rabbit Technologies Ltd.
+%%
+%% All Rights Reserved.
+%%
+%% Contributor(s): ______________________________________.
+%%
+
+-record(alpha,
+        { msg,
+          seq_id,
+          is_delivered,
+          msg_on_disk,
+          index_on_disk
+        }).
+
+-record(beta,
+        { msg_id,
+          seq_id,
+          is_persistent,
+          is_delivered,
+          index_on_disk
+        }).
+
+-record(gamma,
+        { seq_id,
+          count
+        }).
diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl
index fd407c9d..f5e717f5 100644
--- a/src/rabbit_queue_prefetcher.erl
+++ b/src/rabbit_queue_prefetcher.erl
@@ -41,6 +41,7 @@
 -export([publish/2, drain/1, drain_and_stop/1, stop/1]).
-include("rabbit.hrl"). +-include("rabbit_queue.hrl"). -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). @@ -52,22 +53,6 @@ peruse_cb }). --record(alpha, - { msg, - seq_id, - is_delivered, - msg_on_disk, - index_on_disk - }). - --record(beta, - { msg_id, - seq_id, - is_persistent, - is_delivered, - index_on_disk - }). - %%---------------------------------------------------------------------------- %% Novel %%---------------------------------------------------------------------------- diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 13d3cd1b..64143476 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1139,19 +1139,203 @@ variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> assert_prop(List, Prop, Value) -> Value = proplists:get_value(Prop, List). -test_variable_queue() -> - SegmentSize = rabbit_queue_index:segment_size(), +fresh_variable_queue() -> stop_msg_store(), ok = empty_test_queue(), - VQ0 = rabbit_variable_queue:init(test_queue()), - S0 = rabbit_variable_queue:status(VQ0), + VQ = rabbit_variable_queue:init(test_queue()), + S0 = rabbit_variable_queue:status(VQ), assert_prop(S0, len, 0), assert_prop(S0, prefetching, false), + assert_prop(S0, q1, 0), + assert_prop(S0, q2, 0), + assert_prop(S0, gamma, {gamma, undefined, 0}), + assert_prop(S0, q3, 0), + assert_prop(S0, q4, 0), + VQ. + +test_variable_queue() -> + passed = test_variable_queue_prefetching_and_gammas_to_betas(), + passed = test_variable_queue_prefetching_during_publish(0), + passed = test_variable_queue_prefetching_during_publish(5000), + passed = test_variable_queue_prefetch_evicts_q1(), + passed. + +test_variable_queue_prefetch_evicts_q1() -> + SegmentSize = rabbit_queue_index:segment_size(), + VQ0 = fresh_variable_queue(), + VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ0), + assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0), + Len1 = 2*SegmentSize, + {_SeqIds, VQ2} = variable_queue_publish(true, Len1, VQ1), + %% one segment will be in q3, the other in gamma. We want to fetch + %% all of q3 so that gamma is then moved into q3, emptying gamma + + VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2), + Start = now(), + {VQ4, AckTags} = variable_queue_fetch(SegmentSize, true, false, Len1, VQ3), + End = now(), + VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), + S5 = rabbit_variable_queue:status(VQ5), + assert_prop(S5, q4, 0), + assert_prop(S5, q3, SegmentSize), + assert_prop(S5, gamma, {gamma, undefined, 0}), + assert_prop(S5, len, SegmentSize), + assert_prop(S5, prefetching, false), + + VQ6 = rabbit_variable_queue:remeasure_egress_rate(VQ5), + %% half the seconds taken to fetch one segment + Duration = timer:now_diff(End, Start) / 2000000, + VQ7 = rabbit_variable_queue:set_queue_ram_duration_target(Duration, VQ6), + S7 = rabbit_variable_queue:status(VQ7), + assert_prop(S7, q4, 0), + Q3 = proplists:get_value(q3, S7), + true = Q3 > 0, %% not prefetching everything + assert_prop(S7, gamma, {gamma, undefined, 0}), + assert_prop(S7, len, SegmentSize), + assert_prop(S7, prefetching, true), + + %% now publish a segment, this'll go half in q1, half in q3, in + %% theory. 
+ {_SeqIds1, VQ8} = variable_queue_publish(true, SegmentSize, VQ7), + S8 = rabbit_variable_queue:status(VQ8), + assert_prop(S8, q4, 0), + assert_prop(S8, q2, 0), + assert_prop(S8, len, Len1), + assert_prop(S8, prefetching, true), + Q3a = proplists:get_value(q3, S8), + Q3a_new = Q3a - Q3, + Q1a = proplists:get_value(q1, S8), + true = (Q3a_new + Q1a == SegmentSize) andalso Q1a < SegmentSize, + + %% wait a bit, to let the prefetcher do its thing + timer:sleep(2000), + %% fetch a msg. The prefetcher *should* have finished, but can't + %% guarantee it. + Len2 = Len1-1, + {{_Msg, false, AckTag, Len2}, VQ9} = rabbit_variable_queue:fetch(VQ8), + S9 = rabbit_variable_queue:status(VQ9), + case proplists:get_value(prefetching, S9) of + true -> + %% bits of q1 could have moved into q3, and the prefetcher + %% won't have returned any betas for q3. So q3 can not + %% have shrunk. + Q3b = proplists:get_value(q3, S9), + Q1b = proplists:get_value(q1, S9), + true = (Q1a + Q3a) == (Q1b + Q3b) andalso Q3b >= Q3a; + false -> + %% there should be content in q4 and q3 (we only did 1 + %% fetch. This is not sufficient to kill the prefetcher + %% through draining it when it's empty, thus if it's not + %% running, it must have finished, not been killed, thus + %% q4 will not be empty), and q1 should have gone into q3. + Q1b = proplists:get_value(q1, S9), + Q3b = proplists:get_value(q3, S9), + Q4b = proplists:get_value(q4, S9), + NotPrefetched = Q3b - (SegmentSize - Q1b), + SegmentSize = NotPrefetched + Q4b + 1 %% we fetched one + end, + + %% just for the fun of it, set duration to 0. This should push + %% everything back into gamma, except the eldest (partial) segment + %% in q3 + VQ10 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ9), + S10 = rabbit_variable_queue:status(VQ10), + assert_prop(S10, len, Len2), + assert_prop(S10, prefetching, false), + assert_prop(S10, q1, 0), + assert_prop(S10, q2, 0), + assert_prop(S10, gamma, {gamma, Len1, SegmentSize}), + assert_prop(S10, q3, (Len2 - SegmentSize)), + assert_prop(S10, q4, 0), + + {VQ11, AckTags1} = variable_queue_fetch(Len2, true, false, Len2, VQ10), + VQ12 = rabbit_variable_queue:ack([AckTag|AckTags1], VQ11), + {empty, VQ13} = rabbit_variable_queue:fetch(VQ12), + rabbit_variable_queue:terminate(VQ13), + + passed. + +test_variable_queue_prefetching_during_publish(PrefetchDelay) -> + SegmentSize = rabbit_queue_index:segment_size(), + VQ0 = fresh_variable_queue(), + VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ0), + assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0), + + Len1 = 2*SegmentSize, + {_SeqIds, VQ2} = variable_queue_publish(true, Len1, VQ1), + %% one segment will be in q3, the other in gamma. 
We want to fetch
+    %% all of q3 so that gamma is then moved into q3, emptying gamma
+
+    VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2),
+    {VQ4, AckTags} = variable_queue_fetch(SegmentSize, true, false, Len1, VQ3),
+    VQ5 = rabbit_variable_queue:ack(AckTags, VQ4),
+    S5 = rabbit_variable_queue:status(VQ5),
+    assert_prop(S5, q4, 0),
+    assert_prop(S5, q3, SegmentSize),
+    assert_prop(S5, gamma, {gamma, undefined, 0}),
+    assert_prop(S5, len, SegmentSize),
+    assert_prop(S5, prefetching, false),
+
+    %% we assume that we can fetch at > 1 msg a second
+    VQ6 = rabbit_variable_queue:remeasure_egress_rate(VQ5),
+    VQ7 = rabbit_variable_queue:set_queue_ram_duration_target(Len1, VQ6),
+    S7 = rabbit_variable_queue:status(VQ7),
+    assert_prop(S7, q4, 0),
+    assert_prop(S7, q3, 0),
+    assert_prop(S7, gamma, {gamma, undefined, 0}),
+    assert_prop(S7, len, SegmentSize),
+    assert_prop(S7, prefetching, true),
+
+    timer:sleep(PrefetchDelay),
+
+    {_SeqIds1, VQ8} = variable_queue_publish(true, SegmentSize, VQ7),
+    S8 = rabbit_variable_queue:status(VQ8),
+    assert_prop(S8, q4, 0),
+    assert_prop(S8, q2, 0),
+    assert_prop(S8, q1, SegmentSize),
+    assert_prop(S8, len, Len1),
+    assert_prop(S8, prefetching, true),
+
+    {VQ9, AckTags1} =
+        variable_queue_fetch(SegmentSize-1, true, false, Len1, VQ8),
+    VQ10 = rabbit_variable_queue:ack(AckTags1, VQ9),
+    %% can't guarantee the prefetcher has stopped here. If it is still
+    %% running, then we must have SegmentSize in q1. If it's not
+    %% running, and it completed, then we'll find SegmentSize + 1 in
+    %% q4 (q1 will have been joined to q4), otherwise, we'll find
+    %% SegmentSize in q1 and 1 in q3 and q4 empty.
+    S10 = rabbit_variable_queue:status(VQ10),
+    assert_prop(S10, q2, 0),
+    assert_prop(S10, len, (SegmentSize+1)),
+    case proplists:get_value(prefetching, S10) of
+        true  -> assert_prop(S10, q1, SegmentSize),
+                 assert_prop(S10, q3, 0),
+                 assert_prop(S10, q4, 0);
+        false -> case proplists:get_value(q3, S10) of
+                     0 -> assert_prop(S10, q4, SegmentSize+1),
+                          assert_prop(S10, q1, 0);
+                     1 -> assert_prop(S10, q4, 0),
+                          assert_prop(S10, q1, SegmentSize)
+                 end
+    end,
+
+    {VQ11, AckTags2} =
+        variable_queue_fetch(SegmentSize+1, true, false, SegmentSize+1, VQ10),
+    VQ12 = rabbit_variable_queue:ack(AckTags2, VQ11),
+
+    {empty, VQ13} = rabbit_variable_queue:fetch(VQ12),
+    rabbit_variable_queue:terminate(VQ13),
+
+    passed.
+
+test_variable_queue_prefetching_and_gammas_to_betas() ->
+    SegmentSize = rabbit_queue_index:segment_size(),
+    VQ0 = fresh_variable_queue(),
     VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ0),
     assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0),
-    {SeqIds, VQ2} = variable_queue_publish(false, 3 * SegmentSize, VQ1),
+    {_SeqIds, VQ2} = variable_queue_publish(false, 3 * SegmentSize, VQ1),
     S2 = rabbit_variable_queue:status(VQ2),
     assert_prop(S2, gamma, {gamma, SegmentSize, 2*SegmentSize}),
     assert_prop(S2, q3, SegmentSize),
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 2624a9fb..d7b9dafb 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -61,28 +61,8 @@
            on_sync
           }).
 
--record(alpha,
-        { msg,
-          seq_id,
-          is_delivered,
-          msg_on_disk,
-          index_on_disk
-        }).
-
--record(beta,
-        { msg_id,
-          seq_id,
-          is_persistent,
-          is_delivered,
-          index_on_disk
-        }).
-
--record(gamma,
-        { seq_id,
-          count
-        }).
-
 -include("rabbit.hrl").
+-include("rabbit_queue.hrl").
%%---------------------------------------------------------------------------- @@ -682,7 +662,9 @@ read_index_segment(SeqId, IndexState) -> drain_prefetcher(_DrainOrStop, State = #vqstate { prefetcher = undefined }) -> State; drain_prefetcher(DrainOrStop, - State = #vqstate { prefetcher = Prefetcher, q3 = Q3, q4 = Q4, + State = #vqstate { prefetcher = Prefetcher, q1 = Q1, q2 = Q2, + gamma = #gamma { count = GammaCount }, + q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount }) -> Fun = case DrainOrStop of drain -> fun rabbit_queue_prefetcher:drain/1; @@ -693,16 +675,27 @@ drain_prefetcher(DrainOrStop, {empty, Betas} -> %% drain or drain_and_stop {queue:join(Betas, Q3), Q4, undefined, 0}; {finished, Alphas} -> %% just drain - {Q3, Alphas, undefined, queue:len(Alphas)}; + {Q3, queue:join(Q4, Alphas), undefined, queue:len(Alphas)}; {continuing, Alphas} -> %% just drain - {Q3, Alphas, Prefetcher, queue:len(Alphas)}; + {Q3, queue:join(Q4, Alphas), Prefetcher, queue:len(Alphas)}; {Alphas, Betas} -> %% just drain_and_stop {queue:join(Betas, Q3), queue:join(Q4, Alphas), undefined, queue:len(Alphas)} end, - maybe_push_q1_to_betas( - State #vqstate { prefetcher = Prefetcher1, q3 = Q3a, q4 = Q4a, - ram_msg_count = RamMsgCount + RamMsgCountAdj }). + State1 = State #vqstate { prefetcher = Prefetcher1, q3 = Q3a, q4 = Q4a, + ram_msg_count = RamMsgCount + RamMsgCountAdj }, + %% don't join up with q1/q2 unless the prefetcher has stopped + State2 = case GammaCount == 0 andalso Prefetcher1 == undefined of + true -> case queue:is_empty(Q3a) andalso queue:is_empty(Q2) of + true -> + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + false -> + State1 #vqstate { q3 = queue:join(Q3a, Q2) } + end; + false -> State1 + end, + maybe_push_q1_to_betas(State2). reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) @@ -796,9 +789,9 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> store_alpha_entry(Entry = #alpha {}, State = #vqstate { q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4 }) -> - case queue:is_empty(Q2) andalso GammaCount == 0 andalso queue:is_empty(Q3) - of + q3 = Q3, q4 = Q4, prefetcher = Prefetcher }) -> + case queue:is_empty(Q2) andalso GammaCount == 0 andalso + queue:is_empty(Q3) andalso Prefetcher == undefined of true -> State #vqstate { q4 = queue:in(Entry, Q4) }; false -> -- cgit v1.2.1 From d813aa860b01d726be464d81ae26d5d3aa32c271 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 9 Nov 2009 18:12:00 +0000 Subject: Cosmetic reorganisation of vq. Added specs and made sure usage matches specs --- include/rabbit_queue.hrl | 7 + src/rabbit_tests.erl | 3 +- src/rabbit_variable_queue.erl | 444 +++++++++++++++++++++++++----------------- 3 files changed, 270 insertions(+), 184 deletions(-) diff --git a/include/rabbit_queue.hrl b/include/rabbit_queue.hrl index 5833b056..165a7e7b 100644 --- a/include/rabbit_queue.hrl +++ b/include/rabbit_queue.hrl @@ -49,3 +49,10 @@ { seq_id, count }). + +-ifdef(use_specs). + +-type(gamma() :: #gamma { seq_id :: non_neg_integer(), + count :: non_neg_integer () }). + +-endif. 
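%% To make the gamma() type concrete: a gamma describes msgs that exist
%% only on disk, as a start seq_id (a segment boundary) plus a count. For
%% illustration only (the 4096 segment size is invented; the real value
%% comes from rabbit_queue_index:segment_size/0):
example_gamma() ->
    #gamma { seq_id = 4096, count = 8192 }. %% 8192 msgs on disk from 4096
%% Their index entries are read back a segment at a time as q3 drains.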
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 64143476..1becda86 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1122,7 +1122,8 @@ variable_queue_publish(IsPersistent, Count, VQ) -> fun (_N, {Acc, VQ1}) -> {SeqId, VQ2} = rabbit_variable_queue:publish( rabbit_basic:message( - <<>>, <<>>, [], <<>>, rabbit_guid:guid(), + rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, [], <<>>, rabbit_guid:guid(), IsPersistent), VQ1), {[SeqId | Acc], VQ2} end, {[], VQ}, lists:seq(1, Count)). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d7b9dafb..c698e31e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -93,6 +93,67 @@ %% contain all msgs in the queue. Also, if q4 is non empty and gamma %% is non empty then q3 must be non empty. +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(msg_id() :: binary()). +-type(seq_id() :: non_neg_integer()). +-type(ack() :: {'ack_index_and_store', msg_id(), seq_id()} + | 'ack_not_on_disk'). +-type(vqstate() :: #vqstate { + q1 :: queue(), + q2 :: queue(), + gamma :: gamma(), + q3 :: queue(), + q4 :: queue(), + duration_target :: non_neg_integer(), + target_ram_msg_count :: non_neg_integer(), + queue :: queue_name(), + index_state :: any(), + next_seq_id :: seq_id(), + out_counter :: non_neg_integer(), + egress_rate :: float(), + avg_egress_rate :: float(), + egress_rate_timestamp :: {integer(), integer(), integer()}, + prefetcher :: ('undefined' | pid()), + len :: non_neg_integer(), + on_sync :: {[ack()], [msg_id()], [{pid(), any()}]} + }). + +-spec(init/1 :: (queue_name()) -> vqstate()). +-spec(terminate/1 :: (vqstate()) -> vqstate()). +-spec(publish/2 :: (basic_message(), vqstate()) -> + {seq_id(), vqstate()}). +-spec(publish_delivered/2 :: (basic_message(), vqstate()) -> + {ack(), vqstate()}). +-spec(set_queue_ram_duration_target/2 :: + (('undefined' | number()), vqstate()) -> vqstate()). +-spec(remeasure_egress_rate/1 :: (vqstate()) -> vqstate()). +-spec(fetch/1 :: (vqstate()) -> + {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), + vqstate()}). +-spec(ack/2 :: ([ack()], vqstate()) -> vqstate()). +-spec(len/1 :: (vqstate()) -> non_neg_integer()). +-spec(is_empty/1 :: (vqstate()) -> boolean()). +-spec(maybe_start_prefetcher/1 :: (vqstate()) -> vqstate()). +-spec(purge/1 :: (vqstate()) -> {non_neg_integer(), vqstate()}). +-spec(delete/1 :: (vqstate()) -> vqstate()). +-spec(requeue/2 :: ([{basic_message(), ack()}], vqstate()) -> vqstate()). +-spec(tx_publish/2 :: (basic_message(), vqstate()) -> vqstate()). +-spec(tx_rollback/2 :: ([msg_id()], vqstate()) -> vqstate()). +-spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> + {boolean(), vqstate()}). +-spec(tx_commit_from_msg_store/4 :: + ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> vqstate()). +-spec(tx_commit_from_vq/1 :: (vqstate()) -> vqstate()). +-spec(needs_sync/1 :: (vqstate()) -> boolean()). +-spec(can_flush_journal/1 :: (vqstate()) -> boolean()). +-spec(flush_journal/1 :: (vqstate()) -> vqstate()). +-spec(status/1 :: (vqstate()) -> [{atom(), any()}]). + +-endif. + %%---------------------------------------------------------------------------- %% Public API %%---------------------------------------------------------------------------- @@ -238,6 +299,23 @@ fetch(State = index_state = IndexState1, len = Len1 }} end. 
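%% The ack() type above admits exactly two shapes, which ack/2 below folds
%% over; a standalone sketch of the same discrimination (helper name
%% invented for illustration):
partition_acks_sketch(AckTags) ->
    lists:partition(fun ({ack_index_and_store, _MsgId, _SeqId}) -> true;
                        (ack_not_on_disk)                       -> false
                    end, AckTags).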
+ack(AckTags, State = #vqstate { index_state = IndexState }) -> + {MsgIds, SeqIds} = + lists:foldl( + fun (ack_not_on_disk, Acc) -> Acc; + ({ack_index_and_store, MsgId, SeqId}, {MsgIds, SeqIds}) -> + {[MsgId | MsgIds], [SeqId | SeqIds]} + end, {[], []}, AckTags), + IndexState1 = case SeqIds of + [] -> IndexState; + _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) + end, + ok = case MsgIds of + [] -> ok; + _ -> rabbit_msg_store:remove(MsgIds) + end, + State #vqstate { index_state = IndexState1 }. + len(#vqstate { len = Len }) -> Len. @@ -274,23 +352,6 @@ maybe_start_prefetcher(State = #vqstate { prefetcher = undefined }) -> maybe_start_prefetcher(State) -> State. -ack(AckTags, State = #vqstate { index_state = IndexState }) -> - {MsgIds, SeqIds} = - lists:foldl( - fun (ack_not_on_disk, Acc) -> Acc; - ({ack_index_and_store, MsgId, SeqId}, {MsgIds, SeqIds}) -> - {[MsgId | MsgIds], [SeqId | SeqIds]} - end, {[], []}, AckTags), - IndexState1 = case SeqIds of - [] -> IndexState; - _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) - end, - ok = case MsgIds of - [] -> ok; - _ -> rabbit_msg_store:remove(MsgIds) - end, - State #vqstate { index_state = IndexState1 }. - purge(State = #vqstate { prefetcher = undefined, q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = remove_queue_entries(Q4, IndexState), @@ -434,11 +495,56 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, {prefetching, Prefetcher /= undefined} ]. %%---------------------------------------------------------------------------- +%% Minor helpers +%%---------------------------------------------------------------------------- persistent_msg_ids(Pubs) -> [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, Obj #basic_message.is_persistent]. +entry_salient_details(#alpha { msg = #basic_message { guid = MsgId }, + seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }) -> + {MsgId, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}; +entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, + is_delivered = IsDelivered, + index_on_disk = IndexOnDisk }) -> + {MsgId, SeqId, IsDelivered, true, IndexOnDisk}. + +betas_from_segment_entries(List) -> + queue:from_list([#beta { msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + index_on_disk = true } + || {MsgId, SeqId, IsPersistent, IsDelivered} <- List]). + +read_index_segment(SeqId, IndexState) -> + SeqId1 = SeqId + rabbit_queue_index:segment_size(), + case rabbit_queue_index:read_segment_entries(SeqId, IndexState) of + {[], IndexState1} -> read_index_segment(SeqId1, IndexState1); + {List, IndexState1} -> {List, IndexState1, SeqId1} + end. + +ensure_binary_properties(Msg = #basic_message { content = Content }) -> + Msg #basic_message { + content = rabbit_binary_parser:clear_decoded_content( + rabbit_binary_generator:ensure_content_encoded(Content)) }. + +%% the first arg is the older gamma +combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> + #gamma { seq_id = undefined, count = 0 }; +combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B; +combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A; +combine_gammas(#gamma { seq_id = SeqIdLow, count = CountLow }, + #gamma { seq_id = SeqIdHigh, count = CountHigh}) -> + true = SeqIdLow =< SeqIdHigh, %% ASSERTION + #gamma { seq_id = SeqIdLow, count = CountLow + CountHigh}. 
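combine_gammas/2 above merges an older on-disk run with a newer one, the assertion guaranteeing that the runs are ordered. A worked illustration (values invented):

%% merging an older run of 5000 entries starting at seq id 16384 with a
%% newer run of 200 entries starting at 32768:
true = (combine_gammas(#gamma { seq_id = 16384, count = 5000 },
                       #gamma { seq_id = 32768, count = 200 })
        =:= #gamma { seq_id = 16384, count = 5200 }),
%% and a gamma with count = 0 is the identity on either side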
+ +%%---------------------------------------------------------------------------- +%% Internal major helpers for Public API +%%---------------------------------------------------------------------------- + delete1(NextSeqId, Count, GammaSeqId, IndexState) when GammaSeqId >= NextSeqId -> {Count, IndexState}; @@ -502,71 +608,6 @@ remove_queue_entries(Q, IndexState) -> end, {Count, IndexState2}. -entry_salient_details(#alpha { msg = #basic_message { guid = MsgId }, - seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, - index_on_disk = IndexOnDisk }) -> - {MsgId, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}; -entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, - is_delivered = IsDelivered, - index_on_disk = IndexOnDisk }) -> - {MsgId, SeqId, IsDelivered, true, IndexOnDisk}. - -publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { next_seq_id = SeqId, len = Len }) -> - {SeqId, publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, - PersistentMsgsAlreadyOnDisk, - State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 })}. - -publish(msg, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { index_state = IndexState, - ram_msg_count = RamMsgCount }) -> - MsgOnDisk = - maybe_write_msg_to_disk(false, PersistentMsgsAlreadyOnDisk, Msg), - {IndexOnDisk, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - Entry = #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }, - State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, - index_state = IndexState1 }, - store_alpha_entry(Entry, State1); - -publish(index, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { index_state = IndexState, q1 = Q1 }) -> - true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), - {IndexOnDisk, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - Entry = #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, - is_persistent = IsPersistent, index_on_disk = IndexOnDisk }, - State1 = State #vqstate { index_state = IndexState1 }, - true = queue:is_empty(Q1), %% ASSERTION - store_beta_entry(Entry, State1); - -publish(neither, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, - gamma = Gamma }) -> - true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), - {true, IndexState1} = - maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION - %% gamma may be empty, seq_id > next_segment_boundary from q3 - %% head, so we need to find where the segment boundary is before - %% or equal to seq_id - GammaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - - rabbit_queue_index:segment_size(), - Gamma1 = #gamma { seq_id = GammaSeqId, count = 1 }, - State #vqstate { index_state = IndexState1, - gamma = combine_gammas(Gamma, Gamma1) }. 
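The GammaSeqId arithmetic in publish/neither (relocated later in this patch) finds the segment boundary at or below the message's seq id, since a gamma must start segment-aligned. A hedged sketch with an invented segment size (the real value comes from rabbit_queue_index:segment_size/0, and this assumes next_segment_boundary/1 returns the first boundary strictly above its argument):

-define(SEGMENT_SIZE, 16384). %% invented for illustration

gamma_seq_id(SeqId) ->
    NextBoundary = ((SeqId div ?SEGMENT_SIZE) + 1) * ?SEGMENT_SIZE,
    NextBoundary - ?SEGMENT_SIZE. %% i.e. SeqId rounded down

%% e.g. gamma_seq_id(20000) =:= 16384 and gamma_seq_id(16384) =:= 16384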
- fetch_from_q3_or_gamma(State = #vqstate { q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount }) -> @@ -609,56 +650,6 @@ fetch_from_q3_or_gamma(State = #vqstate { fetch(State2) end. -maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 } }) -> - State; -maybe_gammas_to_betas(State = - #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, - target_ram_msg_count = TargetRamMsgCount, - gamma = #gamma { seq_id = GammaSeqId, - count = GammaCount }}) -> - case (not queue:is_empty(Q3)) andalso 0 == TargetRamMsgCount of - true -> - State; - false -> - %% either q3 is empty, in which case we load at least one - %% segment, or TargetRamMsgCount > 0, meaning we should - %% really be holding all the betas in memory. - {List, IndexState1, Gamma1SeqId} = - read_index_segment(GammaSeqId, IndexState), - State1 = State #vqstate { index_state = IndexState1 }, - %% length(List) may be < segment_size because of acks. But - %% it can't be [] - Q3a = queue:join(Q3, betas_from_segment_entries(List)), - case GammaCount - length(List) of - 0 -> - %% gamma is now empty, but it wasn't before, so - %% can now join q2 onto q3 - State1 #vqstate { gamma = #gamma { seq_id = undefined, - count = 0 }, - q2 = queue:new(), - q3 = queue:join(Q3a, Q2) }; - N when N > 0 -> - maybe_gammas_to_betas( - State1 #vqstate { q3 = Q3a, - gamma = #gamma { seq_id = Gamma1SeqId, - count = N } }) - end - end. - -betas_from_segment_entries(List) -> - queue:from_list([#beta { msg_id = MsgId, seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - index_on_disk = true } - || {MsgId, SeqId, IsPersistent, IsDelivered} <- List]). - -read_index_segment(SeqId, IndexState) -> - SeqId1 = SeqId + rabbit_queue_index:segment_size(), - case rabbit_queue_index:read_segment_entries(SeqId, IndexState) of - {[], IndexState1} -> read_index_segment(SeqId1, IndexState1); - {List, IndexState1} -> {List, IndexState1, SeqId1} - end. - drain_prefetcher(_DrainOrStop, State = #vqstate { prefetcher = undefined }) -> State; drain_prefetcher(DrainOrStop, @@ -714,42 +705,9 @@ reduce_memory_use(State = _ -> State1 end. -%% Bool IsPersistent PersistentMsgsAlreadyOnDisk | WriteToDisk? -%% -----------------------------------------------+------------- -%% false false false | false 1 -%% false true false | true 2 -%% false false true | false 3 -%% false true true | false 4 -%% true false false | true 5 -%% true true false | true 6 -%% true false true | true 7 -%% true true true | false 8 - -%% (Bool and not (IsPersistent and PersistentMsgsAlreadyOnDisk)) or | 5 6 7 -%% (IsPersistent and (not PersistentMsgsAlreadyOnDisk)) | 2 6 -maybe_write_msg_to_disk(Bool, PersistentMsgsAlreadyOnDisk, - Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }) - when (Bool andalso not (IsPersistent andalso PersistentMsgsAlreadyOnDisk)) - orelse (IsPersistent andalso not PersistentMsgsAlreadyOnDisk) -> - ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), - true; -maybe_write_msg_to_disk(_Bool, true, #basic_message { is_persistent = true }) -> - true; -maybe_write_msg_to_disk(_Bool, _PersistentMsgsAlreadyOnDisk, _Msg) -> - false. 
- -maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, - IndexState) when Bool orelse IsPersistent -> - IndexState1 = rabbit_queue_index:write_published( - MsgId, SeqId, IsPersistent, IndexState), - {true, case IsDelivered of - true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); - false -> IndexState1 - end}; -maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, - IndexState) -> - {false, IndexState}. +%%---------------------------------------------------------------------------- +%% Internal gubbins for publishing +%%---------------------------------------------------------------------------- test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, @@ -766,7 +724,10 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, %% q3 index; {{value, #beta { seq_id = OldSeqId }}, _Q3a} -> - %% don't look at the current gamma as it may be empty + %% Don't look at the current gamma as it may be + %% empty. If the SeqId is still within the current + %% segment, it'll be a beta, else it'll go into + %% gamma case SeqId >= rabbit_queue_index:next_segment_boundary(OldSeqId) of true -> neither; false -> index @@ -781,10 +742,60 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, end end. -ensure_binary_properties(Msg = #basic_message { content = Content }) -> - Msg #basic_message { - content = rabbit_binary_parser:clear_decoded_content( - rabbit_binary_generator:ensure_content_encoded(Content)) }. +publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { next_seq_id = SeqId, len = Len }) -> + {SeqId, publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, + PersistentMsgsAlreadyOnDisk, + State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 })}. 
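test_keep_msg_in_ram/2 above answers one of three atoms, and the publish clauses that follow map each answer onto one of the queue's three storage classes. A mnemonic sketch (classify/1 is not real API, just a restatement):

%% msg     -> message and index entry both in RAM  (an #alpha {}, q1/q4)
%% index   -> message on disk, index entry in RAM  (a #beta {}, q2/q3)
%% neither -> message and index entry both on disk (counted by #gamma {})
classify(msg)     -> alpha;
classify(index)   -> beta;
classify(neither) -> gamma.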
+ +publish(msg, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { index_state = IndexState, + ram_msg_count = RamMsgCount }) -> + MsgOnDisk = + maybe_write_msg_to_disk(false, PersistentMsgsAlreadyOnDisk, Msg), + {IndexOnDisk, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + Entry = #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }, + State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, + index_state = IndexState1 }, + store_alpha_entry(Entry, State1); + +publish(index, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { index_state = IndexState, q1 = Q1 }) -> + true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), + {IndexOnDisk, IndexState1} = + maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + Entry = #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, + is_persistent = IsPersistent, index_on_disk = IndexOnDisk }, + State1 = State #vqstate { index_state = IndexState1 }, + true = queue:is_empty(Q1), %% ASSERTION + store_beta_entry(Entry, State1); + +publish(neither, Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }, + SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, + State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, + gamma = Gamma }) -> + true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), + {true, IndexState1} = + maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, + IsDelivered, IndexState), + true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION + %% gamma may be empty, seq_id > next_segment_boundary from q3 + %% head, so we need to find where the segment boundary is before + %% or equal to seq_id + GammaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - + rabbit_queue_index:segment_size(), + Gamma1 = #gamma { seq_id = GammaSeqId, count = 1 }, + State #vqstate { index_state = IndexState1, + gamma = combine_gammas(Gamma, Gamma1) }. store_alpha_entry(Entry = #alpha {}, State = #vqstate { q1 = Q1, q2 = Q2, @@ -806,6 +817,83 @@ store_beta_entry(Entry = #beta {}, State = false -> State #vqstate { q2 = queue:in(Entry, Q2) } end. +%% Bool IsPersistent PersistentMsgsAlreadyOnDisk | WriteToDisk? +%% -----------------------------------------------+------------- +%% false false false | false 1 +%% false true false | true 2 +%% false false true | false 3 +%% false true true | false 4 +%% true false false | true 5 +%% true true false | true 6 +%% true false true | true 7 +%% true true true | false 8 + +%% (Bool and not (IsPersistent and PersistentMsgsAlreadyOnDisk)) or | 5 6 7 +%% (IsPersistent and (not PersistentMsgsAlreadyOnDisk)) | 2 6 +maybe_write_msg_to_disk(Bool, PersistentMsgsAlreadyOnDisk, + Msg = #basic_message { guid = MsgId, + is_persistent = IsPersistent }) + when (Bool andalso not (IsPersistent andalso PersistentMsgsAlreadyOnDisk)) + orelse (IsPersistent andalso not PersistentMsgsAlreadyOnDisk) -> + ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), + true; +maybe_write_msg_to_disk(_Bool, true, #basic_message { is_persistent = true }) -> + true; +maybe_write_msg_to_disk(_Bool, _PersistentMsgsAlreadyOnDisk, _Msg) -> + false. 
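The guard on maybe_write_msg_to_disk/3 above compresses the eight-row truth table in the comment into one boolean expression. A self-contained restatement that checks all eight rows (module and function names invented; rows 2, 5, 6 and 7 of the table are the writing ones):

-module(write_decision_example).
-export([should_write/3, check/0]).

%% mirrors the guard of maybe_write_msg_to_disk/3
should_write(Force, IsPersistent, AlreadyOnDisk) ->
    (Force andalso not (IsPersistent andalso AlreadyOnDisk))
        orelse (IsPersistent andalso not AlreadyOnDisk).

%% enumerates (Force, IsPersistent, AlreadyOnDisk) in lexicographic
%% order and compares against the table above
check() ->
    [false, false, true, false, true, true, true, false] =:=
        [should_write(F, P, D) || F <- [false, true],
                                  P <- [false, true],
                                  D <- [false, true]].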
+ +maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, + IndexState) when Bool orelse IsPersistent -> + IndexState1 = rabbit_queue_index:write_published( + MsgId, SeqId, IsPersistent, IndexState), + {true, case IsDelivered of + true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); + false -> IndexState1 + end}; +maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, + IndexState) -> + {false, IndexState}. + +%%---------------------------------------------------------------------------- +%% Phase changes +%%---------------------------------------------------------------------------- + +maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 } }) -> + State; +maybe_gammas_to_betas(State = + #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, + target_ram_msg_count = TargetRamMsgCount, + gamma = #gamma { seq_id = GammaSeqId, + count = GammaCount }}) -> + case (not queue:is_empty(Q3)) andalso 0 == TargetRamMsgCount of + true -> + State; + false -> + %% either q3 is empty, in which case we load at least one + %% segment, or TargetRamMsgCount > 0, meaning we should + %% really be holding all the betas in memory. + {List, IndexState1, Gamma1SeqId} = + read_index_segment(GammaSeqId, IndexState), + State1 = State #vqstate { index_state = IndexState1 }, + %% length(List) may be < segment_size because of acks. But + %% it can't be [] + Q3a = queue:join(Q3, betas_from_segment_entries(List)), + case GammaCount - length(List) of + 0 -> + %% gamma is now empty, but it wasn't before, so + %% can now join q2 onto q3 + State1 #vqstate { gamma = #gamma { seq_id = undefined, + count = 0 }, + q2 = queue:new(), + q3 = queue:join(Q3a, Q2) }; + N when N > 0 -> + maybe_gammas_to_betas( + State1 #vqstate { q3 = Q3a, + gamma = #gamma { seq_id = Gamma1SeqId, + count = N } }) + end + end. + maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> maybe_push_alphas_to_betas( fun queue:out/1, @@ -927,13 +1015,3 @@ push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) -> end, push_betas_to_gammas(Generator, Limit, Qa, Count + 1, IndexState1) end. - -%% the first arg is the older gamma -combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> - #gamma { seq_id = undefined, count = 0 }; -combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B; -combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A; -combine_gammas(#gamma { seq_id = SeqIdLow, count = CountLow }, - #gamma { seq_id = SeqIdHigh, count = CountHigh}) -> - true = SeqIdLow =< SeqIdHigh, %% ASSERTION - #gamma { seq_id = SeqIdLow, count = CountLow + CountHigh}. -- cgit v1.2.1 From 51fc6c56f2394a93dd0d19430b5d8dbe60dbb164 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 10 Nov 2009 11:15:39 +0000 Subject: Added test which slowly reduces the duration target of the queue as messages are pumped through at high rate. This has revealed major flaw in the queue index which goes as follows. In the queue index, we assume that all segments have the same number of entries. This is in fact not necessarily the case, because a segment may very well have a mixture of messages, some of which are on disk and some of which are not. There are a few solutions to this, and I've not decided yet which is right. 
--- src/rabbit_tests.erl | 41 +++++++++++++++++++++++++++++++++++++++++ src/rabbit_variable_queue.erl | 4 +++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1becda86..d1131ed0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1159,8 +1159,49 @@ test_variable_queue() -> passed = test_variable_queue_prefetching_during_publish(0), passed = test_variable_queue_prefetching_during_publish(5000), passed = test_variable_queue_prefetch_evicts_q1(), + passed = test_variable_queue_dynamic_duration_change(), passed. +test_variable_queue_dynamic_duration_change() -> + SegmentSize = rabbit_queue_index:segment_size(), + VQ0 = fresh_variable_queue(), + %% start by sending in a couple of segments worth + Len1 = 2*SegmentSize, + {_SeqIds, VQ1} = variable_queue_publish(true, Len1, VQ0), + VQ2 = rabbit_variable_queue:remeasure_egress_rate(VQ1), + {ok, _TRef} = timer:send_after(1000, {duration, 30, fun erlang:'-'/2}), + VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), + {VQ4, AckTags} = variable_queue_fetch(Len1, false, false, Len1, VQ3), + VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), + {empty, VQ6} = rabbit_variable_queue:fetch(VQ5), + rabbit_variable_queue:terminate(VQ6), + + passed. + +test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> + {_SeqIds, VQ1} = variable_queue_publish(false, 1, VQ0), + {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(VQ1), + VQ3 = rabbit_variable_queue:ack([AckTag], VQ2), + receive + {duration, 30, stop} -> + VQ3; + {duration, N, Fun} -> + N1 = Fun(N, 1), + Fun1 = case N1 of + 0 -> fun erlang:'+'/2; + 30 -> stop; + _ -> Fun + end, + {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), + VQ4 = rabbit_variable_queue:remeasure_egress_rate(VQ3), + VQ5 = %% /37 otherwise the duration is just to high to stress things + rabbit_variable_queue:set_queue_ram_duration_target(N/37, VQ4), + io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), + test_variable_queue_dynamic_duration_change_f(Len, VQ5) + after 0 -> + test_variable_queue_dynamic_duration_change_f(Len, VQ3) + end. + test_variable_queue_prefetch_evicts_q1() -> SegmentSize = rabbit_queue_index:segment_size(), VQ0 = fresh_variable_queue(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c698e31e..15caf81b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -482,7 +482,8 @@ flush_journal(State = #vqstate { index_state = IndexState }) -> status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, len = Len, on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, prefetcher = Prefetcher }) -> + ram_msg_count = RamMsgCount, prefetcher = Prefetcher, + avg_egress_rate = AvgEgressRate }) -> [ {q1, queue:len(Q1)}, {q2, queue:len(Q2)}, {gamma, Gamma}, @@ -492,6 +493,7 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, {outstanding_txns, length(From)}, {target_ram_msg_count, TargetRamMsgCount}, {ram_msg_count, RamMsgCount}, + {avg_egress_rate, AvgEgressRate}, {prefetching, Prefetcher /= undefined} ]. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From a7d7153255ebf435a8f70fa16b21dbeaa28a6625 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 10 Nov 2009 14:07:28 +0000 Subject: Fixed. 
There was a choice of having the qi do its own seqids, which would have been fine, but for the fact that at any point, there is the possibility of the vq deciding to flush everything out to disk. Thus, to avoid rewriting segments in order to preserve ordering, we instead cope with the fact that there may be partial segments. The solution is to have a dict of segments which received fewer than the expected number of publishes before a different segment was written to. Then, when adding acks, we check to see if the number of acks now equals the number of publishes recorded for that partial segment; if it does, we delete the segment file. Also, when moving to a different segment, we potentially delete the old segment file if its number of acks equals its number of publishes. One key aspect of this is that the current segment to which we are publishing never appears in this dict, so it is not possible to delete the current publish segment by having the same number of pubs and acks, unless it really is completely full. Added and modified tests accordingly. --- src/rabbit_queue_index.erl | 133 +++++++++++++++++++++++++++++++-------------- src/rabbit_tests.erl | 96 +++++++++++++++++++++++--------- 2 files changed, 160 insertions(+), 69 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f21f9e17..a198ba51 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -117,7 +117,8 @@ journal_ack_dict, journal_del_dict, seg_ack_counts, - publish_handle + publish_handle, + partial_segments }). -include("rabbit.hrl"). @@ -137,7 +138,8 @@ journal_ack_dict :: dict(), journal_del_dict :: dict(), seg_ack_counts :: dict(), - publish_handle :: hdl_and_count() + publish_handle :: hdl_and_count(), + partial_segments :: dict() }). -spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). @@ -401,18 +403,40 @@ get_pub_handle(SegNum, State = #qistate { publish_handle = PubHandle }) -> {Hdl, State1 #qistate { publish_handle = PubHandle1 }}. get_counted_handle(SegNum, State, undefined) -> + get_counted_handle(SegNum, State, {SegNum, undefined, 0}); +get_counted_handle(SegNum, State = #qistate { partial_segments = Partials }, + {SegNum, undefined, Count}) -> {Hdl, State1} = get_seg_handle(SegNum, State), - {State1, {SegNum, Hdl, 1}}; -get_counted_handle(SegNum, State, {SegNum, undefined, Count}) -> - {Hdl, State1} = get_seg_handle(SegNum, State), - {State1, {SegNum, Hdl, Count + 1}}; + {CountExtra, Partials1} = + case dict:find(SegNum, Partials) of + {ok, CountExtra1} -> {CountExtra1, dict:erase(SegNum, Partials)}; + error -> {0, Partials} + end, + Count1 = Count + 1 + CountExtra, + {State1 #qistate { partial_segments = Partials1 }, {SegNum, Hdl, Count1}}; get_counted_handle(SegNum, State, {SegNum, Hdl, Count}) when Count < ?SEGMENT_ENTRIES_COUNT -> {State, {SegNum, Hdl, Count + 1}}; get_counted_handle(SegNumA, State, {SegNumB, Hdl, ?SEGMENT_ENTRIES_COUNT}) when SegNumA == SegNumB + 1 -> ok = file_handle_cache:append_write_buffer(Hdl), - get_counted_handle(SegNumA, State, undefined). + get_counted_handle(SegNumA, State, undefined); +get_counted_handle(SegNumA, State = #qistate { partial_segments = Partials, + seg_ack_counts = AckCounts, + dir = Dir }, + {SegNumB, Hdl, Count}) -> + %% don't flush here because it's possible SegNumB has been deleted + State1 = + case dict:find(SegNumB, AckCounts) of + {ok, Count} -> + %% #acks == #pubs, and we're moving to different + %% segment, so delete.
+ delete_segment(SegNumB, State); + _ -> + State #qistate { + partial_segments = dict:store(SegNumB, Count, Partials) } + end, + get_counted_handle(SegNumA, State1, undefined). get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> case dict:find(SegNum, SegHdls) of @@ -425,6 +449,17 @@ get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls } State) end. +delete_segment(SegNum, State = #qistate { dir = Dir, + seg_ack_counts = AckCounts, + partial_segments = Partials }) -> + State1 = close_handle(SegNum, State), + ok = case file:delete(seg_num_to_path(Dir, SegNum)) of + ok -> ok; + {error, enoent} -> ok + end, + State1 #qistate {seg_ack_counts = dict:erase(SegNum, AckCounts), + partial_segments = dict:erase(SegNum, Partials) }. + new_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> {ok, Hdl} = file_handle_cache:open(Path, Mode, [{write_buffer, infinity}]), {Hdl, State #qistate { seg_num_handles = dict:store(Key, Hdl, SegHdls) }}. @@ -486,7 +521,8 @@ blank_state(QueueName) -> journal_ack_dict = dict:new(), journal_del_dict = dict:new(), seg_ack_counts = dict:new(), - publish_handle = undefined + publish_handle = undefined, + partial_segments = dict:new() }. detect_clean_shutdown(Dir) -> @@ -551,7 +587,8 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> {TotalMsgCount, State1} = lists:foldl( fun (SegNum, {TotalMsgCount1, StateN = - #qistate { publish_handle = PublishHandle }}) -> + #qistate { publish_handle = PublishHandle, + partial_segments = Partials }}) -> {SDict, PubCount, AckCount, _HighRelSeq, StateM} = load_segment(SegNum, StateN), StateL = #qistate { seg_ack_counts = AckCounts } = @@ -565,19 +602,30 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> 0 -> AckCounts; N -> dict:store(SegNum, N, AckCounts) end, - %% In the following, there should only be max one - %% segment that matches the 3rd case. All other - %% segments should either be full or empty. There - %% could be no partial segments. - PublishHandle1 = case PubCount of - ?SEGMENT_ENTRIES_COUNT -> PublishHandle; - 0 -> PublishHandle; - _ when PublishHandle == undefined -> - {SegNum, undefined, PubCount} - end, + %% In the following, whilst there may be several + %% partial segments, we only remember the last + %% one. All other partial segments get added into + %% the partial_segments dict + {PublishHandle1, Partials1} = + case PubCount of + ?SEGMENT_ENTRIES_COUNT -> + {PublishHandle, Partials}; + 0 -> + {PublishHandle, Partials}; + _ -> + {{SegNum, undefined, PubCount}, + case PublishHandle of + undefined -> + Partials; + {SegNumOld, undefined, PubCountOld} -> + dict:store(SegNumOld, PubCountOld, + Partials) + end} + end, {TotalMsgCount2, StateL #qistate { seg_ack_counts = AckCounts1, - publish_handle = PublishHandle1 }} + publish_handle = PublishHandle1, + partial_segments = Partials1 }} end, {0, State}, SegNums), {TotalMsgCount, State1}. 
@@ -767,39 +815,40 @@ deliver_or_ack_msg(SDict, AckCount, RelSeq) -> %%---------------------------------------------------------------------------- append_acks_to_segment(SegNum, Acks, - State = #qistate { seg_ack_counts = AckCounts }) -> + State = #qistate { seg_ack_counts = AckCounts, + partial_segments = Partials }) -> AckCount = case dict:find(SegNum, AckCounts) of {ok, AckCount1} -> AckCount1; error -> 0 end, + AckTarget = case dict:find(SegNum, Partials) of + {ok, PubCount} -> PubCount; + error -> ?SEGMENT_ENTRIES_COUNT + end, AckCount2 = AckCount + length(Acks), - AckCounts1 = case AckCount2 of - 0 -> AckCounts; - ?SEGMENT_ENTRIES_COUNT -> dict:erase(SegNum, AckCounts); - _ -> dict:store(SegNum, AckCount2, AckCounts) - end, - append_acks_to_segment(SegNum, AckCount2, Acks, - State #qistate { seg_ack_counts = AckCounts1 }). - -append_acks_to_segment(SegNum, AckCount, _Acks, - State = #qistate { dir = Dir, publish_handle = PubHdl }) - when AckCount == ?SEGMENT_ENTRIES_COUNT -> + append_acks_to_segment(SegNum, AckCount2, Acks, AckTarget, State). + +append_acks_to_segment(SegNum, AckCount, _Acks, AckCount, State = + #qistate { publish_handle = PubHdl }) -> PubHdl1 = case PubHdl of - {SegNum, Hdl, ?SEGMENT_ENTRIES_COUNT} when Hdl /= undefined -> + %% If we're adjusting the pubhdl here then there + %% will be no entry in partials, thus the target ack + %% count must be SEGMENT_ENTRIES_COUNT + {SegNum, Hdl, AckCount = ?SEGMENT_ENTRIES_COUNT} + when Hdl /= undefined -> {SegNum + 1, undefined, 0}; _ -> PubHdl end, - State1 = close_handle(SegNum, State #qistate { publish_handle = PubHdl1 }), - ok = case file:delete(seg_num_to_path(Dir, SegNum)) of - ok -> ok; - {error, enoent} -> ok - end, - State1; -append_acks_to_segment(SegNum, AckCount, Acks, State) - when AckCount < ?SEGMENT_ENTRIES_COUNT -> + delete_segment(SegNum, State #qistate { publish_handle = PubHdl1 }); +append_acks_to_segment(_SegNum, _AckCount, [], _AckTarget, State) -> + State; +append_acks_to_segment(SegNum, AckCount, Acks, AckTarget, State = + #qistate { seg_ack_counts = AckCounts }) + when AckCount < AckTarget -> {Hdl, State1} = append_to_segment(SegNum, Acks, State), ok = file_handle_cache:sync(Hdl), - State1. + State1 #qistate { seg_ack_counts = + dict:store(SegNum, AckCount, AckCounts) }. append_dels_to_segment(SegNum, Dels, State) -> {_Hdl, State1} = append_to_segment(SegNum, Dels, State), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index d1131ed0..d74f998e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1025,6 +1025,23 @@ queue_index_publish(SeqIds, Persistent, Qi) -> {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} end, {Qi, []}, SeqIds). +queue_index_deliver(SeqIds, Qi) -> + lists:foldl( + fun (SeqId, QiN) -> + rabbit_queue_index:write_delivered(SeqId, QiN) + end, Qi, SeqIds). + +queue_index_flush_journal(Qi) -> + {_Oks, {false, Qi1}} = + rabbit_misc:unfold( + fun ({true, QiN}) -> + QiM = rabbit_queue_index:flush_journal(QiN), + {true, ok, {rabbit_queue_index:can_flush_journal(QiM), QiM}}; + ({false, _QiN}) -> + false + end, {true, Qi}), + Qi1. 
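The reworked append_acks_to_segment above now measures ack completeness against a per-segment target: a partial segment's own publish count, or the full segment capacity otherwise. A hedged sketch of just that rule (function names invented; ?SEGMENT_ENTRIES_COUNT as in rabbit_queue_index.erl):

ack_target(SegNum, Partials) ->
    case dict:find(SegNum, Partials) of
        {ok, PubCount} -> PubCount;                %% partial segment
        error          -> ?SEGMENT_ENTRIES_COUNT   %% full segment
    end.

%% once every publish in a segment has been acked, its file can go
can_delete_segment(SegNum, AckCount, Partials) ->
    AckCount =:= ack_target(SegNum, Partials).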
+ verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; verify_read_with_published(Delivered, Persistent, @@ -1071,10 +1088,7 @@ test_queue_index() -> {LenB, Qi12} = rabbit_queue_index:init(test_queue()), {0, 20000, Qi13} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), - Qi14 = lists:foldl( - fun (SeqId, QiN) -> - rabbit_queue_index:write_delivered(SeqId, QiN) - end, Qi13, SeqIdsB), + Qi14 = queue_index_deliver(SeqIdsB, Qi13), {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsMsgIdsB)), @@ -1094,24 +1108,40 @@ test_queue_index() -> _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = stop_msg_store(), ok = empty_test_queue(), - %% this next bit is just to hit the auto deletion of segment files - SeqIdsC = lists:seq(0,65535), + + %% These next bits are just to hit the auto deletion of segment files. + %% First, partials: + %% a) partial pub+del+ack, then move to new segment + SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), {0, Qi22} = rabbit_queue_index:init(test_queue()), {Qi23, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi22), - Qi24 = lists:foldl( - fun (SeqId, QiN) -> - rabbit_queue_index:write_delivered(SeqId, QiN) - end, Qi23, SeqIdsC), + Qi24 = queue_index_deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), - {_Oks, {false, Qi26}} = - rabbit_misc:unfold( - fun ({true, QiN}) -> - QiM = rabbit_queue_index:flush_journal(QiN), - {true, ok, {rabbit_queue_index:can_flush_journal(QiM), QiM}}; - ({false, _QiN}) -> - false - end, {true, Qi25}), - _Qi27 = rabbit_queue_index:terminate_and_erase(Qi26), + Qi26 = queue_index_flush_journal(Qi25), + {Qi27, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize], false, Qi26), + _Qi28 = rabbit_queue_index:terminate_and_erase(Qi27), + ok = stop_msg_store(), + ok = empty_test_queue(), + + %% b) partial pub+del, then move to new segment, then ack all in old segment + {0, Qi29} = rabbit_queue_index:init(test_queue()), + {Qi30, _SeqIdsMsgIdsC2} = queue_index_publish(SeqIdsC, false, Qi29), + Qi31 = queue_index_deliver(SeqIdsC, Qi30), + {Qi32, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi31), + Qi33 = rabbit_queue_index:write_acks(SeqIdsC, Qi32), + Qi34 = queue_index_flush_journal(Qi33), + _Qi35 = rabbit_queue_index:terminate_and_erase(Qi34), + ok = stop_msg_store(), + ok = empty_test_queue(), + + %% c) just fill up several segments of all pubs, then +dels, then +acks + SeqIdsD = lists:seq(0,SegmentSize*4), + {0, Qi36} = rabbit_queue_index:init(test_queue()), + {Qi37, _SeqIdsMsgIdsD} = queue_index_publish(SeqIdsD, false, Qi36), + Qi38 = queue_index_deliver(SeqIdsD, Qi37), + Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), + Qi40 = queue_index_flush_journal(Qi39), + _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40), ok = stop_msg_store(), ok = rabbit_queue_index:start_msg_store([]), ok = stop_msg_store(), @@ -1167,14 +1197,26 @@ test_variable_queue_dynamic_duration_change() -> VQ0 = fresh_variable_queue(), %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, - {_SeqIds, VQ1} = variable_queue_publish(true, Len1, VQ0), + {_SeqIds, VQ1} = variable_queue_publish(false, Len1, VQ0), VQ2 = rabbit_variable_queue:remeasure_egress_rate(VQ1), - {ok, _TRef} = timer:send_after(1000, {duration, 30, fun erlang:'-'/2}), + {ok, _TRef} = timer:send_after(1000, {duration, 60, + fun (V) -> (V*0.75)-1 end}), VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), {VQ4, 
AckTags} = variable_queue_fetch(Len1, false, false, Len1, VQ3), VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), {empty, VQ6} = rabbit_variable_queue:fetch(VQ5), - rabbit_variable_queue:terminate(VQ6), + + %% just publish and fetch some persistent msgs, this hits the + %% partial segment path in queue_index due to the period when + %% duration was 0 and the entire queue was gamma. + {_SeqIds1, VQ7} = variable_queue_publish(true, 20, VQ6), + {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), + VQ10 = rabbit_variable_queue:flush_journal(VQ9), + VQ11 = rabbit_variable_queue:flush_journal(VQ10), + {empty, VQ12} = rabbit_variable_queue:fetch(VQ11), + + rabbit_variable_queue:terminate(VQ12), passed. @@ -1183,14 +1225,14 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(VQ1), VQ3 = rabbit_variable_queue:ack([AckTag], VQ2), receive - {duration, 30, stop} -> + {duration, _, stop} -> VQ3; {duration, N, Fun} -> - N1 = Fun(N, 1), + N1 = lists:max([Fun(N), 0]), Fun1 = case N1 of - 0 -> fun erlang:'+'/2; - 30 -> stop; - _ -> Fun + 0 -> fun (V) -> (V+1)/0.75 end; + _ when N1 > 400 -> stop; + _ -> Fun end, {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), VQ4 = rabbit_variable_queue:remeasure_egress_rate(VQ3), -- cgit v1.2.1 From 68fea373e2da2d5930976421e0cdd39c8574476d Mon Sep 17 00:00:00 2001 From: Marek Majkowski Date: Wed, 11 Nov 2009 06:25:21 -0500 Subject: QA: quoted atoms in specs, timer:now_diff --- src/rabbit_amqqueue_process.erl | 15 ++++----------- src/rabbit_memory_monitor.erl | 8 ++++---- src/vm_memory_monitor.erl | 6 +++--- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index fad36f2c..9b97fe86 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -99,10 +99,6 @@ start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). %%---------------------------------------------------------------------------- -now_us() -> - {Megaseconds,Seconds,Microseconds} = erlang:now(), - Megaseconds * 1000000 * 1000000 + Seconds * 1000000 + Microseconds. - init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), rabbit_memory_monitor:register(self()), @@ -117,7 +113,7 @@ init(Q) -> active_consumers = queue:new(), blocked_consumers = queue:new(), drain_ratio = #ratio{ratio = 0.0, - t0 = now_us(), + t0 = now(), next_msg_id = 1} }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -825,10 +821,7 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> handle_cast(send_memory_monitor_update, State) -> DrainRatio1 = update_ratio(State#q.drain_ratio, State#q.next_msg_id), MsgSec = DrainRatio1#ratio.ratio * 1000000, % msg/sec - QueueDuration = case MsgSec < 0.016 of %% less than 1 msg/1 minute - true -> infinity; - false -> queue:len(State#q.message_buffer) / MsgSec - end, + QueueDuration = queue:len(State#q.message_buffer) / MsgSec, % seconds DesiredQueueDuration = rabbit_memory_monitor:push_queue_duration( self(), QueueDuration), ?LOGDEBUG("~p Queue duration current/desired ~p/~p~n", [(State#q.q)#amqqueue.name, QueueDuration, DesiredQueueDuration]), noreply(State#q{drain_ratio = DrainRatio1}); @@ -854,8 +847,8 @@ calc_load(Load, Exp, N) -> Load*Exp + N*(1.0-Exp). update_ratio(_RatioRec = #ratio{ratio=Ratio, t0 = T0, next_msg_id = MsgCount0}, MsgCount1) -> - T1 = now_us(), - Td = T1 - T0, + T1 = now(), + Td = timer:now_diff(T1, T0), MsgCount = MsgCount1 - MsgCount0, MsgUSec = MsgCount / Td, % msg/usec %% Td is in usec.
We're interested in "load average" from last 30 seconds. diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 0629591a..ff7684bd 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -108,11 +108,11 @@ memory_limit :: pos_integer(), memory_ratio :: float() }). --spec(start_link/0 :: () -> ignore | {error, _} | {ok, pid()}). --spec(register/1 :: (pid()) -> ok). --spec(push_queue_duration/2 :: (pid(), float() | infinity) -> ok). +-spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). +-spec(register/1 :: (pid()) -> 'ok'). +-spec(push_queue_duration/2 :: (pid(), float() | 'infinity') -> 'ok'). --spec(init/1 :: ([]) -> {ok, state()}). +-spec(init/1 :: ([]) -> {'ok', state()}). -ifdef(debug). -spec(ftoa/1 :: (any()) -> string()). diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index 6da6704d..65d4a451 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -74,10 +74,10 @@ -ifdef(use_specs). --spec(start_link/1 :: (float()) -> ('ignore' | {error, any()} | {'ok', pid()})). +-spec(start_link/1 :: (float()) -> ('ignore' | {'error', any()} | {'ok', pid()})). -spec(update/0 :: () -> 'ok'). --spec(get_total_memory/0 :: () -> (non_neg_integer() | unknown)). --spec(get_memory_limit/0 :: () -> (non_neg_integer() | undefined)). +-spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). +-spec(get_memory_limit/0 :: () -> (non_neg_integer() | 'undefined')). -spec(get_check_interval/0 :: () -> non_neg_integer()). -spec(set_check_interval/1 :: (non_neg_integer()) -> 'ok'). -spec(get_vm_memory_high_watermark/0 :: () -> float()). -- cgit v1.2.1 From 761d2515c086c7b3b221e120c28b5d22c81b4c3f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 11 Nov 2009 15:02:08 +0000 Subject: Cosmetics --- src/rabbit_memory_monitor.erl | 194 ++++++++++++++++++++---------------------- 1 file changed, 90 insertions(+), 104 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index ff7684bd..4880b260 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -30,7 +30,7 @@ %% -%% This module handles the node-wide memory statistics. +%% This module handles the node-wide memory statistics. %% It receives statistics from all queues, counts the desired %% queue length (in seconds), and sends this information back to %% queues. @@ -46,8 +46,8 @@ %% Monitor X--*-+--X---*-+--X------X----X-----X+-----------> %% %% Or to put it in words. Queue periodically sends (casts) 'push_queue_duration' -%% message to the Monitor (cases 1 and 2 on the asciiart above). Monitor -%% _always_ replies with a 'set_queue_duration' cast. This way, +%% message to the Monitor (cases 1 and 2 on the asciiart above). Monitor +%% _always_ replies with a 'set_queue_duration' cast. This way, %% we're pretty sure that the Queue is not hibernated. %% Monitor periodically recounts numbers ('X' on asciiart). If, during this %% update we notice that a queue was using too much memory, we send a message @@ -57,13 +57,13 @@ %% %% The main job of this module, is to make sure that all the queues have %% more or less the same number of seconds till become drained. -%% This average, seconds-till-queue-is-drained, is then multiplied by +%% This average, seconds-till-queue-is-drained, is then multiplied by %% the ratio of Total/Used memory. So, if we can 'afford' more memory to be %% used, we'll report greater number back to the queues. 
In the out of %% memory case, we are going to reduce the average drain-seconds. %% To acheive all this we need to accumulate the information from every %% queue, and count an average from that. -%% +%% %% real_queue_duration_avg = avg([drain_from_queue_1, queue_2, queue_3, ...]) %% memory_overcommit = allowed_memory / used_memory %% desired_queue_duration_avg = real_queue_duration_avg * memory_overcommit @@ -84,9 +84,9 @@ -export([register/1, push_queue_duration/2]). -record(state, {timer, %% 'internal_update' timer - queue_durations, %% ets, (qpid, seconds_till_queue_is_empty) + queue_durations, %% ets, (qpid, last_reported, last_sent) queue_duration_sum, %% sum of all queue_durations - queue_duration_items,%% number of elements in sum + queue_duration_count,%% number of elements in sum memory_limit, %% how much memory we intend to use memory_ratio %% how much more memory we can use }). @@ -94,7 +94,7 @@ -define(SERVER, ?MODULE). -define(DEFAULT_UPDATE_INTERVAL_MS, 2500). -define(TABLE_NAME, ?MODULE). --define(MAX_QUEUE_DURATION_ALLOWED, 60*60*24). % 1 day +-define(MAX_QUEUE_DURATION, 60*60*24). % 1 day %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). @@ -104,7 +104,7 @@ -type(state() :: #state{timer :: timer:tref(), queue_durations :: tid(), queue_duration_sum :: float(), - queue_duration_items:: non_neg_integer(), + queue_duration_count:: non_neg_integer(), memory_limit :: pos_integer(), memory_ratio :: float() }). @@ -114,10 +114,6 @@ -spec(init/1 :: ([]) -> {'ok', state()}). --ifdef(debug). --spec(ftoa/1 :: (any()) -> string()). --endif. - -spec(internal_update/1 :: (state()) -> state()). -endif. @@ -136,68 +132,67 @@ register(Pid) -> push_queue_duration(Pid, QueueDuration) -> gen_server2:call(rabbit_memory_monitor, - {push_queue_duration, Pid, QueueDuration}). + {push_queue_duration, Pid, QueueDuration}). %%---------------------------------------------------------------------------- get_memory_limit() -> - RabbitMemoryLimit = case vm_memory_monitor:get_memory_limit() of + case vm_memory_monitor:get_memory_limit() of undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; A -> A end. init([]) -> - %% We should never use more memory than user requested. As the memory + %% We should never use more memory than user requested. As the memory %% manager doesn't really know how much memory queues are using, we shall %% try to remain safe distance from real throttle limit. MemoryLimit = trunc(get_memory_limit() * 0.6), - rabbit_log:warning("Queues go to disk when memory is above: ~pMB~n", - [erlang:trunc(MemoryLimit/1048576)]), - - {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS, - ?SERVER, update, []), - {ok, #state{timer = TRef, - queue_durations = ets:new(?TABLE_NAME, [set, private]), - queue_duration_sum = 0.0, - queue_duration_items = 0, - memory_limit = MemoryLimit, - memory_ratio = 1.0}}. + + {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS, + ?SERVER, update, []), + {ok, #state{timer = TRef, + queue_durations = ets:new(?TABLE_NAME, [set, private]), + queue_duration_sum = 0.0, + queue_duration_count = 0, + memory_limit = MemoryLimit, + memory_ratio = 1.0}}. get_avg_duration(#state{queue_duration_sum = Sum, - queue_duration_items = Items}) -> - case Items of + queue_duration_count = Count}) -> + case Count of 0 -> infinity; - _ -> Sum / Items + _ -> Sum / Count end. 
-get_desired_duration(State) -> +get_desired_duration(State = #state{memory_ratio = Ratio}) -> case get_avg_duration(State) of - infinity -> infinity; - AvgQueueDuration -> AvgQueueDuration * State#state.memory_ratio + infinity -> infinity; + AvgQueueDuration -> AvgQueueDuration * Ratio end. -handle_call({push_queue_duration, Pid, QueueDuration0}, From, State) -> +handle_call({push_queue_duration, Pid, QueueDuration}, From, + State = #state{queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations}) -> SendDuration = get_desired_duration(State), gen_server2:reply(From, SendDuration), - QueueDuration = case QueueDuration0 > ?MAX_QUEUE_DURATION_ALLOWED of - true -> infinity; - false -> QueueDuration0 - end, - - {Sum, Items} = {State#state.queue_duration_sum, - State#state.queue_duration_items}, - [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(State#state.queue_durations, Pid), - {Sum1, Items1} = - case {PrevQueueDuration == infinity, QueueDuration == infinity} of - {true, true} -> {Sum, Items}; - {true, false} -> {Sum + QueueDuration, Items + 1}; - {false, true} -> {Sum - PrevQueueDuration, Items - 1}; - {false, false} -> {Sum - PrevQueueDuration + QueueDuration, Items} - end, - ets:insert(State#state.queue_durations, {Pid, QueueDuration, SendDuration}), + QueueDuration1 = case QueueDuration > ?MAX_QUEUE_DURATION of + true -> infinity; + false -> QueueDuration + end, + + [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid), + {Sum1, Count1} = + case {PrevQueueDuration, QueueDuration1} of + {infinity, infinity} -> {Sum, Count}; + {infinity, _} -> {Sum + QueueDuration1, Count + 1}; + {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; + {_, _} -> {Sum - PrevQueueDuration + QueueDuration1, Count} + end, + true = ets:insert(Durations, {Pid, QueueDuration1, SendDuration}), {noreply, State#state{queue_duration_sum = Sum1, - queue_duration_items = Items1}}; + queue_duration_count = Count1}}; handle_call(_Request, _From, State) -> {noreply, State}. @@ -208,77 +203,68 @@ handle_cast(update, State) -> handle_cast({register, Pid}, State) -> _MRef = erlang:monitor(process, Pid), - ets:insert(State#state.queue_durations, {Pid, infinity, infinity}), + true = ets:insert(State#state.queue_durations, {Pid, infinity, infinity}), {noreply, State}; handle_cast(_Request, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> - {Sum, Items} = {State#state.queue_duration_sum, - State#state.queue_duration_items}, - [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(State#state.queue_durations, Pid), - Sum1 = case PrevQueueDuration == infinity of - true -> Sum; - false -> Sum - PrevQueueDuration - end, - ets:delete(State#state.queue_durations, Pid), +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #state{queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations}) -> + [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid), + Sum1 = case PrevQueueDuration of + infinity -> Sum; + _ -> Sum - PrevQueueDuration + end, + true = ets:delete(State#state.queue_durations, Pid), {noreply, State#state{queue_duration_sum = Sum1, - queue_duration_items = Items-1}}; + queue_duration_count = Count-1}}; -handle_info(_Info, State) -> +handle_info(_Info, State) -> {noreply, State}. -terminate(_Reason, _State) -> +terminate(_Reason, _State) -> ok. -code_change(_OldVsn, State, _Extra) -> +code_change(_OldVsn, State, _Extra) -> {ok, State}. 
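The {Sum1, Count1} computation above keeps a running sum and count in which 'infinity' reports simply do not participate. Restated as a standalone transition function (a sketch; the name is invented):

%% update_average(PreviousReport, NewReport, Sum, Count)
update_average(infinity, infinity, Sum, Count) -> {Sum, Count};
update_average(infinity, New,      Sum, Count) -> {Sum + New, Count + 1};
update_average(Old,      infinity, Sum, Count) -> {Sum - Old, Count - 1};
update_average(Old,      New,      Sum, Count) -> {Sum - Old + New, Count}.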
- set_queue_duration(Pid, QueueDuration) -> gen_server2:pcast(Pid, 7, {set_queue_duration, QueueDuration}). --ifdef(debug). -ftoa(Float) -> - Str = case is_float(Float) of - true -> io_lib:format("~11.3f",[Float]); - false -> io_lib:format("~p", [Float]) - end, - lists:flatten(Str). --endif. - - -%% Update memory ratio. Count new DesiredQueueDuration. -%% Get queues that are using more than that, and send -%% pessimistic information back to them. -internal_update(State0) -> - %% available memory / used memory - MemoryRatio = State0#state.memory_limit / erlang:memory(total), - State = State0#state{memory_ratio = MemoryRatio}, - +internal_update(State = #state{memory_limit = Limit, + queue_durations = Durations}) -> DesiredDurationAvg = get_desired_duration(State), - - ?LOGDEBUG("Avg duration: real/desired:~s/~s Memory ratio:~s Queues:~p~n", - [ftoa(get_avg_duration(State)), ftoa(DesiredDurationAvg), - ftoa(MemoryRatio), - ets:foldl(fun (_, Acc) -> Acc+1 end, - 0, State#state.queue_durations)] ), - - %% If we have pessimistic information, we need to inform queues - %% to reduce it's memory usage when needed. - %% This sometimes wakes up queues from hibernation. Well, we don't care. - PromptReduceDuraton = fun ({Pid, QueueDuration, PrevSendDuration}, Acc) -> - case (PrevSendDuration > DesiredDurationAvg) and (QueueDuration > DesiredDurationAvg) of - true -> set_queue_duration(Pid, DesiredDurationAvg), - ets:insert(State#state.queue_durations, {Pid, QueueDuration, DesiredDurationAvg}), - Acc + 1; - _ -> Acc - end + %% available memory / used memory + MemoryRatio = Limit / erlang:memory(total), + State1 = State#state{memory_ratio = MemoryRatio}, + DesiredDurationAvg1 = get_desired_duration(State1), + + %% only inform queues immediately if the desired duration has + %% decreased + case DesiredDurationAvg1 < DesiredDurationAvg of + true -> + %% If we have pessimistic information, we need to inform + %% queues to reduce their memory usage when needed. This + %% sometimes wakes up queues from hibernation. + true = ets:foldl( + fun ({Pid, QueueDuration, PrevSendDuration}, true) -> + case DesiredDurationAvg1 < + lists:min([PrevSendDuration, QueueDuration]) of + true -> + set_queue_duration(Pid, + DesiredDurationAvg1), + ets:insert(Durations, + {Pid, QueueDuration, + DesiredDurationAvg1}); + _ -> true + end + end, true, Durations); + false -> ok end, - ets:foldl(PromptReduceDuraton, 0, State#state.queue_durations), - State. - + State1. -- cgit v1.2.1 From 194e7bc09679687eebce6691b4387786030d548c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 11 Nov 2009 15:16:20 +0000 Subject: Unhappy with the idea that the desired duration is affected immediately by new queues and queues dying, but the memory ratio, which reflects the amount of memory erlang has used, is updated periodically. This mix of up-to-date and stale information in the calculation of the desired duration alarms me. Thus store the desired duration in the state, and always report that. That is then updated periodically, and thus is only ever calculated using current values.
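For concreteness, the recalculation this change confines to internal_update amounts to the following (a sketch with invented numbers; memory_ratio is memory_limit divided by erlang:memory(total)):

desired_duration(_Sum, 0, _MemoryRatio) ->
    infinity;
desired_duration(Sum, Count, MemoryRatio) ->
    (Sum / Count) * MemoryRatio.

%% e.g. four queues reporting 120s of drain time between them, while
%% the node uses twice its allowed memory:
%%   desired_duration(120.0, 4, 0.5) =:= 15.0
%% so each queue is asked to hold half of the 30s average in RAM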
--- src/rabbit_amqqueue_process.erl | 4 +-- src/rabbit_memory_monitor.erl | 71 ++++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 9b97fe86..0bfa6df1 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -822,8 +822,8 @@ handle_cast(send_memory_monitor_update, State) -> DrainRatio1 = update_ratio(State#q.drain_ratio, State#q.next_msg_id), MsgSec = DrainRatio1#ratio.ratio * 1000000, % msg/sec QueueDuration = queue:len(State#q.message_buffer) / MsgSec, % seconds - DesiredQueueDuration = rabbit_memory_monitor:push_queue_duration( - self(), QueueDuration), + DesiredQueueDuration = rabbit_memory_monitor:report_queue_duration( + self(), QueueDuration), ?LOGDEBUG("~p Queue duration current/desired ~p/~p~n", [(State#q.q)#amqqueue.name, QueueDuration, DesiredQueueDuration]), noreply(State#q{drain_ratio = DrainRatio1}); diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 4880b260..7bd03c9c 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -81,18 +81,19 @@ -export([update/0]). --export([register/1, push_queue_duration/2]). - --record(state, {timer, %% 'internal_update' timer - queue_durations, %% ets, (qpid, last_reported, last_sent) - queue_duration_sum, %% sum of all queue_durations - queue_duration_count,%% number of elements in sum - memory_limit, %% how much memory we intend to use - memory_ratio %% how much more memory we can use +-export([register/1, report_queue_duration/2]). + +-record(state, {timer, %% 'internal_update' timer + queue_durations, %% ets, (qpid, last_reported, last_sent) + queue_duration_sum, %% sum of all queue_durations + queue_duration_count, %% number of elements in sum + memory_limit, %% how much memory we intend to use + memory_ratio, %% how much more memory we can use + desired_duration %% the desired queue duration }). -define(SERVER, ?MODULE). --define(DEFAULT_UPDATE_INTERVAL_MS, 2500). +-define(DEFAULT_UPDATE_INTERVAL, 2500). -define(TABLE_NAME, ?MODULE). -define(MAX_QUEUE_DURATION, 60*60*24). % 1 day @@ -106,11 +107,12 @@ queue_duration_sum :: float(), queue_duration_count:: non_neg_integer(), memory_limit :: pos_integer(), - memory_ratio :: float() }). + memory_ratio :: float(), + desired_duration :: float() | 'infinity' }). -spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). -spec(register/1 :: (pid()) -> 'ok'). --spec(push_queue_duration/2 :: (pid(), float() | 'infinity') -> 'ok'). +-spec(report_queue_duration/2 :: (pid(), float() | 'infinity') -> 'ok'). -spec(init/1 :: ([]) -> {'ok', state()}). @@ -130,9 +132,9 @@ update() -> register(Pid) -> gen_server2:cast(?SERVER, {register, Pid}). -push_queue_duration(Pid, QueueDuration) -> +report_queue_duration(Pid, QueueDuration) -> gen_server2:call(rabbit_memory_monitor, - {push_queue_duration, Pid, QueueDuration}). + {report_queue_duration, Pid, QueueDuration}). %%---------------------------------------------------------------------------- @@ -148,33 +150,21 @@ init([]) -> %% try to remain safe distance from real throttle limit. 
MemoryLimit = trunc(get_memory_limit() * 0.6), - {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL_MS, + {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, ?SERVER, update, []), {ok, #state{timer = TRef, queue_durations = ets:new(?TABLE_NAME, [set, private]), queue_duration_sum = 0.0, queue_duration_count = 0, memory_limit = MemoryLimit, - memory_ratio = 1.0}}. + memory_ratio = 1.0, + desired_duration = infinity}}. -get_avg_duration(#state{queue_duration_sum = Sum, - queue_duration_count = Count}) -> - case Count of - 0 -> infinity; - _ -> Sum / Count - end. - -get_desired_duration(State = #state{memory_ratio = Ratio}) -> - case get_avg_duration(State) of - infinity -> infinity; - AvgQueueDuration -> AvgQueueDuration * Ratio - end. - -handle_call({push_queue_duration, Pid, QueueDuration}, From, +handle_call({report_queue_duration, Pid, QueueDuration}, From, State = #state{queue_duration_sum = Sum, queue_duration_count = Count, - queue_durations = Durations}) -> - SendDuration = get_desired_duration(State), + queue_durations = Durations, + desired_duration = SendDuration}) -> gen_server2:reply(From, SendDuration), QueueDuration1 = case QueueDuration > ?MAX_QUEUE_DURATION of @@ -237,12 +227,22 @@ set_queue_duration(Pid, QueueDuration) -> gen_server2:pcast(Pid, 7, {set_queue_duration, QueueDuration}). internal_update(State = #state{memory_limit = Limit, - queue_durations = Durations}) -> - DesiredDurationAvg = get_desired_duration(State), + queue_durations = Durations, + desired_duration = DesiredDurationAvg, + queue_duration_sum = Sum, + queue_duration_count = Count}) -> %% available memory / used memory MemoryRatio = Limit / erlang:memory(total), - State1 = State#state{memory_ratio = MemoryRatio}, - DesiredDurationAvg1 = get_desired_duration(State1), + AvgDuration = case Count of + 0 -> infinity; + _ -> Sum / Count + end, + DesiredDurationAvg1 = case AvgDuration of + infinity -> infinity; + AvgQueueDuration -> AvgQueueDuration * MemoryRatio + end, + State1 = State#state{memory_ratio = MemoryRatio, + desired_duration = DesiredDurationAvg1}, %% only inform queues immediately if the desired duration has %% decreased @@ -267,4 +267,3 @@ internal_update(State = #state{memory_limit = Limit, false -> ok end, State1. - -- cgit v1.2.1 From 390bbf151ae557a4197f8199204b21417f85cf94 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 11 Nov 2009 16:29:14 +0000 Subject: Various amounts of tidying, post testing and a large amount of cosmetics. --- src/rabbit_amqqueue.erl | 11 ++++- src/rabbit_amqqueue_process.erl | 48 +++++++++++----------- src/rabbit_memory_monitor.erl | 89 +++++++++++++++++++++-------------------- 3 files changed, 81 insertions(+), 67 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 1a5e82d7..4abfcd0b 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -39,7 +39,8 @@ -export([list/1, info/1, info/2, info_all/1, info_all/2]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2]). +-export([notify_sent/2, unblock/2, set_queue_duration/2, + send_memory_monitor_update/1]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -101,6 +102,8 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). +-spec(set_queue_duration/2 :: (pid(), number()) -> 'ok'). 
+-spec(send_memory_monitor_update/1 :: (pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). @@ -308,6 +311,12 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 8, {unblock, ChPid}). +set_queue_duration(QPid, Duration) -> + gen_server2:pcast(QPid, 7, {set_queue_duration, Duration}). + +send_memory_monitor_update(QPid) -> + gen_server2:pcast(QPid, 7, send_memory_monitor_update). + internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( fun () -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 0bfa6df1..2d264fc2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -42,7 +42,6 @@ -export([start_link/1]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). --export([send_memory_monitor_update/1]). -import(queue). -import(erlang). @@ -101,9 +100,11 @@ start_link(Q) -> %%---------------------------------------------------------------------------- init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - rabbit_memory_monitor:register(self()), + rabbit_memory_monitor:register(self(), {rabbit_amqqueue, set_queue_duration, + [self()]}), %% Beware. This breaks hibernation! - timer:apply_interval(2500, ?MODULE, send_memory_monitor_update, [self()]), + timer:apply_interval(2500, rabbit_amqqueue, send_memory_monitor_update, + [self()]), {ok, #q{q = Q, owner = none, exclusive_consumer = none, @@ -821,26 +822,33 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> handle_cast(send_memory_monitor_update, State) -> DrainRatio1 = update_ratio(State#q.drain_ratio, State#q.next_msg_id), MsgSec = DrainRatio1#ratio.ratio * 1000000, % msg/sec - QueueDuration = queue:len(State#q.message_buffer) / MsgSec, % seconds + QueueDuration = + case MsgSec == 0 of + true -> infinity; + false -> queue:len(State#q.message_buffer) / MsgSec % seconds + end, DesiredQueueDuration = rabbit_memory_monitor:report_queue_duration( self(), QueueDuration), - ?LOGDEBUG("~p Queue duration current/desired ~p/~p~n", - [(State#q.q)#amqqueue.name, QueueDuration, DesiredQueueDuration]), + ?LOGDEBUG("TIMER ~p Queue length is ~8p, should be ~p~n", + [(State#q.q)#amqqueue.name, queue:len(State#q.message_buffer), + case DesiredQueueDuration of + infinity -> infinity; + _ -> MsgSec * DesiredQueueDuration + end]), noreply(State#q{drain_ratio = DrainRatio1}); handle_cast({set_queue_duration, DesiredQueueDuration}, State) -> DrainRatio = State#q.drain_ratio, - DesiredBufLength = case DesiredQueueDuration of - infinity -> infinity; - _ -> DesiredQueueDuration * DrainRatio#ratio.ratio * 1000000 - end, - %% Just to proove that something is happening. - ?LOGDEBUG("~p Queue length is~8p, should be ~p~n", - [(State#q.q)#amqqueue.name, queue:len(State#q.message_buffer), - DesiredBufLength]), + DesiredBufLength = + case DesiredQueueDuration of + infinity -> infinity; + _ -> DesiredQueueDuration * DrainRatio#ratio.ratio * 1000000 + end, + ?LOGDEBUG("MAGIC ~p Queue length is ~8p, should be ~p~n", + [(State#q.q)#amqqueue.name, queue:len(State#q.message_buffer), + DesiredBufLength]), noreply(State). 
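The arithmetic in the send_memory_monitor_update clause above is worth seeing in isolation. A minimal sketch, outside any gen_server plumbing (module and function names here are mine, not part of the patch):

    %% Sketch: the duration a queue reports, given its current length
    %% and its measured drain rate in messages/second. A queue that is
    %% not draining at all reports 'infinity', mirroring the case
    %% analysis in the handle_cast clause above.
    -module(duration_sketch).
    -export([queue_duration/2]).

    queue_duration(_Len, MsgSec) when MsgSec == 0 ->
        infinity;
    queue_duration(Len, MsgSec) ->
        Len / MsgSec.    %% msgs / (msgs/sec) = seconds

For instance, queue_duration(1000, 250.0) gives 4.0: at the present drain rate the queue is holding four seconds' worth of messages.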
- %% Based on kernel load average, as described: %% http://www.teamquest.com/resources/gunther/display/5/ calc_load(Load, Exp, N) -> @@ -852,14 +860,8 @@ update_ratio(_RatioRec = #ratio{ratio=Ratio, t0 = T0, next_msg_id = MsgCount0}, MsgCount = MsgCount1 - MsgCount0, MsgUSec = MsgCount / Td, % msg/usec %% Td is in usec. We're interested in "load average" from last 30 seconds. - Ratio1 = calc_load(Ratio, 1.0/ (math:exp(Td/(30*1000000))), MsgUSec), - - #ratio{ratio = Ratio1, t0=T1, next_msg_id = MsgCount1}. - - -send_memory_monitor_update(Pid) -> - gen_server2:cast(Pid, send_memory_monitor_update). - + Ratio1 = calc_load(Ratio, 1.0/ (math:exp(Td/(30*1000000))), MsgUSec), + #ratio{ratio = Ratio1, t0=T1, next_msg_id = MsgCount1}. handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 7bd03c9c..cf184f3f 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -74,22 +74,19 @@ -behaviour(gen_server2). --export([start_link/0]). +-export([start_link/0, update/0, register/2, report_queue_duration/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([update/0]). - --export([register/1, report_queue_duration/2]). - -record(state, {timer, %% 'internal_update' timer queue_durations, %% ets, (qpid, last_reported, last_sent) queue_duration_sum, %% sum of all queue_durations queue_duration_count, %% number of elements in sum memory_limit, %% how much memory we intend to use memory_ratio, %% how much more memory we can use - desired_duration %% the desired queue duration + desired_duration, %% the desired queue duration + callbacks %% a dict of qpid -> {M,F,A}s }). -define(SERVER, ?MODULE). @@ -101,24 +98,18 @@ -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). %%---------------------------------------------------------------------------- + -ifdef(use_specs). --type(state() :: #state{timer :: timer:tref(), - queue_durations :: tid(), - queue_duration_sum :: float(), - queue_duration_count:: non_neg_integer(), - memory_limit :: pos_integer(), - memory_ratio :: float(), - desired_duration :: float() | 'infinity' }). -spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). --spec(register/1 :: (pid()) -> 'ok'). +-spec(update/0 :: () -> 'ok'). +-spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). -spec(report_queue_duration/2 :: (pid(), float() | 'infinity') -> 'ok'). --spec(init/1 :: ([]) -> {'ok', state()}). - --spec(internal_update/1 :: (state()) -> state()). -endif. +%%---------------------------------------------------------------------------- +%% Public API %%---------------------------------------------------------------------------- start_link() -> @@ -127,22 +118,17 @@ start_link() -> update() -> gen_server2:cast(?SERVER, update). -%%---------------------------------------------------------------------------- - -register(Pid) -> - gen_server2:cast(?SERVER, {register, Pid}). +register(Pid, MFA = {_M, _F, _A}) -> + gen_server2:cast(?SERVER, {register, Pid, MFA}). report_queue_duration(Pid, QueueDuration) -> gen_server2:call(rabbit_memory_monitor, {report_queue_duration, Pid, QueueDuration}). -%%---------------------------------------------------------------------------- -get_memory_limit() -> - case vm_memory_monitor:get_memory_limit() of - undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; - A -> A - end.
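The update_ratio code above cites the TeamQuest load-average article for calc_load/3, whose body falls outside this hunk. Assuming it implements the usual exponential blend from that article, a sketch looks like this (module name illustrative):

    %% Sketch of exponentially-smoothed rate tracking in the style of a
    %% UNIX load average. Exp is e^(-Td/T), matching the coefficient
    %% 1.0 / math:exp(Td/(30*1000000)) passed in above (Td in usec,
    %% T = 30s): near 1 for short intervals, so the old average
    %% dominates; near 0 for long ones, so the new sample N dominates.
    -module(ewma_sketch).
    -export([smooth/3]).

    smooth(Load, Exp, N) ->
        Load * Exp + N * (1 - Exp).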
+%%---------------------------------------------------------------------------- +%% Gen_server callbacks +%%---------------------------------------------------------------------------- init([]) -> %% We should never use more memory than user requested. As the memory @@ -158,7 +144,8 @@ init([]) -> queue_duration_count = 0, memory_limit = MemoryLimit, memory_ratio = 1.0, - desired_duration = infinity}}. + desired_duration = infinity, + callbacks = dict:new()}}. handle_call({report_queue_duration, Pid, QueueDuration}, From, State = #state{queue_duration_sum = Sum, @@ -187,23 +174,23 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, handle_call(_Request, _From, State) -> {noreply, State}. - handle_cast(update, State) -> {noreply, internal_update(State)}; -handle_cast({register, Pid}, State) -> +handle_cast({register, Pid, MFA}, State = #state{queue_durations = Durations, + callbacks = Callbacks}) -> _MRef = erlang:monitor(process, Pid), - true = ets:insert(State#state.queue_durations, {Pid, infinity, infinity}), - {noreply, State}; + true = ets:insert(Durations, {Pid, infinity, infinity}), + {noreply, State#state{callbacks = dict:store(Pid, MFA, Callbacks)}}; handle_cast(_Request, State) -> {noreply, State}. - handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state{queue_duration_sum = Sum, queue_duration_count = Count, - queue_durations = Durations}) -> + queue_durations = Durations, + callbacks = Callbacks}) -> [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid), Sum1 = case PrevQueueDuration of infinity -> Sum; @@ -211,26 +198,30 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, end, true = ets:delete(State#state.queue_durations, Pid), {noreply, State#state{queue_duration_sum = Sum1, - queue_duration_count = Count-1}}; + queue_duration_count = Count-1, + callbacks = dict:erase(Pid, Callbacks)}}; handle_info(_Info, State) -> {noreply, State}. - -terminate(_Reason, _State) -> +terminate(_Reason, #state{timer = TRef}) -> + timer:cancel(TRef), ok. code_change(_OldVsn, State, _Extra) -> {ok, State}. -set_queue_duration(Pid, QueueDuration) -> - gen_server2:pcast(Pid, 7, {set_queue_duration, QueueDuration}). + +%%---------------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------------- internal_update(State = #state{memory_limit = Limit, queue_durations = Durations, desired_duration = DesiredDurationAvg, queue_duration_sum = Sum, - queue_duration_count = Count}) -> + queue_duration_count = Count, + callbacks = Callbacks}) -> %% available memory / used memory MemoryRatio = Limit / erlang:memory(total), AvgDuration = case Count of @@ -246,7 +237,8 @@ internal_update(State = #state{memory_limit = Limit, %% only inform queues immediately if the desired duration has %% decreased - case DesiredDurationAvg1 < DesiredDurationAvg of + case (DesiredDurationAvg == infinity andalso DesiredDurationAvg1 /= infinity) + orelse (DesiredDurationAvg1 < DesiredDurationAvg) of true -> %% If we have pessimistic information, we need to inform %% queues to reduce its memory usage when needed.
This @@ -256,8 +248,9 @@ internal_update(State = #state{memory_limit = Limit, case DesiredDurationAvg1 < lists:min([PrevSendDuration, QueueDuration]) of true -> - set_queue_duration(Pid, - DesiredDurationAvg1), + ok = + set_queue_duration( + Pid, DesiredDurationAvg1, Callbacks), ets:insert(Durations, {Pid, QueueDuration, DesiredDurationAvg1}); @@ -267,3 +260,13 @@ internal_update(State = #state{memory_limit = Limit, false -> ok end, State1. + +get_memory_limit() -> + case vm_memory_monitor:get_memory_limit() of + undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; + A -> A + end. + +set_queue_duration(Pid, QueueDuration, Callbacks) -> + {M,F,A} = dict:fetch(Pid, Callbacks), + ok = erlang:apply(M, F, A++[QueueDuration]). -- cgit v1.2.1 From 89237c229363daed0d210d1289b00ece507e1aa7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 11 Nov 2009 16:48:26 +0000 Subject: unused vars --- src/rabbit_queue_index.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a198ba51..54d681c5 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -422,9 +422,8 @@ get_counted_handle(SegNumA, State, {SegNumB, Hdl, ?SEGMENT_ENTRIES_COUNT}) ok = file_handle_cache:append_write_buffer(Hdl), get_counted_handle(SegNumA, State, undefined); get_counted_handle(SegNumA, State = #qistate { partial_segments = Partials, - seg_ack_counts = AckCounts, - dir = Dir }, - {SegNumB, Hdl, Count}) -> + seg_ack_counts = AckCounts }, + {SegNumB, _Hdl, Count}) -> %% don't flush here because it's possible SegNumB has been deleted State1 = case dict:find(SegNumB, AckCounts) of -- cgit v1.2.1 From 492881897079f9f8b413f1feade93cb2e8c279fd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 11 Nov 2009 17:56:38 +0000 Subject: Wired it all together. It does seem to work, but there seems to be an off by one bug somewhere in vq, wrt γ length counting. Which pops up sometimes... Joy. --- src/rabbit_amqqueue.erl | 9 +- src/rabbit_amqqueue_process.erl | 28 ++- src/rabbit_memory_manager.erl | 404 ---------------------------------------- src/rabbit_memory_monitor.erl | 18 +- src/rabbit_variable_queue.erl | 33 +++- 5 files changed, 63 insertions(+), 429 deletions(-) delete mode 100644 src/rabbit_memory_manager.erl diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 82a0f5b4..f7b39c77 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -33,7 +33,8 @@ -export([start/0, recover/1, find_durable_queues/0, declare/4, delete/3, purge/1]). --export([internal_declare/2, internal_delete/1, remeasure_egress_rate/1]). +-export([internal_declare/2, internal_delete/1, remeasure_egress_rate/1, + set_queue_duration/2]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, stat/1, stat_all/0, deliver/2, redeliver/2, requeue/3, ack/4]). @@ -114,6 +115,7 @@ -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(remeasure_egress_rate/1 :: (pid()) -> 'ok'). +-spec(set_queue_duration/2 :: (pid(), number()) -> 'ok'). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). @@ -373,7 +375,10 @@ internal_delete(QueueName) -> end). remeasure_egress_rate(QPid) -> - gen_server2:pcast(QPid, 8, remeasure_egress_rate). + gen_server2:pcast(QPid, 9, remeasure_egress_rate). + +set_queue_duration(QPid, Duration) -> + gen_server2:pcast(QPid, 9, {set_queue_duration, Duration}).
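Stepping back to the monitor: stripped of the gen_server machinery, the calculation internal_update performs is a one-liner, scaling the average reported duration by the ratio of permitted to used memory. A minimal sketch (names are mine; the clamp to zero anticipates a fix made later in this series):

    %% Sketch: the desired queue duration handed back to queues. A
    %% memory ratio above 1 means headroom (queues may keep more in
    %% RAM); below 1 means pressure (queues must shrink).
    -module(monitor_sketch).
    -export([desired_duration/3]).

    desired_duration(_Sum, 0, _MemoryRatio) ->
        infinity;    %% no queue has reported a duration yet
    desired_duration(Sum, Count, MemoryRatio) ->
        lists:max([0, (Sum / Count) * MemoryRatio]).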
prune_queue_childspecs() -> lists:foreach( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index cd70979a..f247c0d1 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -103,8 +103,8 @@ start_link(Q) -> init(Q = #amqqueue { name = QName }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), - ok = rabbit_memory_manager:register - (self(), false, rabbit_amqqueue, set_storage_mode, [self()]), + ok = rabbit_memory_monitor:register + (self(), {rabbit_amqqueue, set_queue_duration, [self()]}), VQS = rabbit_variable_queue:init(QName), State = #q{q = Q, owner = none, @@ -872,9 +872,23 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> end)); handle_cast(remeasure_egress_rate, State = #q{variable_queue_state = VQS}) -> + VQS1 = rabbit_variable_queue:remeasure_egress_rate(VQS), + RamDuration = rabbit_variable_queue:ram_duration(VQS1), + DesiredDuration = + rabbit_memory_monitor:report_queue_duration(self(), RamDuration), + VQS2 = rabbit_variable_queue:set_queue_ram_duration_target( + DesiredDuration, VQS1), + io:format("~p Reported ~p and got back ~p~n", [self(), RamDuration, DesiredDuration]), + io:format("~p~n", [rabbit_variable_queue:status(VQS2)]), noreply(State#q{egress_rate_timer_ref = just_measured, - variable_queue_state = - rabbit_variable_queue:remeasure_egress_rate(VQS)}). + variable_queue_state = VQS2}); + +handle_cast({set_queue_duration, Duration}, + State = #q{variable_queue_state = VQS}) -> + VQS1 = rabbit_variable_queue:set_queue_ram_duration_target( + Duration, VQS), + io:format("~p was told to make duration ~p~n", [self(), Duration]), + noreply(State#q{variable_queue_state = VQS1}). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -894,6 +908,12 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {ok, NewState} -> noreply(NewState); {stop, NewState} -> {stop, normal, NewState} end; +handle_info({'EXIT', _DownPid, normal}, State) -> + %% because we have trap_exit on, we'll pick up here the prefetcher + %% going down. We probably need to make sure that we really are + %% just picking up the prefetcher here. It's safe to ignore it + %% though, provided 'normal' + noreply(State); handle_info(timeout, State = #q{variable_queue_state = VQS, sync_timer_ref = undefined}) -> diff --git a/src/rabbit_memory_manager.erl b/src/rabbit_memory_manager.erl deleted file mode 100644 index a73f03e2..00000000 --- a/src/rabbit_memory_manager.erl +++ /dev/null @@ -1,404 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. 
-%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_memory_manager). - --behaviour(gen_server2). - --export([start_link/0]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([register/5, report_memory/3, info/0, conserve_memory/2]). - --define(TOTAL_TOKENS, 10000000). --define(THRESHOLD_MULTIPLIER, 0.05). --define(THRESHOLD_OFFSET, ?TOTAL_TOKENS * ?THRESHOLD_MULTIPLIER). - --define(SERVER, ?MODULE). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --spec(start_link/0 :: () -> - ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(register/5 :: (pid(), boolean(), atom(), atom(), list()) -> 'ok'). --spec(report_memory/3 :: (pid(), non_neg_integer(), boolean()) -> 'ok'). --spec(info/0 :: () -> [{atom(), any()}]). --spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). - --endif. - -%%---------------------------------------------------------------------------- - --record(state, { available_tokens, - processes, - callbacks, - tokens_per_byte, - hibernate, - unoppressable, - alarmed - }). - -%% Token-credit based memory management - -%% Start off by working out the amount of memory available in the -%% system (RAM). Then, work out how many tokens each byte corresponds -%% to. This is the tokens_per_byte field. When a process registers, it -%% must provide an M-F-A triple to a function that needs one further -%% argument, which is the new mode. This will either be 'liberated' or -%% 'oppressed'. -%% -%% Processes then report their own memory usage, in bytes, and the -%% manager takes care of the rest. -%% -%% There are a finite number of tokens in the system. These are -%% allocated to processes as the processes report their memory -%% usage. We keep track of processes which have hibernated. When a -%% process reports memory use which can't be satisfied by the -%% available tokens, we try and oppress processes first from the -%% hibernated group. The hibernated group is a simple queue, and so is -%% implicitly sorted by the order in which processes were added to the -%% queue. This means that when removing from the queue, we evict the -%% sleepiest (and most passive) pid first. -%% -%% If the reported memory use still can't be satisfied after -%% oppressing everyone from those two groups (and note that we check -%% first whether or not oppressing them would make available enough -%% tokens to satisfy the reported use rather than just oppressing all -%% those processes and then going "whoops, didn't help after all"), -%% then we oppress the reporting process. When a process registers, it -%% can declare itself "unoppressable". If a process is unoppressable -%% then it will not be oppressed as a result of other processes -%% needing more tokens. However, if it itself needs additional tokens -%% which aren't available then it is still oppressed as before. 
This -%% feature is only used by the disk_queue, because if the disk queue -%% is not being used, and hibernates, and then memory pressure gets -%% tight, the disk_queue would typically be one of the first processes -%% to be oppressed (sent to disk_only mode), which cripples -%% performance. Thus by setting it unoppressable, it is only possible -%% for the disk_queue to be oppressed when it is active and attempting -%% to increase its memory allocation. -%% -%% If a process has been oppressed, it continues making memory -%% reports, as if it was liberated. As soon as a reported amount of -%% memory can be satisfied (and this can include oppressing other -%% processes in the way described above), *and* the number of -%% available tokens has changed by ?THRESHOLD_MULTIPLIER since the -%% process was oppressed, it will be liberated. This latter condition -%% prevents processes from continually oppressing each other if they -%% themselves can be liberated by oppressing other processes. -%% -%% Note that the hibernate group can get very out of date. This is -%% fine, and somewhat unavoidable given the absence of useful APIs for -%% queues. Thus we allow them to get out of date (processes will be -%% left in there when they change groups, duplicates can appear, dead -%% processes are not pruned etc etc etc), and when we go through the -%% groups, summing up their allocated tokens, we tidy up at that -%% point. -%% -%% A liberated process which is reporting a smaller amount of RAM -%% than its last report will remain liberated. A liberated process -%% that is busy but consuming an unchanging amount of RAM will never -%% be oppressed. - -%% Specific notes as applied to queues and the disk_queue: -%% -%% The disk_queue is managed in the same way as queues. This means -%% that a queue that has gone back to mixed mode after being in disk -%% mode now has its messages counted twice as they are counted both in -%% the report made by the queue (even though they may not yet be in -%% RAM (though see the prefetcher)) and also by the disk_queue. Thus -%% the amount of available RAM must be higher when going disk -> mixed -%% than when going mixed -> disk. This is fairly sensible as it -%% reduces the risk of any oscillations occurring. -%% -%% The queue process deliberately reports 4 times its estimated RAM -%% usage, and the disk_queue 2.5 times. In practice, this seems to -%% work well. Note that we are deliberately running out of tokens a -%% little early because of the fact that the mixed -> disk transition -%% can transiently eat a lot of memory and take some time (flushing a -%% few million messages to disk is never going to be instantaneous). - -start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). - -register(Pid, Unoppressable, Module, Function, Args) -> - gen_server2:cast(?SERVER, {register, Pid, Unoppressable, - Module, Function, Args}). - -report_memory(Pid, Memory, Hibernating) -> - gen_server2:cast(?SERVER, {report_memory, Pid, Memory, Hibernating}). - -info() -> - gen_server2:call(?SERVER, info). - -conserve_memory(_Pid, Conserve) -> - gen_server2:pcast(?SERVER, 9, {conserve_memory, Conserve}).
- -%%---------------------------------------------------------------------------- - -init([]) -> - process_flag(trap_exit, true), - rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), - {MemTotal, MemUsed, _BigProc} = memsup:get_memory_data(), - MemAvail = MemTotal - MemUsed, - TPB = if MemAvail == 0 -> 0; - true -> ?TOTAL_TOKENS / MemAvail - end, - {ok, #state { available_tokens = ?TOTAL_TOKENS, - processes = dict:new(), - callbacks = dict:new(), - tokens_per_byte = TPB, - hibernate = queue:new(), - unoppressable = sets:new(), - alarmed = false - }}. - -handle_call(info, _From, State) -> - State1 = #state { available_tokens = Avail, - processes = Procs, - hibernate = Sleepy, - unoppressable = Unoppressable } = - free_upto(undefined, 1 + ?TOTAL_TOKENS, State), %% just tidy - {reply, [{ available_tokens, Avail }, - { processes, dict:to_list(Procs) }, - { hibernated_processes, queue:to_list(Sleepy) }, - { unoppressable_processes, sets:to_list(Unoppressable) }], State1}. - -handle_cast({report_memory, Pid, Memory, Hibernating}, - State = #state { processes = Procs, - available_tokens = Avail, - callbacks = Callbacks, - tokens_per_byte = TPB, - alarmed = Alarmed }) -> - Req = rabbit_misc:ceil(TPB * Memory), - LibreActivity = if Hibernating -> hibernate; - true -> active - end, - {StateN = #state { hibernate = Sleepy }, ActivityNew} = - case find_process(Pid, Procs) of - {libre, OAlloc, _OActivity} -> - Avail1 = Avail + OAlloc, - State1 = #state { available_tokens = Avail2, - processes = Procs1 } - = free_upto(Pid, Req, - State #state { available_tokens = Avail1 }), - case Req > Avail2 of - true -> %% nowt we can do, oppress the process - Procs2 = - set_process_mode(Procs1, Callbacks, Pid, oppressed, - {oppressed, Avail2}), - {State1 #state { processes = Procs2 }, oppressed}; - false -> %% keep liberated - {State1 #state - { processes = - dict:store(Pid, {libre, Req, LibreActivity}, Procs1), - available_tokens = Avail2 - Req }, - LibreActivity} - end; - {oppressed, OrigAvail} -> - case Req > 0 andalso - ( Alarmed orelse Hibernating orelse - (Avail > (OrigAvail - ?THRESHOLD_OFFSET) andalso - Avail < (OrigAvail + ?THRESHOLD_OFFSET)) ) of - true -> - {State, oppressed}; - false -> - State1 = #state { available_tokens = Avail1, - processes = Procs1 } = - free_upto(Pid, Req, State), - case Req > Avail1 of - true -> - %% not enough space, so stay oppressed - {State1, oppressed}; - false -> %% can liberate the process - Procs2 = set_process_mode( - Procs1, Callbacks, Pid, liberated, - {libre, Req, LibreActivity}), - {State1 #state { - processes = Procs2, - available_tokens = Avail1 - Req }, - LibreActivity} - end - end - end, - StateN1 = - case ActivityNew of - active -> StateN; - oppressed -> StateN; - hibernate -> - StateN #state { hibernate = queue:in(Pid, Sleepy) } - end, - {noreply, StateN1}; - -handle_cast({register, Pid, IsUnoppressable, Module, Function, Args}, - State = #state { callbacks = Callbacks, - unoppressable = Unoppressable }) -> - _MRef = erlang:monitor(process, Pid), - Unoppressable1 = case IsUnoppressable of - true -> sets:add_element(Pid, Unoppressable); - false -> Unoppressable - end, - {noreply, State #state { callbacks = dict:store - (Pid, {Module, Function, Args}, Callbacks), - unoppressable = Unoppressable1 - }}; - -handle_cast({conserve_memory, Conserve}, State) -> - {noreply, State #state { alarmed = Conserve }}. 
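The token arithmetic at the heart of this (now deleted) manager is compact enough to restate. A sketch using the deleted module's ?TOTAL_TOKENS constant, with a local ceiling helper standing in for rabbit_misc:ceil/1 so the snippet is self-contained:

    %% Sketch: a fixed pool of tokens is scaled against the RAM free at
    %% startup, and each process's memory report is then converted into
    %% a token request, as in init/1 and the report_memory clause above.
    -module(token_sketch).
    -export([tokens_per_byte/1, token_request/2]).

    -define(TOTAL_TOKENS, 10000000).

    tokens_per_byte(0)        -> 0;    %% no available RAM reported
    tokens_per_byte(MemAvail) -> ?TOTAL_TOKENS / MemAvail.

    token_request(TPB, MemoryBytes) ->
        ceiling(TPB * MemoryBytes).

    ceiling(X) ->
        T = trunc(X),
        case X > T of true -> T + 1; false -> T end.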
- -handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #state { available_tokens = Avail, - processes = Procs, - callbacks = Callbacks }) -> - State1 = State #state { processes = dict:erase(Pid, Procs), - callbacks = dict:erase(Pid, Callbacks) }, - {noreply, case find_process(Pid, Procs) of - {oppressed, _OrigReq} -> - State1; - {libre, Alloc, _Activity} -> - State1 #state { available_tokens = Avail + Alloc } - end}; -handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}; -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, State) -> - State. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%---------------------------------------------------------------------------- - -find_process(Pid, Procs) -> - case dict:find(Pid, Procs) of - {ok, Value} -> Value; - error -> {oppressed, 0} - end. - -set_process_mode(Procs, Callbacks, Pid, Mode, Record) -> - {Module, Function, Args} = dict:fetch(Pid, Callbacks), - ok = erlang:apply(Module, Function, Args ++ [Mode]), - dict:store(Pid, Record, Procs). - -tidy_and_sum_sleepy(IgnorePids, Sleepy, Procs) -> - tidy_and_sum(hibernate, Procs, fun queue:out/1, - fun (Pid, _Alloc, Queue) -> queue:in(Pid, Queue) end, - IgnorePids, Sleepy, queue:new(), 0). - -tidy_and_sum(AtomExpected, Procs, Generator, Consumer, DupCheckSet, - GenInit, ConInit, AllocAcc) -> - case Generator(GenInit) of - {empty, _GetInit} -> {ConInit, AllocAcc}; - {{value, Pid}, GenInit1} -> - {DupCheckSet1, ConInit1, AllocAcc1} = - case sets:is_element(Pid, DupCheckSet) of - true -> - {DupCheckSet, ConInit, AllocAcc}; - false -> - case find_process(Pid, Procs) of - {libre, Alloc, AtomExpected} -> - {sets:add_element(Pid, DupCheckSet), - Consumer(Pid, Alloc, ConInit), - Alloc + AllocAcc}; - _ -> - {DupCheckSet, ConInit, AllocAcc} - end - end, - tidy_and_sum(AtomExpected, Procs, Generator, Consumer, - DupCheckSet1, GenInit1, ConInit1, AllocAcc1) - end. - -free_upto_sleepy(IgnorePids, Callbacks, Sleepy, Procs, Req, Avail) -> - free_from(Callbacks, - fun(Procs1, Sleepy1, SleepyAcc) -> - case queue:out(Sleepy1) of - {empty, _Sleepy2} -> - empty; - {{value, Pid}, Sleepy2} -> - case sets:is_element(Pid, IgnorePids) of - true -> {skip, Sleepy2, - queue:in(Pid, SleepyAcc)}; - false -> {libre, Alloc, hibernate} = - dict:fetch(Pid, Procs1), - {value, Sleepy2, Pid, Alloc} - end - end - end, fun queue:join/2, Procs, Sleepy, queue:new(), Req, Avail). - -free_from( - Callbacks, Transformer, BaseCase, Procs, DestroyMe, CreateMe, Req, Avail) -> - case Transformer(Procs, DestroyMe, CreateMe) of - empty -> - {CreateMe, Procs, Req}; - {skip, DestroyMe1, CreateMe1} -> - free_from(Callbacks, Transformer, BaseCase, Procs, DestroyMe1, - CreateMe1, Req, Avail); - {value, DestroyMe1, Pid, Alloc} -> - Procs1 = set_process_mode( - Procs, Callbacks, Pid, oppressed, {oppressed, Avail}), - Req1 = Req - Alloc, - case Req1 > 0 of - true -> free_from(Callbacks, Transformer, BaseCase, Procs1, - DestroyMe1, CreateMe, Req1, Avail); - false -> {BaseCase(DestroyMe1, CreateMe), Procs1, Req1} - end - end. 
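Those helpers thread a fair amount of state; the underlying loop is simpler than it looks. A reduced sketch, assuming a plain pid-to-allocation dict in place of the full process table (names are mine):

    %% Sketch: walk the hibernate queue oldest-sleeper-first, reclaiming
    %% each pid's token allocation until the request is covered or the
    %% queue is exhausted. Returns the remaining request, the unvisited
    %% tail of the queue, and the pids that would be oppressed.
    -module(evict_sketch).
    -export([free_upto/3]).

    free_upto(Req, Sleepy, _Allocs) when Req =< 0 ->
        {Req, Sleepy, []};
    free_upto(Req, Sleepy, Allocs) ->
        case queue:out(Sleepy) of
            {empty, Sleepy1} ->
                {Req, Sleepy1, []};
            {{value, Pid}, Sleepy1} ->
                Alloc = dict:fetch(Pid, Allocs),
                {Req1, Sleepy2, Evicted} =
                    free_upto(Req - Alloc, Sleepy1, Allocs),
                {Req1, Sleepy2, [Pid | Evicted]}
        end.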
- -free_upto(Pid, Req, State = #state { available_tokens = Avail, - processes = Procs, - callbacks = Callbacks, - hibernate = Sleepy, - unoppressable = Unoppressable }) - when Req > Avail -> - Unoppressable1 = sets:add_element(Pid, Unoppressable), - {Sleepy1, SleepySum} = tidy_and_sum_sleepy(Unoppressable1, Sleepy, Procs), - case Req > Avail + SleepySum of - true -> %% not enough in sleepy, just return tidied state - State #state { hibernate = Sleepy1 }; - false -> - %% ReqRem will be <= 0 because it's likely we'll have - %% freed more than we need, thus Req - ReqRem is total - %% freed - {Sleepy2, Procs1, ReqRem} = - free_upto_sleepy(Unoppressable1, Callbacks, - Sleepy1, Procs, Req, Avail), - State #state { available_tokens = Avail + (Req - ReqRem), - processes = Procs1, - hibernate = Sleepy2 } - end; -free_upto(_Pid, _Req, State) -> - State. diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index cf184f3f..649aec49 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -104,7 +104,7 @@ -spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). --spec(report_queue_duration/2 :: (pid(), float() | 'infinity') -> 'ok'). +-spec(report_queue_duration/2 :: (pid(), float() | 'infinity') -> number()). -endif. @@ -119,11 +119,11 @@ update() -> gen_server2:cast(?SERVER, update). register(Pid, MFA = {_M, _F, _A}) -> - gen_server2:cast(?SERVER, {register, Pid, MFA}). + gen_server2:call(?SERVER, {register, Pid, MFA}, infinity). report_queue_duration(Pid, QueueDuration) -> gen_server2:call(rabbit_memory_monitor, - {report_queue_duration, Pid, QueueDuration}). + {report_queue_duration, Pid, QueueDuration}, infinity). %%---------------------------------------------------------------------------- @@ -171,18 +171,18 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, {noreply, State#state{queue_duration_sum = Sum1, queue_duration_count = Count1}}; +handle_call({register, Pid, MFA}, _From, State = + #state{queue_durations = Durations, callbacks = Callbacks}) -> + _MRef = erlang:monitor(process, Pid), + true = ets:insert(Durations, {Pid, infinity, infinity}), + {reply, ok, State#state{callbacks = dict:store(Pid, MFA, Callbacks)}}; + handle_call(_Request, _From, State) -> {noreply, State}. handle_cast(update, State) -> {noreply, internal_update(State)}; -handle_cast({register, Pid, MFA}, State = #state{queue_durations = Durations, - callbacks = Callbacks}) -> - _MRef = erlang:monitor(process, Pid), - true = ets:insert(Durations, {Pid, infinity, infinity}), - {noreply, State#state{callbacks = dict:store(Pid, MFA, Callbacks)}}; - handle_cast(_Request, State) -> {noreply, State}. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 15caf81b..79fd24b8 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,11 +32,12 @@ -module(rabbit_variable_queue). -export([init/1, terminate/1, publish/2, publish_delivered/2, - set_queue_ram_duration_target/2, remeasure_egress_rate/1, fetch/1, - ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, - requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, - tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, - can_flush_journal/1, flush_journal/1, status/1]). 
+ set_queue_ram_duration_target/2, remeasure_egress_rate/1, + ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, + maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, + tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4, + tx_commit_from_vq/1, needs_sync/1, can_flush_journal/1, + flush_journal/1, status/1]). %%---------------------------------------------------------------------------- @@ -130,6 +131,7 @@ -spec(set_queue_ram_duration_target/2 :: (('undefined' | number()), vqstate()) -> vqstate()). -spec(remeasure_egress_rate/1 :: (vqstate()) -> vqstate()). +-spec(ram_duration/1 :: (vqstate()) -> number()). -spec(fetch/1 :: (vqstate()) -> {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), vqstate()}). @@ -209,20 +211,23 @@ publish_delivered(Msg = #basic_message { guid = MsgId, {ack_not_on_disk, State} end. -set_queue_ram_duration_target(undefined, State) -> - State; set_queue_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, target_ram_msg_count = TargetRamMsgCount }) -> - TargetRamMsgCount1 = trunc(DurationTarget * EgressRate), %% msgs = sec * msgs/sec + TargetRamMsgCount1 = + case DurationTarget of + infinity -> undefined; + undefined -> undefined; + _ -> trunc(DurationTarget * EgressRate) %% msgs = sec * msgs/sec + end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, if TargetRamMsgCount == TargetRamMsgCount1 -> State1; - TargetRamMsgCount == undefined orelse + TargetRamMsgCount1 == undefined orelse TargetRamMsgCount < TargetRamMsgCount1 -> - maybe_start_prefetcher(State1); + State1; true -> reduce_memory_use(State1) end. @@ -246,6 +251,14 @@ remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, egress_rate_timestamp = Now, out_counter = 0 }). +ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, + ram_msg_count = RamMsgCount }) -> + %% msgs / (msgs/sec) == sec + case AvgEgressRate == 0 of + true -> infinity; + false -> RamMsgCount / AvgEgressRate + end. + fetch(State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, prefetcher = Prefetcher, -- cgit v1.2.1 From fca6c2e6899df2cd16b059f40bea0abdf31fae59 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 Nov 2009 11:50:09 +0000 Subject: Found the bug. What was happening was: 1) γ contains a range, say a to b, where a is a segment boundary but b-a < segmentsize 2) γ -> β happens, so all of γ gets loaded into q3 as betas. γ is empty 3) then push β to γ happens. There was a logic failure in there, which meant that with γ empty, _all_ of q3 would be pushed to γ, but incorrectly accounted for, making the vq think there is less in γ than there really is 4) now γ -> β happens again, vq is amazed to find more in the segment in γ than it was expecting. cue explosion The bug was simply not ensuring that if γ was empty, only push out enough to leave q3 with a maximum of one segment, ending on a segment boundary --- src/rabbit_variable_queue.erl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 79fd24b8..0bce4c2b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -227,7 +227,7 @@ set_queue_ram_duration_target( State1; TargetRamMsgCount1 == undefined orelse TargetRamMsgCount < TargetRamMsgCount1 -> - State1; + maybe_start_prefetcher(State1); true -> reduce_memory_use(State1) end.
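Going the other way, set_queue_ram_duration_target above converts the monitor's answer back into a count of messages the queue may keep in RAM. The conversion on its own, as a sketch (module name mine), with 'undefined' standing for no limit:

    %% Sketch: msgs = seconds * msgs/second. An infinity or undefined
    %% target imposes no bound on the messages held in RAM.
    -module(target_sketch).
    -export([target_ram_msg_count/2]).

    target_ram_msg_count(infinity,  _EgressRate) -> undefined;
    target_ram_msg_count(undefined, _EgressRate) -> undefined;
    target_ram_msg_count(DurationTarget, EgressRate) ->
        trunc(DurationTarget * EgressRate).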
@@ -964,12 +964,15 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, case queue:out(Q3) of {empty, _Q3} -> State1; {{value, #beta { seq_id = SeqId }}, _Q3a} -> + {{value, #beta { seq_id = SeqIdMax }}, _Q3b} = queue:out_r(Q3), Limit = rabbit_queue_index:next_segment_boundary(SeqId), - case Gamma1SeqId of - Limit -> %% already only holding the minimum, nothing to do + %% ASSERTION + true = Gamma1SeqId == undefined orelse Gamma1SeqId > SeqIdMax, + case (Gamma1SeqId == undefined andalso SeqIdMax < Limit) orelse + Gamma1SeqId == Limit of + true -> %% already only holding LTE one segment indices in q3 State1; - _ when Gamma1SeqId == undefined orelse - (is_integer(Gamma1SeqId) andalso Gamma1SeqId > Limit) -> + false -> %% ASSERTION (sadly large!) %% This says that if Gamma1SeqId /= undefined then %% the gap from Limit to Gamma1SeqId is an integer -- cgit v1.2.1 From 15854f1913979d73127fe04ed4ea29458ff1dca2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 Nov 2009 11:57:55 +0000 Subject: Firstly the desired queue length shouldn't go negative. Secondly, testing shows that the amqqueue_process should not flush the qi journal when idle. Doing so can lead to disk thrashing. --- src/rabbit_amqqueue_process.erl | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index f247c0d1..a79abe8c 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -161,14 +161,10 @@ next_state1(State = #q{sync_timer_ref = undefined}, true) -> {start_sync_timer(State), 0}; next_state1(State, true) -> {State, 0}; -next_state1(State = #q{sync_timer_ref = undefined, - variable_queue_state = VQS}, false) -> - {State, case rabbit_variable_queue:can_flush_journal(VQS) of - true -> 0; - false -> hibernate - end}; +next_state1(State = #q{sync_timer_ref = undefined}, false) -> + {State, hibernate}; next_state1(State, false) -> - {stop_sync_timer(State), 0}. + {stop_sync_timer(State), hibernate}. ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after(?EGRESS_REMEASURE_INTERVAL, rabbit_amqqueue, @@ -915,13 +911,6 @@ handle_info({'EXIT', _DownPid, normal}, State) -> %% though, provided 'normal' noreply(State); -handle_info(timeout, State = #q{variable_queue_state = VQS, - sync_timer_ref = undefined}) -> - %% if sync_timer_ref is undefined then we must have set the - %% timeout to zero because we thought we could flush the journal - noreply(State#q{variable_queue_state = - rabbit_variable_queue:flush_journal(VQS)}); - handle_info(timeout, State = #q{variable_queue_state = VQS}) -> noreply( run_message_queue( -- cgit v1.2.1 From 964d26ee9bcbc2b8e0ddf624996314327fc19903 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 Nov 2009 12:07:59 +0000 Subject: Err, as I said in the last commit, make sure the desired queue length isn't < 0. 
--- src/rabbit_memory_monitor.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 649aec49..80fa7edf 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -228,10 +228,11 @@ internal_update(State = #state{memory_limit = Limit, 0 -> infinity; _ -> Sum / Count end, - DesiredDurationAvg1 = case AvgDuration of - infinity -> infinity; - AvgQueueDuration -> AvgQueueDuration * MemoryRatio - end, + DesiredDurationAvg1 = + case AvgDuration of + infinity -> infinity; + AvgQueueDuration -> lists:max([0, AvgQueueDuration * MemoryRatio]) + end, State1 = State#state{memory_ratio = MemoryRatio, desired_duration = DesiredDurationAvg1}, -- cgit v1.2.1 From 53380f3a5f09397a8f961dc33a00553fa8e42f5a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 Nov 2009 13:02:10 +0000 Subject: It makes sense only to fully flush the journal prior to hibernation. Other than that, the qi can take care of when to flush the journal out. Adjustments to tests --- scripts/rabbitmq-server | 2 +- src/rabbit_amqqueue_process.erl | 3 +- src/rabbit_queue_index.erl | 100 +++++++++++++++++++--------------------- src/rabbit_tests.erl | 21 ++------- src/rabbit_variable_queue.erl | 13 ++---- 5 files changed, 60 insertions(+), 79 deletions(-) diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 34904850..43488c92 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -109,7 +109,7 @@ exec erl \ -os_mon start_cpu_sup true \ -os_mon start_disksup false \ -os_mon start_memsup false \ - -os_mon vm_memory_high_watermark 0.4 \ + -os_mon vm_memory_high_watermark 0.08 \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a79abe8c..b0c1ccac 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -923,5 +923,6 @@ handle_info(Info, State) -> handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> VQS1 = rabbit_variable_queue:maybe_start_prefetcher(VQS), + VQS2 = rabbit_variable_queue:full_flush_journal(VQS1), {hibernate, stop_egress_rate_timer( - State#q{ variable_queue_state = VQS1 })}. + State#q{ variable_queue_state = VQS2 })}. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 54d681c5..bd899676 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,10 +32,9 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, sync_seq_ids/3, can_flush_journal/1, - flush_journal/1, read_segment_entries/2, next_segment_boundary/1, - segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, - start_msg_store/1]). + write_delivered/2, write_acks/2, sync_seq_ids/3, full_flush_journal/1, + read_segment_entries/2, next_segment_boundary/1, segment_size/0, + find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). %%---------------------------------------------------------------------------- %% The queue disk index @@ -150,8 +149,7 @@ -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). -spec(sync_seq_ids/3 :: ([seq_id()], boolean(), qistate()) -> qistate()). --spec(can_flush_journal/1 :: (qistate()) -> boolean()). --spec(flush_journal/1 :: (qistate()) -> qistate()). 
+-spec(full_flush_journal/1 :: (qistate()) -> qistate()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). @@ -228,40 +226,10 @@ sync_seq_ids(SeqIds, SyncAckJournal, State) -> StateM end, State1, SegNumsSet). -can_flush_journal(#qistate { journal_count = 0 }) -> - false; -can_flush_journal(_) -> - true. - -flush_journal(State = #qistate { journal_count = 0 }) -> +full_flush_journal(State = #qistate { journal_count = 0 }) -> State; -flush_journal(State = #qistate { journal_ack_dict = JAckDict, - journal_del_dict = JDelDict, - journal_count = JCount }) -> - SegNum = case dict:fetch_keys(JAckDict) of - [] -> hd(dict:fetch_keys(JDelDict)); - [N|_] -> N - end, - Dels = seg_entries_from_dict(SegNum, JDelDict), - Acks = seg_entries_from_dict(SegNum, JAckDict), - State1 = append_dels_to_segment(SegNum, Dels, State), - State2 = append_acks_to_segment(SegNum, Acks, State1), - JCount1 = JCount - length(Dels) - length(Acks), - State3 = State2 #qistate { journal_del_dict = dict:erase(SegNum, JDelDict), - journal_ack_dict = dict:erase(SegNum, JAckDict), - journal_count = JCount1 }, - if - JCount1 == 0 -> - {Hdl, State4} = get_journal_handle(State3), - {ok, 0} = file_handle_cache:position(Hdl, bof), - ok = file_handle_cache:truncate(Hdl), - ok = file_handle_cache:sync(Hdl), - State4; - JCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> - flush_journal(State3); - true -> - State3 - end. +full_flush_journal(State) -> + full_flush_journal(flush_journal(State)). read_segment_entries(InitSeqId, State) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), @@ -348,6 +316,47 @@ start_msg_store(DurableQueues) -> ok. +%%---------------------------------------------------------------------------- +%% Journal Flushing +%%---------------------------------------------------------------------------- + +flush_journal(State = #qistate { journal_count = 0 }) -> + State; +flush_journal(State = #qistate { journal_ack_dict = JAckDict, + journal_del_dict = JDelDict, + journal_count = JCount }) -> + SegNum = case dict:fetch_keys(JAckDict) of + [] -> hd(dict:fetch_keys(JDelDict)); + [N|_] -> N + end, + Dels = seg_entries_from_dict(SegNum, JDelDict), + Acks = seg_entries_from_dict(SegNum, JAckDict), + State1 = append_dels_to_segment(SegNum, Dels, State), + State2 = append_acks_to_segment(SegNum, Acks, State1), + JCount1 = JCount - length(Dels) - length(Acks), + State3 = State2 #qistate { journal_del_dict = dict:erase(SegNum, JDelDict), + journal_ack_dict = dict:erase(SegNum, JAckDict), + journal_count = JCount1 }, + if + JCount1 == 0 -> + {Hdl, State4} = get_journal_handle(State3), + {ok, 0} = file_handle_cache:position(Hdl, bof), + ok = file_handle_cache:truncate(Hdl), + ok = file_handle_cache:sync(Hdl), + State4; + JCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> + flush_journal(State3); + true -> + State3 + end. + +maybe_full_flush(State = #qistate { journal_count = JCount }) -> + case JCount > ?MAX_ACK_JOURNAL_ENTRY_COUNT of + true -> full_flush_journal(State); + false -> State + end. + + %%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- @@ -363,19 +372,6 @@ write_to_journal(BinList, SeqIds, Dict, end, {Dict, JCount}, SeqIds), {Dict1, State1 #qistate { journal_count = JCount1 }}. 
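The relationship between the two flush entry points reduces to a small sketch. State here is just the journal entry count, flush_some/1 stands in for the real per-segment flush_journal/1, and the threshold mirrors ?MAX_ACK_JOURNAL_ENTRY_COUNT without claiming its value:

    %% Sketch: full_flush/1 loops until the journal is empty, as done
    %% before hibernation; maybe_full_flush/2 only starts once the
    %% count passes the threshold.
    -module(journal_sketch).
    -export([full_flush/1, maybe_full_flush/2]).

    full_flush(0)     -> 0;
    full_flush(Count) -> full_flush(flush_some(Count)).

    maybe_full_flush(Count, Max) when Count > Max -> full_flush(Count);
    maybe_full_flush(Count, _Max)                 -> Count.

    %% pretend each pass drains the entries belonging to one segment
    flush_some(Count) when Count =< 100 -> 0;
    flush_some(Count)                   -> Count - 100.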
-maybe_full_flush(State = #qistate { journal_count = JCount }) -> - case JCount > ?MAX_ACK_JOURNAL_ENTRY_COUNT of - true -> full_flush_journal(State); - false -> State - end. - -full_flush_journal(State) -> - case can_flush_journal(State) of - true -> State1 = flush_journal(State), - full_flush_journal(State1); - false -> State - end. - queue_name_to_dir_name(Name = #resource { kind = queue }) -> Bin = term_to_binary(Name), Size = 8*size(Bin), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index d74f998e..5b453b62 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1032,15 +1032,7 @@ queue_index_deliver(SeqIds, Qi) -> end, Qi, SeqIds). queue_index_flush_journal(Qi) -> - {_Oks, {false, Qi1}} = - rabbit_misc:unfold( - fun ({true, QiN}) -> - QiM = rabbit_queue_index:flush_journal(QiN), - {true, ok, {rabbit_queue_index:can_flush_journal(QiM), QiM}}; - ({false, _QiN}) -> - false - end, {true, Qi}), - Qi1. + rabbit_queue_index:full_flush_journal(Qi). verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; @@ -1071,7 +1063,6 @@ test_queue_index() -> ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]), %% should get length back as 0, as all the msgs were transient {0, Qi6} = rabbit_queue_index:init(test_queue()), - false = rabbit_queue_index:can_flush_journal(Qi6), {0, 10000, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), @@ -1093,8 +1084,7 @@ test_queue_index() -> ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsMsgIdsB)), Qi16 = rabbit_queue_index:write_acks(SeqIdsB, Qi15), - true = rabbit_queue_index:can_flush_journal(Qi16), - Qi17 = rabbit_queue_index:flush_journal(Qi16), + Qi17 = queue_index_flush_journal(Qi16), %% the entire first segment will have gone as they were firstly %% transient, and secondly ack'd SegmentSize = rabbit_queue_index:segment_size(), @@ -1212,11 +1202,10 @@ test_variable_queue_dynamic_duration_change() -> {_SeqIds1, VQ7} = variable_queue_publish(true, 20, VQ6), {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), - VQ10 = rabbit_variable_queue:flush_journal(VQ9), - VQ11 = rabbit_variable_queue:flush_journal(VQ10), - {empty, VQ12} = rabbit_variable_queue:fetch(VQ11), + VQ10 = rabbit_variable_queue:full_flush_journal(VQ9), + {empty, VQ11} = rabbit_variable_queue:fetch(VQ10), - rabbit_variable_queue:terminate(VQ12), + rabbit_variable_queue:terminate(VQ11), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0bce4c2b..7c644f59 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -36,8 +36,7 @@ ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4, - tx_commit_from_vq/1, needs_sync/1, can_flush_journal/1, - flush_journal/1, status/1]). + tx_commit_from_vq/1, needs_sync/1, full_flush_journal/1, status/1]). %%---------------------------------------------------------------------------- @@ -150,8 +149,7 @@ ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> vqstate()). -spec(tx_commit_from_vq/1 :: (vqstate()) -> vqstate()). -spec(needs_sync/1 :: (vqstate()) -> boolean()). --spec(can_flush_journal/1 :: (vqstate()) -> boolean()). --spec(flush_journal/1 :: (vqstate()) -> vqstate()). +-spec(full_flush_journal/1 :: (vqstate()) -> vqstate()). 
-spec(status/1 :: (vqstate()) -> [{atom(), any()}]). -endif. @@ -485,12 +483,9 @@ needs_sync(#vqstate { on_sync = {_, _, []} }) -> needs_sync(_) -> true. -can_flush_journal(#vqstate { index_state = IndexState }) -> - rabbit_queue_index:can_flush_journal(IndexState). - -flush_journal(State = #vqstate { index_state = IndexState }) -> +full_flush_journal(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = - rabbit_queue_index:flush_journal(IndexState) }. + rabbit_queue_index:full_flush_journal(IndexState) }. status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, len = Len, on_sync = {_, _, From}, -- cgit v1.2.1 From 220ce1ac7731cb971bebaef1e876e71a26256eed Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 Nov 2009 13:24:25 +0000 Subject: reverting a change to the start up script that I never meant to commit. Also further simplifying logic in the previously faulty function. --- scripts/rabbitmq-server | 2 +- src/rabbit_variable_queue.erl | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 43488c92..34904850 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -109,7 +109,7 @@ exec erl \ -os_mon start_cpu_sup true \ -os_mon start_disksup false \ -os_mon start_memsup false \ - -os_mon vm_memory_high_watermark 0.08 \ + -os_mon vm_memory_high_watermark 0.4 \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7c644f59..de9c08a3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -963,8 +963,7 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, Limit = rabbit_queue_index:next_segment_boundary(SeqId), %% ASSERTION true = Gamma1SeqId == undefined orelse Gamma1SeqId > SeqIdMax, - case (Gamma1SeqId == undefined andalso SeqIdMax < Limit) orelse - Gamma1SeqId == Limit of + case SeqIdMax < Limit of true -> %% already only holding LTE one segment indices in q3 State1; false -> -- cgit v1.2.1 From 4b4c9cca3882ef037c73687d470e0f66eafe188c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 16 Nov 2009 17:40:15 +0000 Subject: stripped out the prefetcher. Note that I've not fixed up the tests yet, so they won't pass --- src/rabbit_amqqueue_process.erl | 5 +- src/rabbit_msg_store.erl | 12 +- src/rabbit_queue_prefetcher.erl | 295 ---------------------------------------- src/rabbit_tests.erl | 45 ++---- src/rabbit_variable_queue.erl | 121 +++------------- 5 files changed, 35 insertions(+), 443 deletions(-) delete mode 100644 src/rabbit_queue_prefetcher.erl diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b0c1ccac..3adf97ff 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -922,7 +922,6 @@ handle_info(Info, State) -> {stop, {unhandled_info, Info}, State}. handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> - VQS1 = rabbit_variable_queue:maybe_start_prefetcher(VQS), - VQS2 = rabbit_variable_queue:full_flush_journal(VQS1), + VQS1 = rabbit_variable_queue:full_flush_journal(VQS), {hibernate, stop_egress_rate_timer( - State#q{ variable_queue_state = VQS2 })}. + State#q{ variable_queue_state = VQS1 })}. 
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 591435ba..b42574c0 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,8 +33,8 @@ -behaviour(gen_server2). --export([start_link/3, write/2, read/1, peruse/2, contains/1, remove/1, - release/1, sync/2]). +-export([start_link/3, write/2, read/1, contains/1, remove/1, release/1, + sync/2]). -export([sync/0]). %% internal @@ -62,8 +62,6 @@ {'ok', pid()} | 'ignore' | {'error', any()}). -spec(write/2 :: (msg_id(), msg()) -> 'ok'). -spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). --spec(peruse/2 :: (msg_id(), fun (({'ok', msg()} | 'not_found') -> 'ok')) -> - 'ok'). -spec(contains/1 :: (msg_id()) -> boolean()). -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). @@ -233,7 +231,6 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). -peruse(MsgId, Fun) -> gen_server2:pcast(?SERVER, -1, {peruse, MsgId, Fun}). contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). @@ -334,11 +331,6 @@ handle_cast({write, MsgId, Msg}, noreply(State) end; -handle_cast({peruse, MsgId, Fun}, State) -> - {Result, State1} = internal_read_message(MsgId, State), - Fun(Result), - noreply(State1); - handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> noreply( compact(sets:to_list( diff --git a/src/rabbit_queue_prefetcher.erl b/src/rabbit_queue_prefetcher.erl deleted file mode 100644 index f5e717f5..00000000 --- a/src/rabbit_queue_prefetcher.erl +++ /dev/null @@ -1,295 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_queue_prefetcher). - --behaviour(gen_server2). - --export([start_link/1]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --export([publish/2, drain/1, drain_and_stop/1, stop/1]). - --include("rabbit.hrl"). --include("rabbit_queue.hrl"). - --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). 
- --record(pstate, - { alphas, - betas, - queue_mref, - peruse_cb - }). - -%%---------------------------------------------------------------------------- -%% Novel -%%---------------------------------------------------------------------------- - -%% The design of the prefetcher is based on the following: -%% -%% a) It must issue low-priority (-ve) requests to the disk queue for -%% the next message. -%% b) If the prefetcher is empty and the amqqueue_process -%% (mixed_queue) asks it for a message, it must exit immediately, -%% telling the mixed_queue that it is empty so that the mixed_queue -%% can then take the more efficient path and communicate with the -%% disk_queue directly -%% c) No message can accidentally be delivered twice, or lost -%% d) The prefetcher must only cause load when the disk_queue is -%% otherwise idle, and must not worsen performance in a loaded -%% situation. -%% -%% As such, it's a little tricky. It must never issue a call to the -%% disk_queue - if it did, then that could potentially block, thus -%% causing pain to the mixed_queue that needs fast answers as to -%% whether the prefetcher has prefetched content or not. It behaves as -%% follows: -%% -%% 1) disk_queue:prefetch(Q) -%% This is a low priority cast -%% -%% 2) The disk_queue may pick up the cast, at which point it'll read -%% the next message and invoke prefetcher:publish(Msg) - normal -%% priority cast. Note that in the mean time, the mixed_queue could -%% have come along, found the prefetcher empty, asked it to -%% exit. This means the effective "reply" from the disk_queue will -%% go no where. As a result, the disk_queue should not advance the -%% queue. However, it does mark the messages as delivered. The -%% reasoning is that if it didn't, there would be the possibility -%% that the message was delivered without it being marked as such -%% on disk. We must maintain the property that a message which is -%% marked as non-redelivered really hasn't been delivered anywhere -%% before. The downside is that should the prefetcher not receive -%% this message, the queue will then fetch the message from the -%% disk_queue directly, and this message will have its delivered -%% bit set. The queue will not be advanced though - if it did -%% advance the queue and the msg was then lost, then the queue -%% would have lost a msg that the mixed_queue would not pick up. -%% -%% 3) The prefetcher hopefully receives the call from -%% prefetcher:publish(Msg). It replies immediately, and then adds -%% to its internal queue. A cast is not sufficient as a pseudo -%% "reply" here because the mixed_queue could come along, drain the -%% prefetcher, thus catching the msg just sent by the disk_queue -%% and then call disk_queue:fetch(Q) which is normal priority call, -%% which could overtake a reply cast from the prefetcher to the -%% disk queue, resulting in the same message being delivered -%% twice. Thus when the disk_queue calls prefetcher:publish(Msg), -%% it is briefly blocked. However, a) the prefetcher replies -%% immediately, and b) the prefetcher should never have more than -%% two items in its mailbox anyway (one from the queue process / -%% mixed_queue and one from the disk_queue), so this should not -%% cause a problem to the disk_queue. -%% -%% 4) The disk_queue receives the reply, and advances the Q to the -%% next msg. -%% -%% 5) If the prefetcher has not met its target then it goes back to -%% 1). Otherwise it just sits and waits for the mixed_queue to -%% drain it. 
-%% -%% Now at some point, the mixed_queue will come along and will call -%% prefetcher:drain() - normal priority call. The prefetcher then -%% replies with its internal queue and a flag saying if the prefetcher -%% has finished or is continuing; if the prefetch target was reached, -%% the prefetcher stops normally at this point. If it hasn't been -%% reached, then the prefetcher continues to hang around (it almost -%% certainly has issued a disk_queue:prefetch(Q) cast and is waiting -%% for a reply from the disk_queue). -%% -%% If the mixed_queue calls prefetcher:drain() and the prefetcher's -%% internal queue is empty then the prefetcher replies with 'empty', -%% and it exits. This informs the mixed_queue that it should from now -%% on talk directly with the disk_queue and not via the -%% prefetcher. This is more efficient and the mixed_queue will use -%% normal priority blocking calls to the disk_queue and thus get -%% better service. -%% -%% The prefetcher may at this point have issued a -%% disk_queue:prefetch(Q) cast which has not yet been picked up by the -%% disk_queue. This msg won't go away and the disk_queue will -%% eventually find it. However, when it does, it'll simply read the -%% next message from the queue (which could now be empty), possibly -%% populate the cache (no harm done), mark the message as delivered -%% (oh well, not a spec violation, and better than the alternative) -%% and try and call prefetcher:publish(Msg) which will result in an -%% error, which the disk_queue catches, as the publish call is to a -%% non-existant process. However, the state of the queue has not been -%% altered so the mixed_queue will be able to fetch this message as if -%% it had never been prefetched. -%% -%% The only point at which the queue is advanced is when the -%% prefetcher replies to the publish call. At this point the message -%% has been received by the prefetcher and so we guarantee it will be -%% passed to the mixed_queue when the mixed_queue tries to drain the -%% prefetcher. We must therefore ensure that this msg can't also be -%% delivered to the mixed_queue directly by the disk_queue through the -%% mixed_queue calling disk_queue:fetch(Q) which is why the -%% prefetcher:publish function is a call and not a cast, thus blocking -%% the disk_queue. -%% -%% Finally, the prefetcher is only created when the mixed_queue is -%% operating in mixed mode and it sees that the next N messages are -%% all on disk, and the queue process is about to hibernate. During -%% this phase, the mixed_queue can be asked to go back to disk_only -%% mode. When this happens, it calls prefetcher:drain_and_stop() which -%% behaves like two consecutive calls to drain() - i.e. replies with -%% all prefetched messages and causes the prefetcher to exit. -%% -%% Note there is a flaw here in that we end up marking messages which -%% have come through the prefetcher as delivered even if they don't -%% get delivered (e.g. prefetcher fetches them, then broker -%% dies). However, the alternative is that the mixed_queue must do a -%% call to the disk_queue when it effectively passes them out to the -%% rabbit_writer. This would hurt performance, and even at that stage, -%% we have no guarantee that the message will really go out of the -%% socket. What we do still have is that messages which have the -%% redelivered bit set false really are guaranteed to have not been -%% delivered already. - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). 
- --spec(start_link/1 :: (queue()) -> - ({'ok', pid()} | 'ignore' | {'error', any()})). --spec(publish/2 :: (pid(), (message()| 'not_found')) -> 'ok'). --spec(drain/1 :: (pid()) -> ({('finished' | 'continuing' | 'empty'), queue()})). --spec(drain_and_stop/1 :: (pid()) -> ({('empty' | queue()), queue()})). --spec(stop/1 :: (pid()) -> 'ok'). - --endif. - -%%---------------------------------------------------------------------------- - -start_link(Betas) -> - false = queue:is_empty(Betas), %% ASSERTION - gen_server2:start_link(?MODULE, [Betas, self()], []). - -publish(Prefetcher, Obj = #basic_message {}) -> - gen_server2:call(Prefetcher, {publish, Obj}, infinity); -publish(Prefetcher, not_found) -> - gen_server2:call(Prefetcher, publish_empty, infinity). - -drain(Prefetcher) -> - gen_server2:call(Prefetcher, drain, infinity). - -drain_and_stop(Prefetcher) -> - gen_server2:call(Prefetcher, drain_and_stop, infinity). - -stop(Prefetcher) -> - gen_server2:call(Prefetcher, stop, infinity). - -%%---------------------------------------------------------------------------- - -init([Betas, QPid]) when is_pid(QPid) -> - %% link isn't enough because the signal will not appear if the - %% queue exits normally. Thus have to use monitor. - MRef = erlang:monitor(process, QPid), - Self = self(), - CB = fun (Result) -> - rabbit_misc:with_exit_handler( - fun () -> ok end, - fun () -> case Result of - {ok, Msg} -> publish(Self, Msg); - not_found -> publish(Self, not_found) - end - end) - end, - State = #pstate { alphas = queue:new(), - betas = Betas, - queue_mref = MRef, - peruse_cb = CB - }, - {ok, prefetch(State), infinity, {backoff, ?HIBERNATE_AFTER_MIN, - ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. - -handle_call({publish, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }}, - DiskQueue, State = #pstate { alphas = Alphas, betas = Betas }) -> - gen_server2:reply(DiskQueue, ok), - {{value, #beta { msg_id = MsgId, seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - index_on_disk = IndexOnDisk}}, Betas1} = queue:out(Betas), - Alphas1 = queue:in(#alpha { msg = Msg, seq_id = SeqId, - is_delivered = IsDelivered, msg_on_disk = true, - index_on_disk = IndexOnDisk }, Alphas), - State1 = State #pstate { alphas = Alphas1, betas = Betas1 }, - {Timeout, State2} = case queue:is_empty(Betas1) of - true -> {hibernate, State1}; - false -> {infinity, prefetch(State1)} - end, - {noreply, State2, Timeout}; -handle_call(publish_empty, _From, State) -> - %% Very odd. This could happen if the queue is deleted or purged - %% and the mixed queue fails to shut us down. - {reply, ok, State, hibernate}; -handle_call(drain, _From, State = #pstate { alphas = Alphas, betas = Betas }) -> - case {queue:is_empty(Betas), queue:is_empty(Alphas)} of - {true , _ } -> {stop, normal, {finished, Alphas}, State}; - {false, true } -> {stop, normal, {empty, Betas}, State}; - {false, false} -> {reply, {continuing, Alphas}, - State #pstate { alphas = queue:new() }} - end; -handle_call(drain_and_stop, _From, State = #pstate { alphas = Alphas, - betas = Betas }) -> - Res = case queue:is_empty(Alphas) of - true -> {empty, Betas}; - false -> {Alphas, Betas} - end, - {stop, normal, Res, State}; -handle_call(stop, _From, State) -> - {stop, normal, ok, State}. - -handle_cast(Msg, State) -> - exit({unexpected_message_cast_to_prefetcher, Msg, State}). 
- -handle_info({'DOWN', MRef, process, _Pid, _Reason}, - State = #pstate { queue_mref = MRef }) -> - %% this is the amqqueue_process going down, so we should go down - %% too - {stop, normal, State}. - -terminate(_Reason, _State) -> - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -prefetch(State = #pstate { betas = Betas, peruse_cb = CB }) -> - {{value, #beta { msg_id = MsgId }}, _Betas1} = queue:out(Betas), - ok = rabbit_msg_store:peruse(MsgId, CB), - State. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 5b453b62..bfeb397c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -931,22 +931,6 @@ test_msg_store() -> ok = rabbit_msg_store:release(MsgIds2ndHalf), %% read the second half again, just for fun (aka code coverage) ok = msg_store_read(MsgIds2ndHalf), - %% read the second half via peruse - lists:foldl( - fun (MsgId, ok) -> - rabbit_msg_store:peruse(MsgId, - fun ({ok, MsgId1}) when MsgId1 == MsgId -> - Self ! {peruse, MsgId1} - end), - receive - {peruse, MsgId} -> - ok - after - 10000 -> - io:format("Failed to receive response via peruse~n"), - throw(timeout) - end - end, ok, MsgIds2ndHalf), %% stop and restart, preserving every other msg in 2nd half ok = stop_msg_store(), ok = start_msg_store(fun ([]) -> finished; @@ -1477,30 +1461,29 @@ test_variable_queue_prefetching_and_gammas_to_betas() -> assert_prop(S11, q2, 0), assert_prop(S11, q1, 0), - VQ12 = rabbit_variable_queue:maybe_start_prefetcher(VQ11), - S12 = rabbit_variable_queue:status(VQ12), + S12 = rabbit_variable_queue:status(VQ11), assert_prop(S12, prefetching, (Len4 - Prefetched) > 0), timer:sleep(2000), %% we have to fetch all of q4 before the prefetcher will be drained - {VQ13, AckTags1} = - variable_queue_fetch(Prefetched, false, false, Len4, VQ12), - {VQ16, Acks} = + {VQ12, AckTags1} = + variable_queue_fetch(Prefetched, false, false, Len4, VQ11), + {VQ15, Acks} = case Len4 == Prefetched of true -> - {VQ13, [AckTag2, AckTag1, AckTag, AckTags1]}; + {VQ12, [AckTag2, AckTag1, AckTag, AckTags1]}; false -> Len5 = Len4 - Prefetched - 1, - {{_Msg3, false, AckTag3, Len5}, VQ14} = - rabbit_variable_queue:fetch(VQ13), - assert_prop(rabbit_variable_queue:status(VQ14), + {{_Msg3, false, AckTag3, Len5}, VQ13} = + rabbit_variable_queue:fetch(VQ12), + assert_prop(rabbit_variable_queue:status(VQ13), prefetching, false), - {VQ15, AckTags2} = - variable_queue_fetch(Len5, false, false, Len5, VQ14), - {VQ15, [AckTag3, AckTag2, AckTag1, AckTag, AckTags1, AckTags2]} + {VQ14, AckTags2} = + variable_queue_fetch(Len5, false, false, Len5, VQ13), + {VQ14, [AckTag3, AckTag2, AckTag1, AckTag, AckTags1, AckTags2]} end, - VQ17 = rabbit_variable_queue:ack(lists:flatten(Acks), VQ16), + VQ16 = rabbit_variable_queue:ack(lists:flatten(Acks), VQ15), - {empty, VQ18} = rabbit_variable_queue:fetch(VQ17), + {empty, VQ17} = rabbit_variable_queue:fetch(VQ16), - rabbit_variable_queue:terminate(VQ18), + rabbit_variable_queue:terminate(VQ17), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index de9c08a3..c89cdfd5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -33,10 +33,10 @@ -export([init/1, terminate/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, remeasure_egress_rate/1, - ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, - maybe_start_prefetcher/1, purge/1, delete/1, requeue/2, tx_publish/2, - tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4, - tx_commit_from_vq/1, needs_sync/1, full_flush_journal/1, status/1]). 
+ ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete/1, + requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, + tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, + full_flush_journal/1, status/1]). %%---------------------------------------------------------------------------- @@ -56,7 +56,6 @@ egress_rate, avg_egress_rate, egress_rate_timestamp, - prefetcher, len, on_sync }). @@ -116,7 +115,6 @@ egress_rate :: float(), avg_egress_rate :: float(), egress_rate_timestamp :: {integer(), integer(), integer()}, - prefetcher :: ('undefined' | pid()), len :: non_neg_integer(), on_sync :: {[ack()], [msg_id()], [{pid(), any()}]} }). @@ -137,7 +135,6 @@ -spec(ack/2 :: ([ack()], vqstate()) -> vqstate()). -spec(len/1 :: (vqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (vqstate()) -> boolean()). --spec(maybe_start_prefetcher/1 :: (vqstate()) -> vqstate()). -spec(purge/1 :: (vqstate()) -> {non_neg_integer(), vqstate()}). -spec(delete/1 :: (vqstate()) -> vqstate()). -spec(requeue/2 :: ([{basic_message(), ack()}], vqstate()) -> vqstate()). @@ -181,7 +178,6 @@ init(QueueName) -> egress_rate = 0, avg_egress_rate = 0, egress_rate_timestamp = now(), - prefetcher = undefined, len = GammaCount, on_sync = {[], [], []} }, @@ -221,13 +217,10 @@ set_queue_ram_duration_target( end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, - if TargetRamMsgCount == TargetRamMsgCount1 -> - State1; - TargetRamMsgCount1 == undefined orelse - TargetRamMsgCount < TargetRamMsgCount1 -> - maybe_start_prefetcher(State1); - true -> - reduce_memory_use(State1) + case TargetRamMsgCount1 == undefined orelse + TargetRamMsgCount1 >= TargetRamMsgCount of + true -> State1; + false -> reduce_memory_use(State1) end. remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, @@ -258,14 +251,11 @@ ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, end. fetch(State = - #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, - out_counter = OutCount, prefetcher = Prefetcher, + #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, index_state = IndexState, len = Len }) -> case queue:out(Q4) of - {empty, _Q4} when Prefetcher == undefined -> - fetch_from_q3_or_gamma(State); {empty, _Q4} -> - fetch(drain_prefetcher(drain, State)); + fetch_from_q3_or_gamma(State); {{value, #alpha { msg = Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -333,45 +323,12 @@ len(#vqstate { len = Len }) -> is_empty(State) -> 0 == len(State). -maybe_start_prefetcher(State = #vqstate { target_ram_msg_count = 0 }) -> - State; -maybe_start_prefetcher(State = #vqstate { prefetcher = undefined }) -> - %% ensure we have as much index in RAM as we can - State1 = #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount, - q1 = Q1, q3 = Q3 } = maybe_gammas_to_betas(State), - case queue:is_empty(Q3) of - true -> %% nothing to do - State1; - false -> - %% prefetched content takes priority over q1 - AvailableSpace = - case TargetRamMsgCount of - undefined -> queue:len(Q3); - _ -> (TargetRamMsgCount - RamMsgCount) + queue:len(Q1) - end, - PrefetchCount = lists:min([queue:len(Q3), AvailableSpace]), - case PrefetchCount =< 0 of - true -> State1; - false -> - {PrefetchQueue, Q3a} = queue:split(PrefetchCount, Q3), - {ok, Prefetcher} = - rabbit_queue_prefetcher:start_link(PrefetchQueue), - State1 #vqstate { q3 = Q3a, prefetcher = Prefetcher } - end - end; -maybe_start_prefetcher(State) -> - State. 
- -purge(State = #vqstate { prefetcher = undefined, q4 = Q4, - index_state = IndexState, len = Len }) -> +purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = remove_queue_entries(Q4, IndexState), {Len, State1} = purge1(Q4Count, State #vqstate { index_state = IndexState1, q4 = queue:new() }), - {Len, State1 #vqstate { len = 0 }}; -purge(State) -> - purge(drain_prefetcher(stop, State)). + {Len, State1 #vqstate { len = 0 }}. %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. @@ -490,7 +447,7 @@ full_flush_journal(State = #vqstate { index_state = IndexState }) -> status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, len = Len, on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, prefetcher = Prefetcher, + ram_msg_count = RamMsgCount, avg_egress_rate = AvgEgressRate }) -> [ {q1, queue:len(Q1)}, {q2, queue:len(Q2)}, @@ -501,8 +458,7 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, {outstanding_txns, length(From)}, {target_ram_msg_count, TargetRamMsgCount}, {ram_msg_count, RamMsgCount}, - {avg_egress_rate, AvgEgressRate}, - {prefetching, Prefetcher /= undefined} ]. + {avg_egress_rate, AvgEgressRate} ]. %%---------------------------------------------------------------------------- %% Minor helpers @@ -660,56 +616,13 @@ fetch_from_q3_or_gamma(State = #vqstate { fetch(State2) end. -drain_prefetcher(_DrainOrStop, State = #vqstate { prefetcher = undefined }) -> - State; -drain_prefetcher(DrainOrStop, - State = #vqstate { prefetcher = Prefetcher, q1 = Q1, q2 = Q2, - gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4, - ram_msg_count = RamMsgCount }) -> - Fun = case DrainOrStop of - drain -> fun rabbit_queue_prefetcher:drain/1; - stop -> fun rabbit_queue_prefetcher:drain_and_stop/1 - end, - {Q3a, Q4a, Prefetcher1, RamMsgCountAdj} = - case Fun(Prefetcher) of - {empty, Betas} -> %% drain or drain_and_stop - {queue:join(Betas, Q3), Q4, undefined, 0}; - {finished, Alphas} -> %% just drain - {Q3, queue:join(Q4, Alphas), undefined, queue:len(Alphas)}; - {continuing, Alphas} -> %% just drain - {Q3, queue:join(Q4, Alphas), Prefetcher, queue:len(Alphas)}; - {Alphas, Betas} -> %% just drain_and_stop - {queue:join(Betas, Q3), queue:join(Q4, Alphas), undefined, - queue:len(Alphas)} - end, - State1 = State #vqstate { prefetcher = Prefetcher1, q3 = Q3a, q4 = Q4a, - ram_msg_count = RamMsgCount + RamMsgCountAdj }, - %% don't join up with q1/q2 unless the prefetcher has stopped - State2 = case GammaCount == 0 andalso Prefetcher1 == undefined of - true -> case queue:is_empty(Q3a) andalso queue:is_empty(Q2) of - true -> - State1 #vqstate { q1 = queue:new(), - q4 = queue:join(Q4a, Q1) }; - false -> - State1 #vqstate { q3 = queue:join(Q3a, Q2) } - end; - false -> State1 - end, - maybe_push_q1_to_betas(State2). - reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> - %% strictly, it's not necessary to stop the prefetcher this early, - %% but because of its potential effect on q1 and the - %% ram_msg_count, it's just much simpler to stop it sooner and - %% relaunch when we next hibernate. 
- State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas( - drain_prefetcher(stop, State))), + State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), case TargetRamMsgCount of 0 -> push_betas_to_gammas(State1); _ -> State1 @@ -810,9 +723,9 @@ publish(neither, Msg = #basic_message { guid = MsgId, store_alpha_entry(Entry = #alpha {}, State = #vqstate { q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4, prefetcher = Prefetcher }) -> + q3 = Q3, q4 = Q4 }) -> case queue:is_empty(Q2) andalso GammaCount == 0 andalso - queue:is_empty(Q3) andalso Prefetcher == undefined of + queue:is_empty(Q3) of true -> State #vqstate { q4 = queue:in(Entry, Q4) }; false -> -- cgit v1.2.1 From e1fc1b5382414eaa595c0273cb22efeacf30ae30 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 Nov 2009 11:11:23 +0000 Subject: Remove most of the vq tests which are now invalid given the prefetcher has gone. --- src/rabbit_memory_monitor.erl | 17 +-- src/rabbit_tests.erl | 276 ------------------------------------------ 2 files changed, 9 insertions(+), 284 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 80fa7edf..be15ecbb 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -138,14 +138,15 @@ init([]) -> {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, ?SERVER, update, []), - {ok, #state{timer = TRef, - queue_durations = ets:new(?TABLE_NAME, [set, private]), - queue_duration_sum = 0.0, - queue_duration_count = 0, - memory_limit = MemoryLimit, - memory_ratio = 1.0, - desired_duration = infinity, - callbacks = dict:new()}}. + {ok, internal_update( + #state{timer = TRef, + queue_durations = ets:new(?TABLE_NAME, [set, private]), + queue_duration_sum = 0.0, + queue_duration_count = 0, + memory_limit = MemoryLimit, + memory_ratio = 1.0, + desired_duration = infinity, + callbacks = dict:new()})}. handle_call({report_queue_duration, Pid, QueueDuration}, From, State = #state{queue_duration_sum = Sum, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bfeb397c..ac32e213 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1150,7 +1150,6 @@ fresh_variable_queue() -> VQ = rabbit_variable_queue:init(test_queue()), S0 = rabbit_variable_queue:status(VQ), assert_prop(S0, len, 0), - assert_prop(S0, prefetching, false), assert_prop(S0, q1, 0), assert_prop(S0, q2, 0), assert_prop(S0, gamma, {gamma, undefined, 0}), @@ -1159,10 +1158,6 @@ fresh_variable_queue() -> VQ. test_variable_queue() -> - passed = test_variable_queue_prefetching_and_gammas_to_betas(), - passed = test_variable_queue_prefetching_during_publish(0), - passed = test_variable_queue_prefetching_during_publish(5000), - passed = test_variable_queue_prefetch_evicts_q1(), passed = test_variable_queue_dynamic_duration_change(), passed. @@ -1216,274 +1211,3 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> after 0 -> test_variable_queue_dynamic_duration_change_f(Len, VQ3) end. - -test_variable_queue_prefetch_evicts_q1() -> - SegmentSize = rabbit_queue_index:segment_size(), - VQ0 = fresh_variable_queue(), - VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ0), - assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0), - Len1 = 2*SegmentSize, - {_SeqIds, VQ2} = variable_queue_publish(true, Len1, VQ1), - %% one segment will be in q3, the other in gamma. 
We want to fetch - %% all of q3 so that gamma is then moved into q3, emptying gamma - - VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2), - Start = now(), - {VQ4, AckTags} = variable_queue_fetch(SegmentSize, true, false, Len1, VQ3), - End = now(), - VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), - S5 = rabbit_variable_queue:status(VQ5), - assert_prop(S5, q4, 0), - assert_prop(S5, q3, SegmentSize), - assert_prop(S5, gamma, {gamma, undefined, 0}), - assert_prop(S5, len, SegmentSize), - assert_prop(S5, prefetching, false), - - VQ6 = rabbit_variable_queue:remeasure_egress_rate(VQ5), - %% half the seconds taken to fetch one segment - Duration = timer:now_diff(End, Start) / 2000000, - VQ7 = rabbit_variable_queue:set_queue_ram_duration_target(Duration, VQ6), - S7 = rabbit_variable_queue:status(VQ7), - assert_prop(S7, q4, 0), - Q3 = proplists:get_value(q3, S7), - true = Q3 > 0, %% not prefetching everything - assert_prop(S7, gamma, {gamma, undefined, 0}), - assert_prop(S7, len, SegmentSize), - assert_prop(S7, prefetching, true), - - %% now publish a segment, this'll go half in q1, half in q3, in - %% theory. - {_SeqIds1, VQ8} = variable_queue_publish(true, SegmentSize, VQ7), - S8 = rabbit_variable_queue:status(VQ8), - assert_prop(S8, q4, 0), - assert_prop(S8, q2, 0), - assert_prop(S8, len, Len1), - assert_prop(S8, prefetching, true), - Q3a = proplists:get_value(q3, S8), - Q3a_new = Q3a - Q3, - Q1a = proplists:get_value(q1, S8), - true = (Q3a_new + Q1a == SegmentSize) andalso Q1a < SegmentSize, - - %% wait a bit, to let the prefetcher do its thing - timer:sleep(2000), - %% fetch a msg. The prefetcher *should* have finished, but can't - %% guarantee it. - Len2 = Len1-1, - {{_Msg, false, AckTag, Len2}, VQ9} = rabbit_variable_queue:fetch(VQ8), - S9 = rabbit_variable_queue:status(VQ9), - case proplists:get_value(prefetching, S9) of - true -> - %% bits of q1 could have moved into q3, and the prefetcher - %% won't have returned any betas for q3. So q3 can not - %% have shrunk. - Q3b = proplists:get_value(q3, S9), - Q1b = proplists:get_value(q1, S9), - true = (Q1a + Q3a) == (Q1b + Q3b) andalso Q3b >= Q3a; - false -> - %% there should be content in q4 and q3 (we only did 1 - %% fetch. This is not sufficient to kill the prefetcher - %% through draining it when it's empty, thus if it's not - %% running, it must have finished, not been killed, thus - %% q4 will not be empty), and q1 should have gone into q3. - Q1b = proplists:get_value(q1, S9), - Q3b = proplists:get_value(q3, S9), - Q4b = proplists:get_value(q4, S9), - NotPrefetched = Q3b - (SegmentSize - Q1b), - SegmentSize = NotPrefetched + Q4b + 1 %% we fetched one - end, - - %% just for the fun of it, set duration to 0. This should push - %% everything back into gamma, except the eldest (partial) segment - %% in q3 - VQ10 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ9), - S10 = rabbit_variable_queue:status(VQ10), - assert_prop(S10, len, Len2), - assert_prop(S10, prefetching, false), - assert_prop(S10, q1, 0), - assert_prop(S10, q2, 0), - assert_prop(S10, gamma, {gamma, Len1, SegmentSize}), - assert_prop(S10, q3, (Len2 - SegmentSize)), - assert_prop(S10, q4, 0), - - {VQ11, AckTags1} = variable_queue_fetch(Len2, true, false, Len2, VQ10), - VQ12 = rabbit_variable_queue:ack([AckTag|AckTags1], VQ11), - {empty, VQ13} = rabbit_variable_queue:fetch(VQ12), - rabbit_variable_queue:terminate(VQ13), - - passed. 
- -test_variable_queue_prefetching_during_publish(PrefetchDelay) -> - SegmentSize = rabbit_queue_index:segment_size(), - VQ0 = fresh_variable_queue(), - VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ0), - assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0), - - Len1 = 2*SegmentSize, - {_SeqIds, VQ2} = variable_queue_publish(true, Len1, VQ1), - %% one segment will be in q3, the other in gamma. We want to fetch - %% all of q3 so that gamma is then moved into q3, emptying gamma - - VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2), - {VQ4, AckTags} = variable_queue_fetch(SegmentSize, true, false, Len1, VQ3), - VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), - S5 = rabbit_variable_queue:status(VQ5), - assert_prop(S5, q4, 0), - assert_prop(S5, q3, SegmentSize), - assert_prop(S5, gamma, {gamma, undefined, 0}), - assert_prop(S5, len, SegmentSize), - assert_prop(S5, prefetching, false), - - %% we assume that we can fetch at > 1 msg a second - VQ6 = rabbit_variable_queue:remeasure_egress_rate(VQ5), - VQ7 = rabbit_variable_queue:set_queue_ram_duration_target(Len1, VQ6), - S7 = rabbit_variable_queue:status(VQ7), - assert_prop(S7, q4, 0), - assert_prop(S7, q3, 0), - assert_prop(S7, gamma, {gamma, undefined, 0}), - assert_prop(S7, len, SegmentSize), - assert_prop(S7, prefetching, true), - - timer:sleep(PrefetchDelay), - - {_SeqIds1, VQ8} = variable_queue_publish(true, SegmentSize, VQ7), - S8 = rabbit_variable_queue:status(VQ8), - assert_prop(S8, q4, 0), - assert_prop(S8, q2, 0), - assert_prop(S8, q1, SegmentSize), - assert_prop(S8, len, Len1), - assert_prop(S8, prefetching, true), - - {VQ9, AckTags1} = - variable_queue_fetch(SegmentSize-1, true, false, Len1, VQ8), - VQ10 = rabbit_variable_queue:ack(AckTags1, VQ9), - %% can't guarantee the prefetcher has stopped here. If it is still - %% running, then we must have SegmentSize is q1. If it's not - %% running, and it completed, then we'll find SegmentSize + 1 in - %% q4 (q1 will have been joined to q4), otherwise, we'll find - %% SegmentSize in q1 and 1 in q3 and q4 empty. - S10 = rabbit_variable_queue:status(VQ10), - assert_prop(S10, q2, 0), - assert_prop(S10, len, (SegmentSize+1)), - case proplists:get_value(prefetching, S10) of - true -> assert_prop(S10, q1, SegmentSize), - assert_prop(S10, q3, 0), - assert_prop(S10, q4, 0); - false -> case proplists:get_value(q3, S10) of - 0 -> assert_prop(S10, q4, SegmentSize+1), - assert_prop(S10, q1, 0); - 1 -> assert_prop(S10, q4, 0), - assert_prop(S10, q1, SegmentSize) - end - end, - - {VQ11, AckTags2} = - variable_queue_fetch(SegmentSize+1, true, false, SegmentSize+1, VQ10), - VQ12 = rabbit_variable_queue:ack(AckTags2, VQ11), - - {empty, VQ13} = rabbit_variable_queue:fetch(VQ12), - rabbit_variable_queue:terminate(VQ13), - - passed. 
- -test_variable_queue_prefetching_and_gammas_to_betas() -> - SegmentSize = rabbit_queue_index:segment_size(), - VQ0 = fresh_variable_queue(), - - VQ1 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ0), - assert_prop(rabbit_variable_queue:status(VQ1), target_ram_msg_count, 0), - - {_SeqIds, VQ2} = variable_queue_publish(false, 3 * SegmentSize, VQ1), - S2 = rabbit_variable_queue:status(VQ2), - assert_prop(S2, gamma, {gamma, SegmentSize, 2*SegmentSize}), - assert_prop(S2, q3, SegmentSize), - assert_prop(S2, len, 3*SegmentSize), - - VQ3 = rabbit_variable_queue:remeasure_egress_rate(VQ2), - Len1 = 3*SegmentSize - 1, - {{_Msg, false, AckTag, Len1}, VQ4} = rabbit_variable_queue:fetch(VQ3), - timer:sleep(1000), - VQ5 = rabbit_variable_queue:remeasure_egress_rate(VQ4), - VQ6 = rabbit_variable_queue:set_queue_ram_duration_target(10, VQ5), - timer:sleep(1000), %% let the prefetcher run and grab enough - about 4 msgs - S6 = rabbit_variable_queue:status(VQ6), - RamCount = proplists:get_value(target_ram_msg_count, S6), - assert_prop(S6, prefetching, true), - assert_prop(S6, q4, 0), - assert_prop(S6, q3, (Len1 - RamCount)), - assert_prop(S6, gamma, {gamma, undefined, 0}), - - Len2 = Len1 - 1, - %% this should be enough to stop + drain the prefetcher - {{_Msg1, false, AckTag1, Len2}, VQ7} = rabbit_variable_queue:fetch(VQ6), - S7 = rabbit_variable_queue:status(VQ7), - assert_prop(S7, prefetching, false), - assert_prop(S7, q4, (RamCount - 1)), - assert_prop(S7, q3, (Len1 - RamCount)), - - %% now fetch SegmentSize - 1 which will exhaust q4 and work through a bit of q3 - %% bringing in a segment from gamma: - {VQ8, AckTags} = variable_queue_fetch(SegmentSize-1, false, false, Len2, VQ7), - Len3 = Len2 - (SegmentSize - 1), - S8 = rabbit_variable_queue:status(VQ8), - assert_prop(S8, prefetching, false), - assert_prop(S8, q4, 0), - assert_prop(S8, q3, Len3), - assert_prop(S8, len, Len3), - - VQ9 = rabbit_variable_queue:remeasure_egress_rate(VQ8), - VQ10 = rabbit_variable_queue:ack(AckTags, VQ9), - - S10 = rabbit_variable_queue:status(VQ10), - assert_prop(S10, prefetching, true), - %% egress rate should be really high, so it's likely if we wait a - %% little bit, lots of msgs will be brought in - timer:sleep(2000), - PrefetchCount = lists:min([proplists:get_value(target_ram_msg_count, S10) - - proplists:get_value(ram_msg_count, S10), - Len3]), - Len4 = Len3 - 1, - {{_Msg2, false, AckTag2, Len4}, VQ11} = rabbit_variable_queue:fetch(VQ10), - S11 = rabbit_variable_queue:status(VQ11), - %% prefetcher will stop if it's fast enough and has completed by - %% now, or may still be running if PrefetchCount > 1 - Prefetched = proplists:get_value(q4, S11), - true = PrefetchCount > Prefetched, %% already fetched 1, thus >, not >= - %% q3 will contain whatever the prefetcher was not allowed to - %% prefetch, due to memory constraints. If the prefetcher is still - %% running, this will be less than (Len4 - Prefetched) because - %% Prefetched will not reflect the true number of msgs that it's - %% trying to prefetch. 
-    case proplists:get_value(prefetching, S11) of
-        true  -> true = (Len4 - Prefetched) > proplists:get_value(q3, S11);
-        false -> assert_prop(S11, q3, Len4 - Prefetched)
-    end,
-    assert_prop(S11, gamma, {gamma, undefined, 0}),
-    assert_prop(S11, q2, 0),
-    assert_prop(S11, q1, 0),
-
-    S12 = rabbit_variable_queue:status(VQ11),
-    assert_prop(S12, prefetching, (Len4 - Prefetched) > 0),
-    timer:sleep(2000),
-    %% we have to fetch all of q4 before the prefetcher will be drained
-    {VQ12, AckTags1} =
-        variable_queue_fetch(Prefetched, false, false, Len4, VQ11),
-    {VQ15, Acks} =
-        case Len4 == Prefetched of
-            true ->
-                {VQ12, [AckTag2, AckTag1, AckTag, AckTags1]};
-            false ->
-                Len5 = Len4 - Prefetched - 1,
-                {{_Msg3, false, AckTag3, Len5}, VQ13} =
-                    rabbit_variable_queue:fetch(VQ12),
-                assert_prop(rabbit_variable_queue:status(VQ13),
-                            prefetching, false),
-                {VQ14, AckTags2} =
-                    variable_queue_fetch(Len5, false, false, Len5, VQ13),
-                {VQ14, [AckTag3, AckTag2, AckTag1, AckTag, AckTags1, AckTags2]}
-        end,
-    VQ16 = rabbit_variable_queue:ack(lists:flatten(Acks), VQ15),
-
-    {empty, VQ17} = rabbit_variable_queue:fetch(VQ16),
-
-    rabbit_variable_queue:terminate(VQ17),
-    passed.
-- cgit v1.2.1

From b2184c612b5fab12ad4607fa9c449591796b7759 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 17 Nov 2009 13:10:52 +0000
Subject: In the absence of an egress rate, use the ingress rate instead. Also,
 if there have been no fetches/publishes since the last measurement, use the
 previous measurement, appropriately scaled. This means that the rates will
 gently fall off and approach zero in the absence of activity, which is
 preferable to them suddenly jumping to zero. Also, the average is now the sum
 of the fetches/publishes in the last two segments, over the time since the
 start of the last segment (i.e. it's better than before, which was just a
 straight /2, which would be wrong if the segments are different sizes, which
 they could be, given a very busy queue).

---
 src/rabbit_amqqueue.erl         |   8 ++--
 src/rabbit_amqqueue_process.erl |   6 +--
 src/rabbit_tests.erl            |   4 +-
 src/rabbit_variable_queue.erl   | 100 +++++++++++++++++++++++++++-------------
 4 files changed, 77 insertions(+), 41 deletions(-)

diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index f7b39c77..e5a113ae 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -33,7 +33,7 @@
 -export([start/0, recover/1, find_durable_queues/0, declare/4, delete/3,
          purge/1]).
--export([internal_declare/2, internal_delete/1, remeasure_egress_rate/1,
+-export([internal_declare/2, internal_delete/1, remeasure_rates/1,
          set_queue_duration/2]).
 -export([pseudo_queue/2]).
 -export([lookup/1, with/2, with_or_die/2,
@@ -114,7 +114,7 @@
 -spec(tx_commit_vq_callback/1 :: (pid()) -> 'ok').
 -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()).
 -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()).
--spec(remeasure_egress_rate/1 :: (pid()) -> 'ok').
+-spec(remeasure_rates/1 :: (pid()) -> 'ok').
 -spec(set_queue_duration/2 :: (pid(), number()) -> 'ok').
 -spec(on_node_down/1 :: (erlang_node()) -> 'ok').
 -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()).
@@ -374,8 +374,8 @@ internal_delete(QueueName) ->
               end
       end).
 
-remeasure_egress_rate(QPid) ->
-    gen_server2:pcast(QPid, 9, remeasure_egress_rate).
+remeasure_rates(QPid) ->
+    gen_server2:pcast(QPid, 9, remeasure_rates).
 
 set_queue_duration(QPid, Duration) ->
     gen_server2:pcast(QPid, 9, {set_queue_duration, Duration}).
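To make the new averaging concrete, here is an invented two-period example (illustration only, not code from the patch; the committed calculation is update_rate/4 in the rabbit_variable_queue.erl hunk below). Suppose the previous period saw 10 msgs in 2 seconds and the current period 90 msgs in 1 second:

    1> ((10 / 2) + (90 / 1)) / 2.  %% old: straight /2 of per-period rates
    47.5
    2> (10 + 90) / (2 + 1).        %% new: total count / total elapsed time
    33.333333333333336

The old mean over-weights the short, busy period; the new form weights each period by its actual length, which is exactly the differently-sized-segments point made in the commit message.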
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 3adf97ff..40b19a54 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -168,7 +168,7 @@ next_state1(State, false) -> ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after(?EGRESS_REMEASURE_INTERVAL, rabbit_amqqueue, - remeasure_egress_rate, [self()]), + remeasure_rates, [self()]), State#q{egress_rate_timer_ref = TRef}; ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = just_measured}) -> State#q{egress_rate_timer_ref = undefined}; @@ -867,8 +867,8 @@ handle_cast({limit, ChPid, LimiterPid}, State) -> C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} end)); -handle_cast(remeasure_egress_rate, State = #q{variable_queue_state = VQS}) -> - VQS1 = rabbit_variable_queue:remeasure_egress_rate(VQS), +handle_cast(remeasure_rates, State = #q{variable_queue_state = VQS}) -> + VQS1 = rabbit_variable_queue:remeasure_rates(VQS), RamDuration = rabbit_variable_queue:ram_duration(VQS1), DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), RamDuration), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ac32e213..b1db243f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1167,7 +1167,7 @@ test_variable_queue_dynamic_duration_change() -> %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, {_SeqIds, VQ1} = variable_queue_publish(false, Len1, VQ0), - VQ2 = rabbit_variable_queue:remeasure_egress_rate(VQ1), + VQ2 = rabbit_variable_queue:remeasure_rates(VQ1), {ok, _TRef} = timer:send_after(1000, {duration, 60, fun (V) -> (V*0.75)-1 end}), VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), @@ -1203,7 +1203,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> _ -> Fun end, {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), - VQ4 = rabbit_variable_queue:remeasure_egress_rate(VQ3), + VQ4 = rabbit_variable_queue:remeasure_rates(VQ3), VQ5 = %% /37 otherwise the duration is just to high to stress things rabbit_variable_queue:set_queue_ram_duration_target(N/37, VQ4), io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c89cdfd5..2ee57ba7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,7 +32,7 @@ -module(rabbit_variable_queue). -export([init/1, terminate/1, publish/2, publish_delivered/2, - set_queue_ram_duration_target/2, remeasure_egress_rate/1, + set_queue_ram_duration_target/2, remeasure_rates/1, ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, @@ -53,9 +53,12 @@ index_state, next_seq_id, out_counter, + in_counter, egress_rate, avg_egress_rate, - egress_rate_timestamp, + ingress_rate, + avg_ingress_rate, + rate_timestamp, len, on_sync }). 
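The rabbit_amqqueue_process.erl hunk above contains the whole per-queue feedback loop, which may be easier to see pulled out of diff context (a sketch assembled from the surrounding hunks; the handling of the monitor's reply is elided here):

    %% on each remeasure_rates tick:
    VQS1 = rabbit_variable_queue:remeasure_rates(VQS),      %% refresh in/out rates
    RamDuration = rabbit_variable_queue:ram_duration(VQS1), %% seconds of msgs in RAM
    DesiredDuration =
        rabbit_memory_monitor:report_queue_duration(self(), RamDuration),
    %% the queue then applies the answer via
    %% set_queue_ram_duration_target/2, pushing messages out to or pulling
    %% them back from disk as needed.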
@@ -112,9 +115,12 @@ index_state :: any(), next_seq_id :: seq_id(), out_counter :: non_neg_integer(), - egress_rate :: float(), + in_counter :: non_neg_integer(), + egress_rate :: {{integer(), integer(), integer()}, non_neg_integer()}, avg_egress_rate :: float(), - egress_rate_timestamp :: {integer(), integer(), integer()}, + ingress_rate :: {{integer(), integer(), integer()}, non_neg_integer()}, + avg_ingress_rate :: float(), + rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), on_sync :: {[ack()], [msg_id()], [{pid(), any()}]} }). @@ -127,7 +133,7 @@ {ack(), vqstate()}). -spec(set_queue_ram_duration_target/2 :: (('undefined' | number()), vqstate()) -> vqstate()). --spec(remeasure_egress_rate/1 :: (vqstate()) -> vqstate()). +-spec(remeasure_rates/1 :: (vqstate()) -> vqstate()). -spec(ram_duration/1 :: (vqstate()) -> number()). -spec(fetch/1 :: (vqstate()) -> {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), @@ -164,6 +170,7 @@ init(QueueName) -> 0 -> #gamma { seq_id = undefined, count = 0 }; _ -> #gamma { seq_id = GammaSeqId, count = GammaCount } end, + Now = now(), State = #vqstate { q1 = queue:new(), q2 = queue:new(), gamma = Gamma, @@ -175,9 +182,12 @@ init(QueueName) -> index_state = IndexState1, next_seq_id = NextSeqId, out_counter = 0, - egress_rate = 0, + in_counter = 0, + egress_rate = {Now, 0}, avg_egress_rate = 0, - egress_rate_timestamp = now(), + ingress_rate = {Now, GammaCount}, + avg_ingress_rate = 0, + rate_timestamp = Now, len = GammaCount, on_sync = {[], [], []} }, @@ -192,28 +202,37 @@ publish(Msg, State) -> publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, State = #vqstate { len = 0, index_state = IndexState, - next_seq_id = SeqId }) -> + next_seq_id = SeqId, + out_counter = OutCount, + in_counter = InCount}) -> + State1 = State #vqstate { out_counter = OutCount + 1, + in_counter = InCount + 1 }, case maybe_write_msg_to_disk(false, false, Msg) of true -> {true, IndexState1} = maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, true, IndexState), {{ack_index_and_store, MsgId, SeqId}, - State #vqstate { index_state = IndexState1, - next_seq_id = SeqId + 1 }}; + State1 #vqstate { index_state = IndexState1, + next_seq_id = SeqId + 1 }}; false -> - {ack_not_on_disk, State} + {ack_not_on_disk, State1} end. set_queue_ram_duration_target( - DurationTarget, State = #vqstate { avg_egress_rate = EgressRate, + DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, + avg_ingress_rate = AvgIngressRate, target_ram_msg_count = TargetRamMsgCount }) -> + Rate = case 0 == AvgEgressRate of + true -> AvgIngressRate; + false -> AvgEgressRate + end, TargetRamMsgCount1 = case DurationTarget of infinity -> undefined; undefined -> undefined; - _ -> trunc(DurationTarget * EgressRate) %% msgs = sec * msgs/sec + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, @@ -223,30 +242,34 @@ set_queue_ram_duration_target( false -> reduce_memory_use(State1) end. -remeasure_egress_rate(State = #vqstate { egress_rate = OldEgressRate, - egress_rate_timestamp = Timestamp, - out_counter = OutCount, - duration_target = DurationTarget }) -> - %% We do an average over the last two values, but also hold the - %% current value separately so that the average always only - %% incorporates the last two values, and not the current value and - %% the last average. Averaging helps smooth out spikes. 
+remeasure_rates(State = #vqstate { egress_rate = Egress,
+                                   ingress_rate = Ingress,
+                                   rate_timestamp = Timestamp,
+                                   in_counter = InCount,
+                                   out_counter = OutCount,
+                                   duration_target = DurationTarget }) ->
     Now = now(),
-    %% EgressRate is in seconds, and now_diff is in microseconds
-    EgressRate = 1000000 * OutCount / timer:now_diff(Now, Timestamp),
-    AvgEgressRate = (EgressRate + OldEgressRate) / 2,
+    {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress),
+    {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress),
+
     set_queue_ram_duration_target(
       DurationTarget,
-      State #vqstate { egress_rate = EgressRate,
+      State #vqstate { egress_rate = Egress1,
                        avg_egress_rate = AvgEgressRate,
-                       egress_rate_timestamp = Now,
-                       out_counter = 0 }).
+                       ingress_rate = Ingress1,
+                       avg_ingress_rate = AvgIngressRate,
+                       rate_timestamp = Now,
+                       out_counter = 0, in_counter = 0 }).
 
 ram_duration(#vqstate { avg_egress_rate = AvgEgressRate,
+                        avg_ingress_rate = AvgIngressRate,
                         ram_msg_count = RamMsgCount }) ->
     %% msgs / (msgs/sec) == sec
     case AvgEgressRate == 0 of
-        true  -> infinity;
+        true  -> case AvgIngressRate == 0 of
+                     true  -> infinity;
+                     false -> RamMsgCount / AvgIngressRate
+                 end;
         false -> RamMsgCount / AvgEgressRate
     end.
 
@@ -448,7 +471,8 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4,
                   len = Len, on_sync = {_, _, From},
                   target_ram_msg_count = TargetRamMsgCount,
                   ram_msg_count = RamMsgCount,
-                  avg_egress_rate = AvgEgressRate }) ->
+                  avg_egress_rate = AvgEgressRate,
+                  avg_ingress_rate = AvgIngressRate }) ->
     [ {q1, queue:len(Q1)},
       {q2, queue:len(Q2)},
       {gamma, Gamma},
@@ -458,12 +482,22 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4,
       {outstanding_txns, length(From)},
       {target_ram_msg_count, TargetRamMsgCount},
       {ram_msg_count, RamMsgCount},
-      {avg_egress_rate, AvgEgressRate} ].
+      {avg_egress_rate, AvgEgressRate},
+      {avg_ingress_rate, AvgIngressRate} ].
 
 %%----------------------------------------------------------------------------
 %% Minor helpers
 %%----------------------------------------------------------------------------
 
+update_rate(Now, Then, Count, Rate = {OThen, OCount}) ->
+    %% form the avg over the current period and the previous
+    Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)),
+    Rate1 = case 0 == Count of
+                true  -> Rate; %% keep the last period with activity
+                false -> {Then, Count}
+            end,
+    {Avg, Rate1}.
+
 persistent_msg_ids(Pubs) ->
     [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs,
               Obj #basic_message.is_persistent].
@@ -666,10 +700,12 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount,
     end.
 
 publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk,
-        State = #vqstate { next_seq_id = SeqId, len = Len }) ->
+        State = #vqstate { next_seq_id = SeqId, len = Len,
+                           in_counter = InCount }) ->
     {SeqId,
      publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered,
             PersistentMsgsAlreadyOnDisk,
-            State #vqstate { next_seq_id = SeqId + 1, len = Len + 1 })}.
+            State #vqstate { next_seq_id = SeqId + 1, len = Len + 1,
+                             in_counter = InCount + 1 })}.
 
 publish(msg, Msg = #basic_message { guid = MsgId,
                                     is_persistent = IsPersistent },
-- cgit v1.2.1

From e31bcfb2c0dd7bddeb970ccb16baba48b76ad299 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 17 Nov 2009 14:20:57 +0000
Subject: If a queue reports 0 duration, then the MM may well calculate that
 the desired avg duration should be 0. The queue will then be told this on
 its next report.
This is potentially disastrous because for a fast-moving queue, setting the
duration to 0 will force all msgs via the disk, which will destroy
performance. However, it is not sufficient to just tell all queues reporting
0, infinity, because doing so would result in queues which have been pushed
out to disk suddenly coming all the way back in. Thus if a queue reports a
duration of < 1, and the last value we told it was infinity, then we tell it
infinity again. Thus fast-moving queues are not restricted, and queues which
have been told real numbers and slowly squeezed out to disk stay squeezed
out.

---
 src/rabbit_memory_monitor.erl | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl
index be15ecbb..ec08d475 100644
--- a/src/rabbit_memory_monitor.erl
+++ b/src/rabbit_memory_monitor.erl
@@ -153,14 +153,19 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From,
                     queue_duration_count = Count,
                     queue_durations = Durations,
                     desired_duration = SendDuration}) ->
-    gen_server2:reply(From, SendDuration),
+    [{_Pid, PrevQueueDuration, PrevSendDuration}] = ets:lookup(Durations, Pid),
+    SendDuration1 =
+        case QueueDuration < 1 andalso PrevSendDuration == infinity of
+            true  -> infinity;
+            false -> SendDuration
+        end,
+    gen_server2:reply(From, SendDuration1),
 
     QueueDuration1 = case QueueDuration > ?MAX_QUEUE_DURATION of
                          true  -> infinity;
                          false -> QueueDuration
                      end,
-    [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid),
     {Sum1, Count1} =
         case {PrevQueueDuration, QueueDuration1} of
             {infinity, infinity} -> {Sum, Count};
@@ -168,7 +173,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From,
             {_, infinity}        -> {Sum - PrevQueueDuration, Count - 1};
             {_, _} -> {Sum - PrevQueueDuration + QueueDuration1, Count}
         end,
-    true = ets:insert(Durations, {Pid, QueueDuration1, SendDuration}),
+    true = ets:insert(Durations, {Pid, QueueDuration1, SendDuration1}),
     {noreply, State#state{queue_duration_sum = Sum1,
                           queue_duration_count = Count1}};
-- cgit v1.2.1

From 51eb9a967a2c903f936f19be8fc97e2d66a03970 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 17 Nov 2009 15:57:50 +0000
Subject: 1. A bugfix in MM, in internal_update. 2. In MM only enforce limits
 if we're using more than half the available RAM. 3. In VQ, sum the ingress
 and egress rates - this makes things much smoother when queues go from being
 flooded to being consumed, and vice versa.
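A quick invented example of what point 3 changes in rabbit_variable_queue:ram_duration/1: take a queue holding 1000 messages in RAM, draining at 1 msg/sec but still being published to at 99 msgs/sec. Dividing by the egress rate alone reports a duration of 1000 seconds; dividing by the sum reports 10:

    %% before: RamMsgCount / AvgEgressRate
    1000 / 1.         %% 1000.0 seconds
    %% after:  RamMsgCount / (AvgEgressRate + AvgIngressRate)
    1000 / (1 + 99).  %% 10.0 seconds

Summing the two rates damps the reported duration for queues that are busy in either direction, which is what smooths the flooded-to-consumed transition described above.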
--- src/rabbit_memory_monitor.erl | 17 +++++++++-------- src/rabbit_variable_queue.erl | 14 ++++---------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index ec08d475..1a879b7c 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -230,14 +230,14 @@ internal_update(State = #state{memory_limit = Limit, callbacks = Callbacks}) -> %% available memory / used memory MemoryRatio = Limit / erlang:memory(total), - AvgDuration = case Count of - 0 -> infinity; - _ -> Sum / Count + AvgDuration = case Count == 0 of + true -> infinity; + false -> Sum / Count end, DesiredDurationAvg1 = - case AvgDuration of - infinity -> infinity; - AvgQueueDuration -> lists:max([0, AvgQueueDuration * MemoryRatio]) + case AvgDuration == infinity orelse MemoryRatio > 2 of + true -> infinity; + false -> lists:max([0, AvgDuration * MemoryRatio]) end, State1 = State#state{memory_ratio = MemoryRatio, desired_duration = DesiredDurationAvg1}, @@ -245,7 +245,8 @@ internal_update(State = #state{memory_limit = Limit, %% only inform queues immediately if the desired duration has %% decreased case (DesiredDurationAvg == infinity andalso DesiredDurationAvg /= infinity) - orelse (DesiredDurationAvg1 < DesiredDurationAvg) of + orelse (DesiredDurationAvg1 /= infinity andalso + DesiredDurationAvg1 < DesiredDurationAvg) of true -> %% If we have pessimistic information, we need to inform %% queues to reduce it's memory usage when needed. This @@ -261,7 +262,7 @@ internal_update(State = #state{memory_limit = Limit, ets:insert(Durations, {Pid, QueueDuration, DesiredDurationAvg1}); - _ -> true + false -> true end end, true, Durations); false -> ok diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 2ee57ba7..461d3110 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -224,10 +224,7 @@ set_queue_ram_duration_target( avg_ingress_rate = AvgIngressRate, target_ram_msg_count = TargetRamMsgCount }) -> - Rate = case 0 == AvgEgressRate of - true -> AvgIngressRate; - false -> AvgEgressRate - end, + Rate = AvgEgressRate + AvgIngressRate, TargetRamMsgCount1 = case DurationTarget of infinity -> undefined; @@ -265,12 +262,9 @@ ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, ram_msg_count = RamMsgCount }) -> %% msgs / (msgs/sec) == sec - case AvgEgressRate == 0 of - true -> case AvgIngressRate == 0 of - true -> infinity; - false -> RamMsgCount / AvgIngressRate - end; - false -> RamMsgCount / AvgEgressRate + case AvgEgressRate == 0 andalso AvgIngressRate == 0 of + true -> infinity; + false -> RamMsgCount / (AvgEgressRate + AvgIngressRate) end. fetch(State = -- cgit v1.2.1 From 53e19a0beca2d9935c6e034d737e19a742ac762f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 Nov 2009 16:18:12 +0000 Subject: Well tracking down that transposed error took about an hour... sigh. 
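The transposed error fixed in the one-line diff below is easy to make and hard to spot precisely because both elements of the returned pair are dicts: a pattern match with the elements swapped still succeeds, it just binds the wrong dict to each name, so nothing crashes until the data is used. A minimal illustration (hypothetical values, not the rabbit code; the fixed line implies load_journal/3 returns the ack dict first):

    %% a load_journal-style result; first element is the ack dict:
    Journal = {dict:from_list([{ack, 1}]), dict:from_list([{del, 2}])},
    {JAckDict, _JDelDict} = Journal,  %% the fixed, correct order
    {_JDel, JAcks} = Journal,         %% transposed: still matches, but...
    false = (JAckDict == JAcks).      %% ...binds the deliveries instead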
--- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index bd899676..abf8f57e 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -548,7 +548,7 @@ queue_index_walker([]) -> queue_index_walker([QueueName|QueueNames]) -> State = blank_state(QueueName), {Hdl, State1} = get_journal_handle(State), - {_JDelDict, JAckDict} = load_journal(Hdl, dict:new(), dict:new()), + {JAckDict, _JDelDict} = load_journal(Hdl, dict:new(), dict:new()), State2 = #qistate { dir = Dir } = close_handle(journal, State1 #qistate { journal_ack_dict = JAckDict }), SegNums = all_segment_nums(Dir), -- cgit v1.2.1 From 221dfc5804c037dd7992832d7dbd3d16f73736f1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 Nov 2009 16:47:37 +0000 Subject: Tidying of msg_store as per QA discussion with Matthias --- src/rabbit_msg_store.erl | 108 +++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 55 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b42574c0..2ddb1826 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -286,8 +286,54 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, State1 #msstate { current_file_handle = FileHdl }}. -handle_call({read, MsgId}, _From, State) -> - {Result, State1} = internal_read_message(MsgId, State), +handle_call({read, MsgId}, _From, State = + #msstate { current_file = CurFile, + current_file_handle = CurHdl }) -> + {Result, State1} = + case index_lookup(MsgId, State) of + not_found -> {not_found, State}; + #msg_location { ref_count = RefCount, + file = File, + offset = Offset, + total_size = TotalSize } -> + case fetch_and_increment_cache(MsgId, State) of + not_found -> + ok = case CurFile =:= File andalso {ok, Offset} >= + file_handle_cache:current_raw_offset(CurHdl) of + true -> + file_handle_cache:append_write_buffer( + CurHdl); + false -> + ok + end, + {Hdl, State2} = get_read_handle(File, State), + {ok, Offset} = file_handle_cache:position(Hdl, Offset), + {ok, {MsgId, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {MsgId, _}} = Obj -> Obj; + Rest -> + throw({error, {misread, [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {read, Rest}, + {proc_dict, get()} + ]}}) + end, + ok = case RefCount > 1 of + true -> + insert_into_cache(MsgId, Msg, State2); + false -> + %% it's not in the cache and we + %% only have one reference to the + %% message. So don't bother + %% putting it in the cache. + ok + end, + {{ok, Msg}, State2}; + {Msg, _RefCount} -> + {{ok, Msg}, State} + end + end, reply(Result, State1); handle_call({contains, MsgId}, _From, State) -> @@ -477,55 +523,10 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> no_compact end. 
-internal_read_message(MsgId, - State = #msstate { current_file = CurFile, - current_file_handle = CurHdl }) -> - case index_lookup(MsgId, State) of - not_found -> {not_found, State}; - #msg_location { ref_count = RefCount, - file = File, - offset = Offset, - total_size = TotalSize } -> - case fetch_and_increment_cache(MsgId, State) of - not_found -> - {ok, CurOffset} = - file_handle_cache:current_raw_offset(CurHdl), - ok = case CurFile =:= File andalso Offset >= CurOffset of - true -> - file_handle_cache:append_write_buffer(CurHdl); - false -> - ok - end, - {Hdl, State1} = get_read_handle(File, State), - {ok, Offset} = file_handle_cache:position(Hdl, Offset), - {ok, {MsgId, Msg}} = - case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> Obj; - Rest -> - throw({error, {misread, [{old_state, State}, - {file_num, File}, - {offset, Offset}, - {read, Rest}, - {proc_dict, get()}]}}) - end, - ok = if RefCount > 1 -> - insert_into_cache(MsgId, Msg, State1); - true -> ok - %% it's not in the cache and we - %% only have one reference to the - %% message. So don't bother - %% putting it in the cache. - end, - {{ok, Msg}, State1}; - {Msg, _RefCount} -> - {{ok, Msg}, State} - end - end. - close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> case dict:find(Key, FHC) of {ok, Hdl} -> - ok = close_file(Hdl), + ok = file_handle_cache:close(Hdl), State #msstate { file_handle_cache = dict:erase(Key, FHC) }; error -> State end. @@ -552,9 +553,6 @@ open_file(Dir, FileName, Mode) -> file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). -close_file(Hdl) -> - file_handle_cache:close(Hdl). - %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- @@ -850,7 +848,7 @@ maybe_roll_to_new_file(Offset, file_summary = FileSummary }) when Offset >= FileSizeLimit -> State1 = sync(State), - ok = close_file(CurHdl), + ok = file_handle_cache:close(CurHdl), NextFile = CurFile + 1, {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), true = ets:update_element(FileSummary, CurFile, @@ -993,15 +991,15 @@ combine_files(#file_summary { file = Source, file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), %% position in DestinationHdl should now be DestinationValid ok = file_handle_cache:sync(DestinationHdl), - ok = close_file(TmpHdl), + ok = file_handle_cache:close(TmpHdl), ok = file:delete(form_filename(Dir, Tmp)) end, SourceWorkList = index_search_by_file(Source, State1), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up - ok = close_file(SourceHdl), - ok = close_file(DestinationHdl), + ok = file_handle_cache:close(SourceHdl), + ok = file_handle_cache:close(DestinationHdl), ok = file:delete(form_filename(Dir, SourceName)), State1. -- cgit v1.2.1 From 1763f7acfad51cee6d29f75434361d918a92ced9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 Nov 2009 17:59:11 +0000 Subject: some cosmetics to MM. Also, there's a problem where if all queues have been pushed out to disk (duration == 0), and then more memory becomes available (eg queue death), then no queues will be brought back in. Therefore, if >= 5% of our memory is available, ensure that the sum is at least 1. This allows queues to come back in. 
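To see why a single +1 suffices, run the numbers for the stuck state just described (figures invented): with 10 queues all reporting duration 0, the sum is 0, so the average desired duration is 0 and stays 0 no matter how much memory frees up. The diff below only applies the nudge when MemoryRatio > 1.05, i.e. when roughly 5% or more of the limit is headroom:

    %% stuck:   Avg = 0 / 10       = 0    => desired duration stays 0
    %% nudged:  Avg = (0 + 1) / 10 = 0.1  => desired = 0.1 * MemoryRatio > 0

A desired duration of even a fraction of a second lets each queue pull some messages back into RAM, after which the reported durations, and hence the sum, become non-zero on their own.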
--- src/rabbit_memory_monitor.erl | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 1a879b7c..0b6ad5c7 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -70,7 +70,6 @@ -module(rabbit_memory_monitor). --include("rabbit.hrl"). -behaviour(gen_server2). @@ -84,7 +83,7 @@ queue_duration_sum, %% sum of all queue_durations queue_duration_count, %% number of elements in sum memory_limit, %% how much memory we intend to use - memory_ratio, %% how much more memory we can use + memory_ratio, %% limit / used desired_duration, %% the desired queue duration callbacks %% a dict of qpid -> {M,F,A}s }). @@ -92,7 +91,7 @@ -define(SERVER, ?MODULE). -define(DEFAULT_UPDATE_INTERVAL, 2500). -define(TABLE_NAME, ?MODULE). --define(MAX_QUEUE_DURATION, 60*60*24). % 1 day +-define(MAX_QUEUE_DURATION, 86400). %% 60*60*24 i.e. 1 day %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). @@ -153,19 +152,20 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, queue_duration_count = Count, queue_durations = Durations, desired_duration = SendDuration}) -> + + QueueDuration1 = case QueueDuration > ?MAX_QUEUE_DURATION of + true -> infinity; + false -> QueueDuration + end, [{_Pid, PrevQueueDuration, PrevSendDuration}] = ets:lookup(Durations, Pid), + SendDuration1 = - case QueueDuration < 1 andalso PrevSendDuration == infinity of + case QueueDuration1 < 1 andalso PrevSendDuration == infinity of true -> infinity; false -> SendDuration end, gen_server2:reply(From, SendDuration1), - QueueDuration1 = case QueueDuration > ?MAX_QUEUE_DURATION of - true -> infinity; - false -> QueueDuration - end, - {Sum1, Count1} = case {PrevQueueDuration, QueueDuration1} of {infinity, infinity} -> {Sum, Count}; @@ -230,9 +230,13 @@ internal_update(State = #state{memory_limit = Limit, callbacks = Callbacks}) -> %% available memory / used memory MemoryRatio = Limit / erlang:memory(total), + Sum1 = case MemoryRatio > 1.05 of + true -> Sum + 1; + false -> Sum + end, AvgDuration = case Count == 0 of true -> infinity; - false -> Sum / Count + false -> Sum1 / Count end, DesiredDurationAvg1 = case AvgDuration == infinity orelse MemoryRatio > 2 of -- cgit v1.2.1 From af13a2c6febf1dcfb61db888cef98eabe5f47345 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 Nov 2009 18:19:19 +0000 Subject: truncate desired duration as I've now seen rounding issues cause behavioural problems --- src/rabbit_memory_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 0b6ad5c7..99becb57 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -241,7 +241,7 @@ internal_update(State = #state{memory_limit = Limit, DesiredDurationAvg1 = case AvgDuration == infinity orelse MemoryRatio > 2 of true -> infinity; - false -> lists:max([0, AvgDuration * MemoryRatio]) + false -> lists:max([0, trunc(AvgDuration * MemoryRatio)]) end, State1 = State#state{memory_ratio = MemoryRatio, desired_duration = DesiredDurationAvg1}, -- cgit v1.2.1 From 37c3f025ac25aa88117fba6a432d5c2fade5a5f6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 00:14:09 +0000 Subject: yeah, duration shouldn't be truncated. 
reverted --- scripts/rabbitmq-server | 2 +- src/rabbit_memory_monitor.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 34904850..2a1e48ef 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -109,7 +109,7 @@ exec erl \ -os_mon start_cpu_sup true \ -os_mon start_disksup false \ -os_mon start_memsup false \ - -os_mon vm_memory_high_watermark 0.4 \ + -os_mon vm_memory_high_watermark 0.2 \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 99becb57..0b6ad5c7 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -241,7 +241,7 @@ internal_update(State = #state{memory_limit = Limit, DesiredDurationAvg1 = case AvgDuration == infinity orelse MemoryRatio > 2 of true -> infinity; - false -> lists:max([0, trunc(AvgDuration * MemoryRatio)]) + false -> lists:max([0, AvgDuration * MemoryRatio]) end, State1 = State#state{memory_ratio = MemoryRatio, desired_duration = DesiredDurationAvg1}, -- cgit v1.2.1 From cddce4230dfaec1f412b72d577e61fafe895d41b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 00:14:51 +0000 Subject: and now remove the accidental commit of the server script. it's late... --- scripts/rabbitmq-server | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 2a1e48ef..34904850 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -109,7 +109,7 @@ exec erl \ -os_mon start_cpu_sup true \ -os_mon start_disksup false \ -os_mon start_memsup false \ - -os_mon vm_memory_high_watermark 0.2 \ + -os_mon vm_memory_high_watermark 0.4 \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ -- cgit v1.2.1 From 66432149eee2edee2d049673bb5bb02bb6130cdd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 14:10:02 +0000 Subject: Minor bugfix --- src/rabbit_memory_monitor.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 0b6ad5c7..03b3bfde 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -153,7 +153,8 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, queue_durations = Durations, desired_duration = SendDuration}) -> - QueueDuration1 = case QueueDuration > ?MAX_QUEUE_DURATION of + QueueDuration1 = case infinity == QueueDuration orelse + QueueDuration > ?MAX_QUEUE_DURATION of true -> infinity; false -> QueueDuration end, -- cgit v1.2.1 From c757f713ab2bd9c7bf5961911c77ab05c383e230 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 14:20:19 +0000 Subject: Documentation improvements --- src/rabbit_memory_monitor.erl | 37 +++---------------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 03b3bfde..7237b825 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -34,40 +34,6 @@ %% It receives statistics from all queues, counts the desired %% queue length (in seconds), and sends this information back to %% queues. 
-%% -%% Normally, messages are exchanged like that: -%% -%% (1) (2) (3) -%% Timer | | -%% v v -%% Queue -----+--------+-----<***hibernated***>-------------> -%% | ^ | ^ ^ -%% v | v | | -%% Monitor X--*-+--X---*-+--X------X----X-----X+-----------> -%% -%% Or to put it in words. Queue periodically sends (casts) 'push_queue_duration' -%% message to the Monitor (cases 1 and 2 on the asciiart above). Monitor -%% _always_ replies with a 'set_queue_duration' cast. This way, -%% we're pretty sure that the Queue is not hibernated. -%% Monitor periodically recounts numbers ('X' on asciiart). If, during this -%% update we notice that a queue was using too much memory, we send a message -%% back. This will happen even if the queue is hibernated, as we really do want -%% it to reduce its memory footprint. -%% -%% -%% The main job of this module, is to make sure that all the queues have -%% more or less the same number of seconds till become drained. -%% This average, seconds-till-queue-is-drained, is then multiplied by -%% the ratio of Total/Used memory. So, if we can 'afford' more memory to be -%% used, we'll report greater number back to the queues. In the out of -%% memory case, we are going to reduce the average drain-seconds. -%% To acheive all this we need to accumulate the information from every -%% queue, and count an average from that. -%% -%% real_queue_duration_avg = avg([drain_from_queue_1, queue_2, queue_3, ...]) -%% memory_overcommit = allowed_memory / used_memory -%% desired_queue_duration_avg = real_queue_duration_avg * memory_overcommit - -module(rabbit_memory_monitor). @@ -231,6 +197,9 @@ internal_update(State = #state{memory_limit = Limit, callbacks = Callbacks}) -> %% available memory / used memory MemoryRatio = Limit / erlang:memory(total), + %% if all queues are pushed to disk, then Sum will be 0. If memory + %% then becomes available, unless we do the following, we will + %% never allow queues to come off disk. Sum1 = case MemoryRatio > 1.05 of true -> Sum + 1; false -> Sum -- cgit v1.2.1 From 2034332b3440a8cf67b3eb2816765025b1dd73d7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 16:01:21 +0000 Subject: Lots of tidying. Also, don't allow rates to decay infinitely. Instead avg over the last two periods, and assume zero rates when we hibernate. --- src/rabbit_amqqueue_process.erl | 43 +++++++++++++++++++++-------------------- src/rabbit_variable_queue.erl | 10 +++------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 40b19a54..bb951b40 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -39,7 +39,7 @@ -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). -define(SYNC_INTERVAL, 5). %% milliseconds --define(EGRESS_REMEASURE_INTERVAL, 5000). +-define(RATES_REMEASURE_INTERVAL, 5000). -export([start_link/1]). @@ -60,7 +60,7 @@ active_consumers, blocked_consumers, sync_timer_ref, - egress_rate_timer_ref + rate_timer_ref }). -record(consumer, {tag, ack_required}). @@ -115,7 +115,7 @@ init(Q = #amqqueue { name = QName }) -> active_consumers = queue:new(), blocked_consumers = queue:new(), sync_timer_ref = undefined, - egress_rate_timer_ref = undefined + rate_timer_ref = undefined }, {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -154,7 +154,7 @@ noreply(NewState) -> {noreply, NewState1, Timeout}. 
next_state(State = #q{variable_queue_state = VQS}) -> - next_state1(ensure_egress_rate_timer(State), + next_state1(ensure_rate_timer(State), rabbit_variable_queue:needs_sync(VQS)). next_state1(State = #q{sync_timer_ref = undefined}, true) -> @@ -166,22 +166,22 @@ next_state1(State = #q{sync_timer_ref = undefined}, false) -> next_state1(State, false) -> {stop_sync_timer(State), hibernate}. -ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after(?EGRESS_REMEASURE_INTERVAL, rabbit_amqqueue, +ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> + {ok, TRef} = timer:apply_after(?RATES_REMEASURE_INTERVAL, rabbit_amqqueue, remeasure_rates, [self()]), - State#q{egress_rate_timer_ref = TRef}; -ensure_egress_rate_timer(State = #q{egress_rate_timer_ref = just_measured}) -> - State#q{egress_rate_timer_ref = undefined}; -ensure_egress_rate_timer(State) -> + State#q{rate_timer_ref = TRef}; +ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> + State#q{rate_timer_ref = undefined}; +ensure_rate_timer(State) -> State. -stop_egress_rate_timer(State = #q{egress_rate_timer_ref = undefined}) -> +stop_rate_timer(State = #q{rate_timer_ref = undefined}) -> State; -stop_egress_rate_timer(State = #q{egress_rate_timer_ref = just_measured}) -> - State#q{egress_rate_timer_ref = undefined}; -stop_egress_rate_timer(State = #q{egress_rate_timer_ref = TRef}) -> +stop_rate_timer(State = #q{rate_timer_ref = just_measured}) -> + State#q{rate_timer_ref = undefined}; +stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), - State#q{egress_rate_timer_ref = undefined}. + State#q{rate_timer_ref = undefined}. start_sync_timer(State = #q{sync_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, rabbit_amqqueue, @@ -874,16 +874,13 @@ handle_cast(remeasure_rates, State = #q{variable_queue_state = VQS}) -> rabbit_memory_monitor:report_queue_duration(self(), RamDuration), VQS2 = rabbit_variable_queue:set_queue_ram_duration_target( DesiredDuration, VQS1), - io:format("~p Reported ~p and got back ~p~n", [self(), RamDuration, DesiredDuration]), - io:format("~p~n", [rabbit_variable_queue:status(VQS2)]), - noreply(State#q{egress_rate_timer_ref = just_measured, + noreply(State#q{rate_timer_ref = just_measured, variable_queue_state = VQS2}); handle_cast({set_queue_duration, Duration}, State = #q{variable_queue_state = VQS}) -> VQS1 = rabbit_variable_queue:set_queue_ram_duration_target( Duration, VQS), - io:format("~p was told to make duration ~p~n", [self(), Duration]), noreply(State#q{variable_queue_state = VQS1}). handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, @@ -923,5 +920,9 @@ handle_info(Info, State) -> handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> VQS1 = rabbit_variable_queue:full_flush_journal(VQS), - {hibernate, stop_egress_rate_timer( - State#q{ variable_queue_state = VQS1 })}. + %% no activity for a while == 0 egress and ingress rates + DesiredDuration = + rabbit_memory_monitor:report_queue_duration(self(), infinity), + VQS2 = rabbit_variable_queue:set_queue_ram_duration_target( + DesiredDuration, VQS1), + {hibernate, stop_rate_timer(State#q{variable_queue_state = VQS2})}. 
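The measurement this relies on changes in rabbit_variable_queue below: instead of keeping the last period that saw activity (which let a stale rate linger forever), the average is now formed over the current and the previous period. A worked example with illustrative numbers:

    %% Two consecutive 5-second periods: 50 msgs observed in the
    %% previous period, 0 in the current one, so
    %% timer:now_diff(Now, OThen) is 10000000 microseconds and
    {Rate, Window} = update_rate(Now, Then, 0, {OThen, 50}),
    %% Rate = 1000000 * ((0 + 50) / 10000000) = 5.0 msgs/sec and
    %% Window = {Then, 0}: the rate halves rather than freezing at
    %% its last non-zero value, and decays to 0 in the next period.

On hibernation the queue reports a duration of infinity to the memory monitor, which stands for zero ingress and egress rates.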
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 461d3110..b00a6059 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -483,14 +483,10 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, %% Minor helpers %%---------------------------------------------------------------------------- -update_rate(Now, Then, Count, Rate = {OThen, OCount}) -> - %% form the avg over the current periond and the previous +update_rate(Now, Then, Count, {OThen, OCount}) -> + %% form the avg over the current period and the previous Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), - Rate1 = case 0 == Count of - true -> Rate; %% keep the last period with activity - false -> {Then, Count} - end, - {Avg, Rate1}. + {Avg, {Then, Count}}. persistent_msg_ids(Pubs) -> [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, -- cgit v1.2.1 From cbf9a048c22fb796832f68d58beb0f8da4c2de41 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 16:11:03 +0000 Subject: correction of documentation in absence of prefetcher --- src/rabbit_variable_queue.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b00a6059..c3ad5463 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -84,11 +84,10 @@ %% The major invariant is that if the msg is to be a beta, q1 will be %% empty, and if it is to be a gamma then both q1 and q2 will be empty. %% -%% When taking msgs out of the queue, if q4 is empty then we drain the -%% prefetcher. If that doesn't help then we read directly from q3, or -%% gamma, if q3 is empty. If q3 and gamma are empty then we have an -%% invariant that q2 must be empty because q2 can only grow if gamma -%% is non empty. +%% When taking msgs out of the queue, if q4 is empty then we read +%% directly from q3, or gamma, if q3 is empty. If q3 and gamma are +%% empty then we have an invariant that q2 must be empty because q2 +%% can only grow if gamma is non empty. %% %% A further invariant is that if the queue is non empty, either q4 or %% q3 contains at least one entry. I.e. we never allow gamma to -- cgit v1.2.1 From 22eebed3fffbfe2255a3a7f80e542c09a808a566 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 18 Nov 2009 18:15:03 +0000 Subject: started on the server part of fhc --- src/file_handle_cache.erl | 104 ++++++++++++++++++++++++++++++++++++++++------ src/rabbit.erl | 1 + src/rabbit_msg_store.erl | 1 - 3 files changed, 92 insertions(+), 14 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 5c1c5a83..634cf016 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -31,10 +31,17 @@ -module(file_handle_cache). +-behaviour(gen_server2). + -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, append_write_buffer/1, copy/3]). +-export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(SERVER, ?MODULE). + %%---------------------------------------------------------------------------- -record(file, @@ -62,6 +69,7 @@ %%---------------------------------------------------------------------------- %% Specs +%%---------------------------------------------------------------------------- -ifdef(use_specs). 
@@ -91,6 +99,10 @@ %%---------------------------------------------------------------------------- %% Public API +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). open(Path, Mode, Options) -> case is_appender(Mode) of @@ -137,14 +149,31 @@ close(Ref) -> Handle -> case write_buffer(Handle) of {ok, #handle { hdl = Hdl, global_key = GRef, is_dirty = IsDirty, - is_read = IsReader, is_write = IsWriter }} -> + is_read = IsReader, is_write = IsWriter, + last_used_at = Then }} -> case Hdl of closed -> ok; _ -> ok = case IsDirty of true -> file:sync(Hdl); false -> ok end, - ok = file:close(Hdl) + ok = file:close(Hdl), + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete(Then, Tree), + Oldest = + case gb_trees:is_empty(Tree1) of + true -> + undefined; + false -> + {Oldest1, _Ref} = + gb_trees:smallest(Tree1), + Oldest1 + end, + gen_server2:cast( + ?SERVER, {self(), close, Oldest}), + Tree1 + end) end, #file { reader_count = RCount, has_writer = HasWriter, path = Path } = File = get({GRef, fhc_file}), @@ -162,7 +191,7 @@ close(Ref) -> end, ok; {Error, Handle1} -> - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Error end end. @@ -185,7 +214,7 @@ read(Ref, Count) -> end; {Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Result; Error -> Error end. @@ -202,7 +231,7 @@ append(Ref, Data) -> {{error, _} = Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Result; Error -> Error end. @@ -225,7 +254,7 @@ sync(Ref) -> end; Error -> {Error, Handle} end, - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Result; Error -> Error end. @@ -238,7 +267,7 @@ position(Ref, NewOffset) -> {ok, Handle2} -> maybe_seek(NewOffset, Handle2); {Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Result; Error -> Error end. @@ -265,7 +294,7 @@ truncate(Ref) -> end; {Error, Handle2} -> {Error, Handle2} end, - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Result; Error -> Error end. @@ -295,7 +324,7 @@ append_write_buffer(Ref) -> case get_or_reopen(Ref) of {ok, Handle} -> {Result, Handle1} = write_buffer(Handle), - put({Ref, fhc_handle}, Handle1), + put_handle(Ref, Handle1), Result; Error -> Error end. @@ -328,8 +357,8 @@ copy(Src, Dest, Count) -> end; Error -> {Error, SHandle, DHandle} end, - put({Src, fhc_handle}, SHandle1), - put({Dest, fhc_handle}, DHandle1), + put_handle(Src, SHandle1), + put_handle(Dest, DHandle1), Result; {ok, _} -> {error, destination_not_open_for_writing}; Error -> Error @@ -350,9 +379,27 @@ get_or_reopen(Ref) -> options = Options } -> #file { path = Path } = get({GRef, fhc_file}), open1(Path, Mode, Options, Ref, GRef); - Handle -> {ok, Handle #handle { last_used_at = now() }} + Handle -> + {ok, Handle} end. +get_or_create_age_tree() -> + case get(fhc_age_tree) of + undefined -> gb_trees:empty(); + AgeTree -> AgeTree + end. + +with_age_tree(Fun) -> + put(fhc_age_tree, Fun(get_or_create_age_tree())). + +put_handle(Ref, Handle = #handle { last_used_at = Then }) -> + Now = now(), + with_age_tree( + fun (Tree) -> + gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) + end), + put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). 
+ open1(Path, Mode, Options, Ref, GRef) -> case file:open(Path, Mode) of {ok, Hdl} -> @@ -362,15 +409,23 @@ open1(Path, Mode, Options, Ref, GRef) -> infinity -> infinity; N when is_integer(N) -> N end, + Now = now(), Handle = #handle { hdl = Hdl, offset = 0, trusted_offset = 0, write_buffer_size = 0, options = Options, write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), - global_key = GRef, last_used_at = now(), + global_key = GRef, last_used_at = Now, is_dirty = false }, put({Ref, fhc_handle}, Handle), + with_age_tree(fun (Tree) -> + Tree1 = gb_trees:insert(Now, Ref, Tree), + {Oldest, _Ref} = gb_trees:smallest(Tree1), + gen_server2:cast(?SERVER, + {self(), open, Oldest}), + Tree1 + end), {ok, Handle}; {error, Reason} -> {error, Reason} @@ -453,3 +508,26 @@ needs_seek(true, CurOffset, DesiredOffset) %% same as {bof, DO} %% because we can't really track size, we could well end up at EoF and not know needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> {false, true}. + +%%---------------------------------------------------------------------------- +%% gen_server +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, state}. + +handle_call(_Msg, _From, State) -> + {reply, message_not_understood, State}. + +handle_cast(Msg, State) -> + io:format("~p~n", [Msg]), + {noreply, State}. + +handle_info(_Msg, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/rabbit.erl b/src/rabbit.erl index 2e6810bc..405d170b 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -142,6 +142,7 @@ start(normal, []) -> check_empty_content_body_frame_size(), ok = rabbit_alarm:start(), + ok = start_child(file_handle_cache), {ok, MemoryWatermark} = application:get_env(vm_memory_high_watermark), diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2ddb1826..e9f47d36 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -43,7 +43,6 @@ -define(SERVER, ?MODULE). --define(MAX_READ_FILE_HANDLES, 256). -define(FILE_SIZE_LIMIT, (256*1024*1024)). -define(SYNC_INTERVAL, 5). %% milliseconds -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB -- cgit v1.2.1 From 5aa03a688ab5196773ce6e7f5557f6086746f9fc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 19 Nov 2009 16:43:17 +0000 Subject: Finished the file handle cache. It works as follows: 1) Every client keeps a gb_tree of timestamp-when-fd-was-last-used => fd_ref. This is updated for each action. 2) When a client opens a file or closes a file, it sends a suitable msg to the server, including the smallest timestamp-when-fd-was-last-used (i.e. least recently used fd) 3) The server counts how many fds have been used 4) When too many fds have been used, it finds the average age of the least-recently-used-fds and tells all clients to close anything older than that 5) This is likely to have no effect, because the clients may have since used the fds, thus the ages will be wrong. Regardless of whether any fds have been closed at this point, all the clients send back to the server their current smallest timestamp-when-fd-was-last-used 6) 2 seconds later, the server checks to see if the situation has improved, and if not, using the now updated information (thus the average age will be lower) may choose to further ask all clients to kill off fhs. This will repeat, albeit not that fast until enough fds have been closed. 
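Step 4 boils down to something like this (a condensed sketch of maybe_reduce from the diff that follows; the limit check and the 2-second re-check timer are elided):

    %% Each client has reported when its least recently used fd was
    %% last used. Average those ages and ask every client to close
    %% anything that has been idle for longer than the average.
    Now = now(),
    {Pids, Sum, ClientCount} =
        dict:fold(fun (_Pid, undefined, Acc) -> Acc;
                      (Pid, Eldest, {PidsAcc, SumAcc, CountAcc}) ->
                          {[Pid | PidsAcc],
                           SumAcc + timer:now_diff(Now, Eldest),
                           CountAcc + 1}
                  end, {[], 0, 0}, Elders),
    AverageAge = Sum / ClientCount,
    [Pid ! {file_handle_cache, maximum_eldest_since_use, AverageAge}
     || Pid <- Pids]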
--- src/file_handle_cache.erl | 267 ++++++++++++++++++++++++++++++---------- src/rabbit_amqqueue_process.erl | 4 + src/rabbit_misc.erl | 10 +- src/rabbit_msg_store.erl | 6 +- src/rabbit_reader.erl | 2 + src/vm_memory_monitor.erl | 12 +- 6 files changed, 227 insertions(+), 74 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 634cf016..53ed95d4 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -35,12 +35,18 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, - append_write_buffer/1, copy/3]). + append_write_buffer/1, copy/3, set_maximum_since_use/1]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). +-export([decrement/0, increment/0]). + -define(SERVER, ?MODULE). +-define(RESERVED_FOR_OTHERS, 50). +-define(FILE_HANDLES_LIMIT_WINDOWS, 10000000). +-define(FILE_HANDLES_LIMIT_OTHER, 1024). +-define(FILE_HANDLES_CHECK_INTERVAL, 2000). %%---------------------------------------------------------------------------- @@ -67,6 +73,12 @@ last_used_at }). +-record(fhc_state, + { elders, + limit, + count + }). + %%---------------------------------------------------------------------------- %% Specs %%---------------------------------------------------------------------------- @@ -94,6 +106,7 @@ -spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> ({'ok', integer()} | error())). +-spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -endif. @@ -128,7 +141,7 @@ open(Path, Mode, Options) -> reader_count = RCount1, has_writer = HasWriter orelse IsWriter }), Ref = make_ref(), - case open1(Path1, Mode1, Options, Ref, GRef) of + case open1(Path1, Mode1, Options, Ref, GRef, bof) of {ok, _Handle} -> {ok, Ref}; Error -> Error end @@ -146,54 +159,7 @@ open(Path, Mode, Options) -> close(Ref) -> case erase({Ref, fhc_handle}) of undefined -> ok; - Handle -> - case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, global_key = GRef, is_dirty = IsDirty, - is_read = IsReader, is_write = IsWriter, - last_used_at = Then }} -> - case Hdl of - closed -> ok; - _ -> ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - ok = file:close(Hdl), - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:delete(Then, Tree), - Oldest = - case gb_trees:is_empty(Tree1) of - true -> - undefined; - false -> - {Oldest1, _Ref} = - gb_trees:smallest(Tree1), - Oldest1 - end, - gen_server2:cast( - ?SERVER, {self(), close, Oldest}), - Tree1 - end) - end, - #file { reader_count = RCount, has_writer = HasWriter, - path = Path } = File = get({GRef, fhc_file}), - RCount1 = case IsReader of - true -> RCount - 1; - false -> RCount - end, - HasWriter1 = HasWriter andalso not IsWriter, - case RCount1 =:= 0 andalso not HasWriter1 of - true -> erase({GRef, fhc_file}), - erase({Path, fhc_path}); - false -> put({GRef, fhc_file}, - File #file { reader_count = RCount1, - has_writer = HasWriter1 }) - end, - ok; - {Error, Handle1} -> - put_handle(Ref, Handle1), - Error - end + Handle -> close1(Ref, Handle, hard) end. read(Ref, Count) -> @@ -367,18 +333,54 @@ copy(Src, Dest, Count) -> Error -> Error end. 
+set_maximum_since_use(MaximumAge) -> + Now = now(), + lists:foreach( + fun ({{Ref, fhc_handle}, Handle = + #handle { hdl = Hdl, last_used_at = Then }}) -> + Age = timer:now_diff(Now, Then), + case Hdl /= closed andalso Age >= MaximumAge of + true -> + case close1(Ref, Handle, soft) of + {ok, Handle1} -> + put({Ref, fhc_handle}, Handle1); + _ -> ok + end; + false -> ok + end; + (_KeyValuePair) -> ok + end, get()), + report_eldest(). + +decrement() -> + gen_server2:cast(?SERVER, decrement). + +increment() -> + gen_server2:cast(?SERVER, increment). %%---------------------------------------------------------------------------- %% Internal functions %%---------------------------------------------------------------------------- +report_eldest() -> + with_age_tree( + fun (Tree) -> + case gb_trees:is_empty(Tree) of + true -> Tree; + false -> {Oldest, _Ref} = gb_trees:smallest(Tree), + gen_server2:cast(?SERVER, {self(), update, Oldest}) + end, + Tree + end), + ok. + get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of undefined -> {error, not_open, Ref}; #handle { hdl = closed, mode = Mode, global_key = GRef, - options = Options } -> + options = Options, offset = Offset } -> #file { path = Path } = get({GRef, fhc_file}), - open1(Path, Mode, Options, Ref, GRef); + open1(Path, Mode, Options, Ref, GRef, Offset); Handle -> {ok, Handle} end. @@ -395,12 +397,10 @@ with_age_tree(Fun) -> put_handle(Ref, Handle = #handle { last_used_at = Then }) -> Now = now(), with_age_tree( - fun (Tree) -> - gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) - end), + fun (Tree) -> gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) end), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). -open1(Path, Mode, Options, Ref, GRef) -> +open1(Path, Mode, Options, Ref, GRef, Offset) -> case file:open(Path, Mode) of {ok, Hdl} -> WriteBufferSize = @@ -411,14 +411,15 @@ open1(Path, Mode, Options, Ref, GRef) -> end, Now = now(), Handle = - #handle { hdl = Hdl, offset = 0, trusted_offset = 0, + #handle { hdl = Hdl, offset = 0, trusted_offset = Offset, write_buffer_size = 0, options = Options, write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), global_key = GRef, last_used_at = Now, is_dirty = false }, - put({Ref, fhc_handle}, Handle), + {{ok, _Offset}, Handle1} = maybe_seek(Offset, Handle), + put({Ref, fhc_handle}, Handle1), with_age_tree(fun (Tree) -> Tree1 = gb_trees:insert(Now, Ref, Tree), {Oldest, _Ref} = gb_trees:smallest(Tree1), @@ -426,11 +427,64 @@ open1(Path, Mode, Options, Ref, GRef) -> {self(), open, Oldest}), Tree1 end), - {ok, Handle}; + {ok, Handle1}; {error, Reason} -> {error, Reason} end. 
+close1(Ref, Handle, SoftOrHard) -> + case write_buffer(Handle) of + {ok, #handle { hdl = Hdl, global_key = GRef, is_dirty = IsDirty, + is_read = IsReader, is_write = IsWriter, + last_used_at = Then } = Handle1 } -> + case Hdl of + closed -> ok; + _ -> ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete(Then, Tree), + Oldest = + case gb_trees:is_empty(Tree1) of + true -> undefined; + false -> + {Oldest1, _Ref} = + gb_trees:smallest(Tree1), + Oldest1 + end, + gen_server2:cast( + ?SERVER, {self(), close, Oldest}), + Tree1 + end) + end, + case SoftOrHard of + hard -> + #file { reader_count = RCount, has_writer = HasWriter, + path = Path } = File = get({GRef, fhc_file}), + RCount1 = case IsReader of + true -> RCount - 1; + false -> RCount + end, + HasWriter1 = HasWriter andalso not IsWriter, + case RCount1 =:= 0 andalso not HasWriter1 of + true -> erase({GRef, fhc_file}), + erase({Path, fhc_path}); + false -> put({GRef, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter1 }) + end, + ok; + soft -> + {ok, Handle1 #handle { hdl = closed }} + end; + {Error, Handle1} -> + put_handle(Ref, Handle1), + Error + end. + maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, offset = Offset }) -> {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), @@ -514,14 +568,46 @@ needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> %%---------------------------------------------------------------------------- init([]) -> - {ok, state}. + Limit = case application:get_env(file_handles_high_watermark) of + {ok, Watermark} + when is_integer(Watermark) andalso Watermark > 0 -> Watermark; + _ -> ulimit() + end, + rabbit_log:info("Limiting to approx ~p file handles~n", [Limit]), + {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0}}. handle_call(_Msg, _From, State) -> {reply, message_not_understood, State}. -handle_cast(Msg, State) -> - io:format("~p~n", [Msg]), - {noreply, State}. +handle_cast({Pid, open, EldestUnusedSince}, State = + #fhc_state { elders = Elders, count = Count }) -> + Elders1 = dict:store(Pid, EldestUnusedSince, Elders), + {noreply, maybe_reduce(State #fhc_state { elders = Elders1, + count = Count + 1 })}; + +handle_cast({Pid, update, EldestUnusedSince}, State = + #fhc_state { elders = Elders }) -> + Elders1 = dict:store(Pid, EldestUnusedSince, Elders), + %% don't call maybe_reduce from here otherwise we can create a + %% storm of messages + {noreply, State #fhc_state { elders = Elders1 }}; + +handle_cast({Pid, close, EldestUnusedSince}, State = + #fhc_state { elders = Elders, count = Count }) -> + Elders1 = case EldestUnusedSince of + undefined -> dict:erase(Pid, Elders); + _ -> dict:store(Pid, EldestUnusedSince, Elders) + end, + {noreply, State #fhc_state { elders = Elders1, count = Count - 1 }}; + +handle_cast(increment, State = #fhc_state { count = Count }) -> + {noreply, maybe_reduce(State #fhc_state { count = Count + 1 })}; + +handle_cast(decrement, State = #fhc_state { count = Count }) -> + {noreply, State #fhc_state { count = Count - 1 }}; + +handle_cast(check_counts, State) -> + {noreply, maybe_reduce(State)}. handle_info(_Msg, State) -> {noreply, State}. @@ -531,3 +617,58 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. 
+ +%%---------------------------------------------------------------------------- +%% server helpers +%%---------------------------------------------------------------------------- + +maybe_reduce(State = #fhc_state { limit = Limit, count = Count, + elders = Elders }) + when Limit /= infinity andalso Count >= Limit -> + Now = now(), + {Pids, Sum, ClientCount} = + dict:fold(fun (_Pid, undefined, Accs) -> + Accs; + (Pid, Eldest, {PidsAcc, SumAcc, CountAcc}) -> + {[Pid|PidsAcc], SumAcc + timer:now_diff(Now, Eldest), + CountAcc + 1} + end, {[], 0, 0}, Elders), + %% ClientCount can't be 0. + AverageAge = Sum / ClientCount, + lists:foreach(fun (Pid) -> + Pid ! {?MODULE, maximum_eldest_since_use, AverageAge} + end, Pids), + {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server2, + cast, [?SERVER, check_counts]), + State; +maybe_reduce(State) -> + State. + +%% Googling around suggests that Windows has a limit somewhere around 16M. +%% eg http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx +%% For everything else, assume ulimit exists. Further googling suggests that +%% BSDs (incl OS X), solaris and linux all agree that ulimit -n is file handles +ulimit() -> + try + %% under Linux, Solaris and FreeBSD, ulimit is a shell + %% builtin, not a command. In OS X, it's a command, but it's + %% still safe to call it this way: + case rabbit_misc:cmd("sh -c \"ulimit -n\"") of + "unlimited" -> infinity; + String = [C|_] when $0 =< C andalso C =< $9 -> + Num = list_to_integer( + lists:takewhile(fun (D) -> $0 =< D andalso D =< $9 end, + String)) - ?RESERVED_FOR_OTHERS, + lists:max([1, Num]); + String -> + rabbit_log:warning("Unexpected result of \"ulimit -n\": ~p~n", + [String]), + throw({unexpected_result, String}) + end + catch + throw:_ -> + case os:type() of + {win32, _OsName} -> ?FILE_HANDLES_LIMIT_WINDOWS; + _ -> ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS + end + end. diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index bb951b40..183bf26d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -914,6 +914,10 @@ handle_info(timeout, State = #q{variable_queue_state = VQS}) -> State#q{variable_queue_state = rabbit_variable_queue:tx_commit_from_vq(VQS)})); +handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 21764fce..391efb1d 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -55,7 +55,7 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). --export([unfold/2, ceil/1]). +-export([unfold/2, ceil/1, cmd/1]). -import(mnesia). -import(lists). @@ -126,6 +126,7 @@ -spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> number()). +-spec(cmd/1 :: (string()) -> string()). -endif. @@ -489,3 +490,10 @@ ceil(N) -> 0 -> N; _ -> 1 + T end. + +cmd(Command) -> + Exec = hd(string:tokens(Command, " ")), + case os:find_executable(Exec) of + false -> throw({command_not_found, Exec}); + _ -> os:cmd(Command) + end. 
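To make the ulimit parsing in file_handle_cache above concrete, a worked example with an illustrative shell result:

    %% Suppose "ulimit -n" prints "1024\n":
    String = "1024\n",
    Num = list_to_integer(
            lists:takewhile(fun (D) -> $0 =< D andalso D =< $9 end,
                            String)) - 50,  %% ?RESERVED_FOR_OTHERS
    %% Num = 974 handles left for the cache; lists:max([1, Num])
    %% guards against absurdly small ulimits, and "unlimited" maps
    %% to the atom infinity.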
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e9f47d36..3d38f721 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -414,7 +414,11 @@ handle_cast(sync, State) -> noreply(sync(State)). handle_info(timeout, State) -> - noreply(sync(State)). + noreply(sync(State)); + +handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State). terminate(_Reason, State = #msstate { msg_locations = MsgLocations, file_summary = FileSummary, diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index e21485b5..bf57ca5f 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -209,6 +209,7 @@ start_connection(Parent, Deb, ClientSock) -> {PeerAddressS, PeerPort} = peername(ClientSock), ProfilingValue = setup_profiling(), try + file_handle_cache:increment(), rabbit_log:info("starting TCP connection ~p from ~s:~p~n", [self(), PeerAddressS, PeerPort]), erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), @@ -232,6 +233,7 @@ start_connection(Parent, Deb, ClientSock) -> end)("exception on TCP connection ~p from ~s:~p~n~p~n", [self(), PeerAddressS, PeerPort, Ex]) after + file_handle_cache:decrement(), rabbit_log:info("closing TCP connection ~p from ~s:~p~n", [self(), PeerAddressS, PeerPort]), %% We don't close the socket explicitly. The reader is the diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index 65d4a451..9eee0c0b 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -223,19 +223,13 @@ get_mem_limit(MemFraction, TotalMemory) -> %%---------------------------------------------------------------------------- %% Internal Helpers %%---------------------------------------------------------------------------- -cmd(Command) -> - Exec = hd(string:tokens(Command, " ")), - case os:find_executable(Exec) of - false -> throw({command_not_found, Exec}); - _ -> os:cmd(Command) - end. %% get_total_memory(OS) -> Total %% Windows and Freebsd code based on: memsup:get_memory_usage/1 %% Original code was part of OTP and released under "Erlang Public License". get_total_memory({unix,darwin}) -> - File = cmd("/usr/bin/vm_stat"), + File = rabbit_misc:cmd("/usr/bin/vm_stat"), Lines = string:tokens(File, "\n"), Dict = dict:from_list(lists:map(fun parse_line_mach/1, Lines)), [PageSize, Inactive, Active, Free, Wired] = @@ -263,7 +257,7 @@ get_total_memory({unix, linux}) -> dict:fetch('MemTotal', Dict); get_total_memory({unix, sunos}) -> - File = cmd("/usr/sbin/prtconf"), + File = rabbit_misc:cmd("/usr/sbin/prtconf"), Lines = string:tokens(File, "\n"), Dict = dict:from_list(lists:map(fun parse_line_sunos/1, Lines)), dict:fetch('Memory size', Dict); @@ -314,7 +308,7 @@ parse_line_sunos(Line) -> end. freebsd_sysctl(Def) -> - list_to_integer(cmd("/sbin/sysctl -n " ++ Def) -- "\n"). + list_to_integer(rabbit_misc:cmd("/sbin/sysctl -n " ++ Def) -- "\n"). %% file:read_file does not work on files in /proc as it seems to get %% the size of the file first and then read that many bytes. But files -- cgit v1.2.1 From 4977956b60e486eee96cb15eefc6f9305ff3ec0d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 19 Nov 2009 18:11:12 +0000 Subject: Did Matthias's requested transformation of the fhc. Saved 50 lines. 
It's not quite as neat as I wanted due to passing extra args through (the difficulty is in writing the with_flushed_handles - you can't write the fun in there to have any arity), so it's always a 2-arity function :: ([any()], [handle()]), but hey, it's still quite a lot nicer than it was. --- src/file_handle_cache.erl | 297 ++++++++++++++++++++++------------------------ 1 file changed, 145 insertions(+), 152 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 53ed95d4..fca4441b 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -163,176 +163,35 @@ close(Ref) -> end. read(Ref, Count) -> - case get_or_reopen(Ref) of - {ok, #handle { is_read = false }} -> - {error, not_open_for_reading}; - {ok, Handle} -> - {Result, Handle1} = - case write_buffer(Handle) of - {ok, Handle2 = #handle { hdl = Hdl, offset = Offset }} -> - case file:read(Hdl, Count) of - {ok, Data} = Obj -> - Size = iolist_size(Data), - {Obj, - Handle2 #handle { offset = Offset + Size }}; - eof -> {eof, Handle2 #handle { at_eof = true }}; - Error -> {Error, Handle2} - end; - {Error, Handle2} -> {Error, Handle2} - end, - put_handle(Ref, Handle1), - Result; - Error -> Error - end. + with_flushed_handles([Ref], Count, fun internal_read/2). append(Ref, Data) -> - case get_or_reopen(Ref) of - {ok, #handle { is_write = false }} -> - {error, not_open_for_writing}; - {ok, Handle} -> - {Result, Handle1} = - case maybe_seek(eof, Handle) of - {{ok, _Offset}, Handle2 = #handle { at_eof = true }} -> - write_to_buffer(Data, Handle2); - {{error, _} = Error, Handle2} -> - {Error, Handle2} - end, - put_handle(Ref, Handle1), - Result; - Error -> Error - end. + with_handles([Ref], Data, fun internal_append/2). sync(Ref) -> - case get_or_reopen(Ref) of - {ok, #handle { is_dirty = false, write_buffer = [] }} -> - ok; - {ok, Handle} -> - %% write_buffer will set is_dirty, or leave it set if buffer empty - {Result, Handle1} = - case write_buffer(Handle) of - {ok, Handle2 = #handle { - hdl = Hdl, offset = Offset, is_dirty = true }} -> - case file:sync(Hdl) of - ok -> {ok, - Handle2 #handle { trusted_offset = Offset, - is_dirty = false }}; - Error -> {Error, Handle2} - end; - Error -> {Error, Handle} - end, - put_handle(Ref, Handle1), - Result; - Error -> Error - end. + with_handles([Ref], ok, fun internal_sync/2). position(Ref, NewOffset) -> - case get_or_reopen(Ref) of - {ok, Handle} -> - {Result, Handle1} = - case write_buffer(Handle) of - {ok, Handle2} -> maybe_seek(NewOffset, Handle2); - {Error, Handle2} -> {Error, Handle2} - end, - put_handle(Ref, Handle1), - Result; - Error -> Error - end. + with_handles([Ref], NewOffset, fun internal_position/2). truncate(Ref) -> - case get_or_reopen(Ref) of - {ok, #handle { is_write = false }} -> - {error, not_open_for_writing}; - {ok, Handle} -> - {Result, Handle1} = - case write_buffer(Handle) of - {ok, - Handle2 = #handle { hdl = Hdl, offset = Offset, - trusted_offset = TrustedOffset }} -> - case file:truncate(Hdl) of - ok -> - {ok, - Handle2 #handle { - at_eof = true, - trusted_offset = lists:min([Offset, - TrustedOffset]) - }}; - Error -> {Error, Handle2} - end; - {Error, Handle2} -> {Error, Handle2} - end, - put_handle(Ref, Handle1), - Result; - Error -> Error - end. + with_handles([Ref], ok, fun internal_truncate/2). last_sync_offset(Ref) -> - case get_or_reopen(Ref) of - {ok, #handle { trusted_offset = TrustedOffset }} -> {ok, TrustedOffset}; - Error -> Error - end. + with_handles([Ref], ok, fun internal_last_sync_offset/2). 
current_virtual_offset(Ref) -> - case get_or_reopen(Ref) of - {ok, #handle { at_eof = true, is_write = true, offset = Offset, - write_buffer_size = Size }} -> - {ok, Offset + Size}; - {ok, #handle { offset = Offset }} -> {ok, Offset}; - Error -> Error - end. + with_handles([Ref], ok, fun internal_current_virtual_offset/2). current_raw_offset(Ref) -> - case get_or_reopen(Ref) of - {ok, #handle { offset = Offset }} -> {ok, Offset}; - Error -> Error - end. + with_handles([Ref], ok, fun internal_current_raw_offset/2). append_write_buffer(Ref) -> - case get_or_reopen(Ref) of - {ok, Handle} -> - {Result, Handle1} = write_buffer(Handle), - put_handle(Ref, Handle1), - Result; - Error -> Error - end. + with_flushed_handles([Ref], ok, fun internal_append_write_buffer/2). copy(Src, Dest, Count) -> - case get_or_reopen(Src) of - {ok, SHandle = #handle { is_read = true }} -> - case get_or_reopen(Dest) of - {ok, DHandle = #handle { is_write = true }} -> - {Result, SHandle1, DHandle1} = - case write_buffer(SHandle) of - {ok, SHandle2 = #handle { hdl = SHdl, - offset = SOffset }} -> - case write_buffer(DHandle) of - {ok, - DHandle2 = #handle { hdl = DHdl, - offset = DOffset }} -> - Result1 = file:copy(SHdl, DHdl, Count), - case Result1 of - {ok, Count1} -> - {Result1, - SHandle2 #handle { - offset = SOffset + Count1 }, - DHandle2 #handle { - offset = DOffset + Count1 }}; - Error -> - {Error, SHandle2, DHandle2} - end; - Error -> {Error, SHandle2, DHandle} - end; - Error -> {Error, SHandle, DHandle} - end, - put_handle(Src, SHandle1), - put_handle(Dest, DHandle1), - Result; - {ok, _} -> {error, destination_not_open_for_writing}; - Error -> Error - end; - {ok, _} -> {error, source_not_open_for_reading}; - Error -> Error - end. - + with_flushed_handles([Src, Dest], Count, fun internal_copy/2). + set_maximum_since_use(MaximumAge) -> Now = now(), lists:foreach( @@ -358,6 +217,102 @@ decrement() -> increment() -> gen_server2:cast(?SERVER, increment). +%%---------------------------------------------------------------------------- +%% Internal versions for the above +%%---------------------------------------------------------------------------- + +internal_read(_Count, [#handle { is_read = false }]) -> + {error, not_open_for_reading}; +internal_read(Count, [Handle = #handle { hdl = Hdl, offset = Offset }]) -> + case file:read(Hdl, Count) of + {ok, Data} = Obj -> + Size = iolist_size(Data), + {Obj, [Handle #handle { offset = Offset + Size }]}; + eof -> {eof, [Handle #handle { at_eof = true }]}; + Error -> {Error, [Handle]} + end. + +internal_append(_Data, [#handle { is_write = false }]) -> + {error, not_open_for_writing}; +internal_append(Data, [Handle]) -> + case maybe_seek(eof, Handle) of + {{ok, _Offset}, Handle1 = #handle { at_eof = true }} -> + {Result, Handle2} = write_to_buffer(Data, Handle1), + {Result, [Handle2]}; + {{error, _} = Error, Handle1} -> + {Error, [Handle1]} + end. + +internal_sync(ok, [#handle { is_dirty = false, write_buffer = [] }]) -> + ok; +internal_sync(ok, [Handle]) -> + %% write_buffer will set is_dirty, or leave it set if buffer empty + case write_buffer(Handle) of + {ok, Handle1 = #handle {hdl = Hdl, offset = Offset, is_dirty = true}} -> + case file:sync(Hdl) of + ok -> {ok, [Handle1 #handle { trusted_offset = Offset, + is_dirty = false }]}; + Error -> {Error, [Handle1]} + end; + {Error, Handle1} -> {Error, [Handle1]} + end. 
+ +internal_position(NewOffset, [Handle]) -> + case write_buffer(Handle) of + {ok, Handle1} -> {Result, Handle2} = maybe_seek(NewOffset, Handle1), + {Result, [Handle2]}; + {Error, Handle1} -> {Error, [Handle1]} + end. + +internal_truncate(ok, [#handle { is_write = false }]) -> + {error, not_open_for_writing}; +internal_truncate(ok, [Handle]) -> + case write_buffer(Handle) of + {ok, Handle1 = #handle { hdl = Hdl, offset = Offset, + trusted_offset = TrustedOffset }} -> + case file:truncate(Hdl) of + ok -> {ok, + [Handle1 #handle { + at_eof = true, + trusted_offset = lists:min([Offset, TrustedOffset]) + }]}; + Error -> {Error, [Handle1]} + end; + {Error, Handle1} -> {Error, Handle1} + end. + +internal_last_sync_offset(ok, [#handle { trusted_offset = TrustedOffset }]) -> + {ok, TrustedOffset}. + +internal_current_virtual_offset(ok, [#handle { at_eof = true, is_write = true, + offset = Offset, + write_buffer_size = Size }]) -> + {ok, Offset + Size}; +internal_current_virtual_offset(ok, [#handle { offset = Offset }]) -> + {ok, Offset}. + +internal_current_raw_offset(ok, [#handle { offset = Offset }]) -> + {ok, Offset}. + +internal_append_write_buffer(ok, [Handle]) -> + {ok, [Handle]}. + +internal_copy(Count, [SHandle = #handle { is_read = true, hdl = SHdl, + offset = SOffset }, + DHandle = #handle { is_write = true, hdl = DHdl, + offset = DOffset }]) -> + Result1 = file:copy(SHdl, DHdl, Count), + case Result1 of + {ok, Count1} -> + {Result1, + [SHandle #handle { offset = SOffset + Count1 }, + DHandle #handle { offset = DOffset + Count1 }]}; + Error -> + {Error, [SHandle, DHandle]} + end; +internal_copy(_Count, _Handles) -> + {error, incorrect_handle_modes}. + %%---------------------------------------------------------------------------- %% Internal functions %%---------------------------------------------------------------------------- @@ -374,6 +329,44 @@ report_eldest() -> end), ok. +with_handles(Refs, Args, Fun) -> + ResHandles = lists:foldl( + fun (Ref, {ok, HandlesAcc}) -> + case get_or_reopen(Ref) of + {ok, Handle} -> {ok, [Handle | HandlesAcc]}; + Error -> Error + end; + (_Ref, Error) -> Error + end, {ok, []}, Refs), + case ResHandles of + {ok, Handles} -> + case erlang:apply(Fun, [Args, lists:reverse(Handles)]) of + {Result, Handles1} when is_list(Handles1) -> + lists:zipwith(fun put_handle/2, Refs, Handles1), + Result; + Result -> Result + end; + Error -> Error + end. + +with_flushed_handles(Refs, Args, Fun) -> + with_handles( + Refs, Args, + fun (Args1, Handles) -> + ResHandles1 = + lists:foldl( + fun (Handle, {ok, HandlesAcc}) -> + {Res, Handle1} = write_buffer(Handle), + {Res, [Handle1 | HandlesAcc]}; + (Handle, {Error, HandlesAcc}) -> + {Error, [Handle | HandlesAcc]} + end, {ok, []}, Handles), + case ResHandles1 of + {ok, Handles1} -> erlang:apply(Fun, [Args1, lists:reverse(Handles1)]); + {Error, Handles1} -> {Error, lists:reverse(Handles1)} + end + end). + get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of undefined -> {error, not_open, Ref}; -- cgit v1.2.1 From fbef5aca34a25c3492d8e19cfe397cd33a9bf191 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 24 Nov 2009 17:19:13 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index fca4441b..6631730f 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -94,7 +94,7 @@ -spec(open/3 :: (string(), [any()], [any()]) -> ({'ok', ref()} | error())). 
-spec(close/1 :: (ref()) -> ('ok' | error())). -spec(read/2 :: (ref(), integer()) -> - ({'ok', ([char()]|binary())} | eof | error())). + ({'ok', ([char()]|binary())} | eof | error())). -spec(append/2 :: (ref(), iodata()) -> ok_or_error()). -spec(sync/1 :: (ref()) -> ok_or_error()). -spec(position/2 :: (ref(), position()) -> @@ -106,7 +106,7 @@ -spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> ({'ok', integer()} | error())). --spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). +-spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -endif. @@ -115,7 +115,8 @@ %%---------------------------------------------------------------------------- start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). + gen_server2:start_link({local, ?SERVER}, ?MODULE, [], + [{timeout, infinity}]). open(Path, Mode, Options) -> case is_appender(Mode) of @@ -353,17 +354,17 @@ with_flushed_handles(Refs, Args, Fun) -> with_handles( Refs, Args, fun (Args1, Handles) -> - ResHandles1 = - lists:foldl( - fun (Handle, {ok, HandlesAcc}) -> - {Res, Handle1} = write_buffer(Handle), - {Res, [Handle1 | HandlesAcc]}; - (Handle, {Error, HandlesAcc}) -> - {Error, [Handle | HandlesAcc]} - end, {ok, []}, Handles), - case ResHandles1 of - {ok, Handles1} -> erlang:apply(Fun, [Args1, lists:reverse(Handles1)]); - {Error, Handles1} -> {Error, lists:reverse(Handles1)} + case lists:foldl( + fun (Handle, {ok, HandlesAcc}) -> + {Res, Handle1} = write_buffer(Handle), + {Res, [Handle1 | HandlesAcc]}; + (Handle, {Error, HandlesAcc}) -> + {Error, [Handle | HandlesAcc]} + end, {ok, []}, Handles) of + {ok, Handles1} -> + erlang:apply(Fun, [Args1, lists:reverse(Handles1)]); + {Error, Handles1} -> + {Error, lists:reverse(Handles1)} end end). -- cgit v1.2.1 From 7f383b71aec1ab09856defccd19eca2f00b84b02 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 24 Nov 2009 18:24:11 +0000 Subject: more cosmetic --- src/file_handle_cache.erl | 147 +++++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 66 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 6631730f..276af801 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -134,7 +134,7 @@ open(Path, Mode, Options) -> {error, writer_exists}; false -> RCount1 = case is_reader(Mode1) of - true -> RCount + 1; + true -> RCount + 1; false -> RCount end, put({GRef, fhc_file}, @@ -144,7 +144,7 @@ open(Path, Mode, Options) -> Ref = make_ref(), case open1(Path1, Mode1, Options, Ref, GRef, bof) of {ok, _Handle} -> {ok, Ref}; - Error -> Error + Error -> Error end end; undefined -> @@ -160,7 +160,7 @@ open(Path, Mode, Options) -> close(Ref) -> case erase({Ref, fhc_handle}) of undefined -> ok; - Handle -> close1(Ref, Handle, hard) + Handle -> close1(Ref, Handle, hard) end. read(Ref, Count) -> @@ -202,13 +202,14 @@ set_maximum_since_use(MaximumAge) -> case Hdl /= closed andalso Age >= MaximumAge of true -> case close1(Ref, Handle, soft) of - {ok, Handle1} -> - put({Ref, fhc_handle}, Handle1); - _ -> ok + {ok, Handle1} -> put({Ref, fhc_handle}, Handle1); + _ -> ok end; - false -> ok + false -> + ok end; - (_KeyValuePair) -> ok + (_KeyValuePair) -> + ok end, get()), report_eldest(). 
@@ -229,8 +230,10 @@ internal_read(Count, [Handle = #handle { hdl = Hdl, offset = Offset }]) -> {ok, Data} = Obj -> Size = iolist_size(Data), {Obj, [Handle #handle { offset = Offset + Size }]}; - eof -> {eof, [Handle #handle { at_eof = true }]}; - Error -> {Error, [Handle]} + eof -> + {eof, [Handle #handle { at_eof = true }]}; + Error -> + {Error, [Handle]} end. internal_append(_Data, [#handle { is_write = false }]) -> @@ -251,17 +254,18 @@ internal_sync(ok, [Handle]) -> case write_buffer(Handle) of {ok, Handle1 = #handle {hdl = Hdl, offset = Offset, is_dirty = true}} -> case file:sync(Hdl) of - ok -> {ok, [Handle1 #handle { trusted_offset = Offset, - is_dirty = false }]}; + ok -> {ok, [Handle1 #handle { trusted_offset = Offset, + is_dirty = false }]}; Error -> {Error, [Handle1]} end; - {Error, Handle1} -> {Error, [Handle1]} + {Error, Handle1} -> + {Error, [Handle1]} end. internal_position(NewOffset, [Handle]) -> case write_buffer(Handle) of - {ok, Handle1} -> {Result, Handle2} = maybe_seek(NewOffset, Handle1), - {Result, [Handle2]}; + {ok, Handle1} -> {Result, Handle2} = maybe_seek(NewOffset, Handle1), + {Result, [Handle2]}; {Error, Handle1} -> {Error, [Handle1]} end. @@ -272,14 +276,16 @@ internal_truncate(ok, [Handle]) -> {ok, Handle1 = #handle { hdl = Hdl, offset = Offset, trusted_offset = TrustedOffset }} -> case file:truncate(Hdl) of - ok -> {ok, - [Handle1 #handle { - at_eof = true, - trusted_offset = lists:min([Offset, TrustedOffset]) - }]}; - Error -> {Error, [Handle1]} + ok -> + {ok, [Handle1 #handle { + at_eof = true, + trusted_offset = lists:min([Offset, TrustedOffset]) + }]}; + Error -> + {Error, [Handle1]} end; - {Error, Handle1} -> {Error, Handle1} + {Error, Handle1} -> + {Error, Handle1} end. internal_last_sync_offset(ok, [#handle { trusted_offset = TrustedOffset }]) -> @@ -302,9 +308,8 @@ internal_copy(Count, [SHandle = #handle { is_read = true, hdl = SHdl, offset = SOffset }, DHandle = #handle { is_write = true, hdl = DHdl, offset = DOffset }]) -> - Result1 = file:copy(SHdl, DHdl, Count), - case Result1 of - {ok, Count1} -> + case file:copy(SHdl, DHdl, Count) of + {ok, Count1} = Result1 -> {Result1, [SHandle #handle { offset = SOffset + Count1 }, DHandle #handle { offset = DOffset + Count1 }]}; @@ -322,7 +327,7 @@ report_eldest() -> with_age_tree( fun (Tree) -> case gb_trees:is_empty(Tree) of - true -> Tree; + true -> Tree; false -> {Oldest, _Ref} = gb_trees:smallest(Tree), gen_server2:cast(?SERVER, {self(), update, Oldest}) end, @@ -335,9 +340,10 @@ with_handles(Refs, Args, Fun) -> fun (Ref, {ok, HandlesAcc}) -> case get_or_reopen(Ref) of {ok, Handle} -> {ok, [Handle | HandlesAcc]}; - Error -> Error + Error -> Error end; - (_Ref, Error) -> Error + (_Ref, Error) -> + Error end, {ok, []}, Refs), case ResHandles of {ok, Handles} -> @@ -345,9 +351,11 @@ with_handles(Refs, Args, Fun) -> {Result, Handles1} when is_list(Handles1) -> lists:zipwith(fun put_handle/2, Refs, Handles1), Result; - Result -> Result + Result -> + Result end; - Error -> Error + Error -> + Error end. 
with_flushed_handles(Refs, Args, Fun) -> @@ -370,7 +378,8 @@ with_flushed_handles(Refs, Args, Fun) -> get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of - undefined -> {error, not_open, Ref}; + undefined -> + {error, not_open, Ref}; #handle { hdl = closed, mode = Mode, global_key = GRef, options = Options, offset = Offset } -> #file { path = Path } = get({GRef, fhc_file}), @@ -382,7 +391,7 @@ get_or_reopen(Ref) -> get_or_create_age_tree() -> case get(fhc_age_tree) of undefined -> gb_trees:empty(); - AgeTree -> AgeTree + AgeTree -> AgeTree end. with_age_tree(Fun) -> @@ -399,8 +408,8 @@ open1(Path, Mode, Options, Ref, GRef, Offset) -> {ok, Hdl} -> WriteBufferSize = case proplists:get_value(write_buffer, Options, unbuffered) of - unbuffered -> 0; - infinity -> infinity; + unbuffered -> 0; + infinity -> infinity; N when is_integer(N) -> N end, Now = now(), @@ -432,40 +441,41 @@ close1(Ref, Handle, SoftOrHard) -> is_read = IsReader, is_write = IsWriter, last_used_at = Then } = Handle1 } -> case Hdl of - closed -> ok; - _ -> ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - ok = file:close(Hdl), - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:delete(Then, Tree), - Oldest = - case gb_trees:is_empty(Tree1) of - true -> undefined; - false -> - {Oldest1, _Ref} = - gb_trees:smallest(Tree1), - Oldest1 - end, - gen_server2:cast( - ?SERVER, {self(), close, Oldest}), - Tree1 - end) + closed -> + ok; + _ -> + ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete(Then, Tree), + Oldest = + case gb_trees:is_empty(Tree1) of + true -> undefined; + false -> {Oldest1, _Ref} = + gb_trees:smallest(Tree1), + Oldest1 + end, + gen_server2:cast( + ?SERVER, {self(), close, Oldest}), + Tree1 + end) end, case SoftOrHard of hard -> #file { reader_count = RCount, has_writer = HasWriter, path = Path } = File = get({GRef, fhc_file}), RCount1 = case IsReader of - true -> RCount - 1; + true -> RCount - 1; false -> RCount end, HasWriter1 = HasWriter andalso not IsWriter, case RCount1 =:= 0 andalso not HasWriter1 of - true -> erase({GRef, fhc_file}), - erase({Path, fhc_path}); + true -> erase({GRef, fhc_file}), + erase({Path, fhc_path}); false -> put({GRef, fhc_file}, File #file { reader_count = RCount1, has_writer = HasWriter1 }) @@ -483,13 +493,14 @@ maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, offset = Offset }) -> {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), Result = case NeedsSeek of - true -> file:position(Hdl, NewOffset); + true -> file:position(Hdl, NewOffset); false -> {ok, Offset} end, case Result of {ok, Offset1} -> {Result, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; - {error, _} = Error -> {Error, Handle} + {error, _} = Error -> + {Error, Handle} end. write_to_buffer(Data, Handle = #handle { hdl = Hdl, offset = Offset, @@ -505,7 +516,7 @@ write_to_buffer(Data, Handle = Handle1 = Handle #handle { write_buffer = [ Data | WriteBuffer ], write_buffer_size = Size1 }, case Limit /= infinity andalso Size1 > Limit of - true -> write_buffer(Handle1); + true -> write_buffer(Handle1); false -> {ok, Handle1} end. 
@@ -563,9 +574,11 @@ needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> init([]) -> Limit = case application:get_env(file_handles_high_watermark) of - {ok, Watermark} - when is_integer(Watermark) andalso Watermark > 0 -> Watermark; - _ -> ulimit() + {ok, Watermark} when (is_integer(Watermark) andalso + Watermark > 0) -> + Watermark; + _ -> + ulimit() end, rabbit_log:info("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0}}. @@ -662,7 +675,9 @@ ulimit() -> catch throw:_ -> case os:type() of - {win32, _OsName} -> ?FILE_HANDLES_LIMIT_WINDOWS; - _ -> ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS + {win32, _OsName} -> + ?FILE_HANDLES_LIMIT_WINDOWS; + _ -> + ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS end end. -- cgit v1.2.1 From 150807030c0dcc5228721c8f3a767efba7754d4c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 24 Nov 2009 19:39:29 +0000 Subject: more cosmetic --- src/file_handle_cache.erl | 84 +++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 276af801..ac86d35e 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -196,17 +196,15 @@ copy(Src, Dest, Count) -> set_maximum_since_use(MaximumAge) -> Now = now(), lists:foreach( - fun ({{Ref, fhc_handle}, Handle = - #handle { hdl = Hdl, last_used_at = Then }}) -> + fun ({{Ref, fhc_handle}, Handle = #handle { hdl = Hdl, + last_used_at = Then }}) -> Age = timer:now_diff(Now, Then), case Hdl /= closed andalso Age >= MaximumAge of - true -> - case close1(Ref, Handle, soft) of - {ok, Handle1} -> put({Ref, fhc_handle}, Handle1); - _ -> ok - end; - false -> - ok + true -> case close1(Ref, Handle, soft) of + {ok, Handle1} -> put({Ref, fhc_handle}, Handle1); + _ -> ok + end; + false -> ok end; (_KeyValuePair) -> ok @@ -323,6 +321,12 @@ internal_copy(_Count, _Handles) -> %% Internal functions %%---------------------------------------------------------------------------- +is_reader(Mode) -> lists:member(read, Mode). + +is_writer(Mode) -> lists:member(write, Mode). + +is_appender(Mode) -> lists:member(append, Mode). + report_eldest() -> with_age_tree( fun (Tree) -> @@ -499,10 +503,37 @@ maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, case Result of {ok, Offset1} -> {Result, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; - {error, _} = Error -> + {error, _} = Error -> {Error, Handle} end. 
+needs_seek(AtEof, _CurOffset, DesiredOffset) + when DesiredOffset == cur orelse DesiredOffset == {cur, 0} -> + {AtEof, false}; +needs_seek(true, _CurOffset, DesiredOffset) + when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> + {true, false}; +needs_seek(false, _CurOffset, DesiredOffset) + when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> + {true, true}; +needs_seek(AtEof, 0, DesiredOffset) + when DesiredOffset == bof orelse DesiredOffset == {bof, 0} -> + {AtEof, false}; +needs_seek(AtEof, CurOffset, CurOffset) -> + {AtEof, false}; +needs_seek(true, CurOffset, {bof, DesiredOffset}) + when DesiredOffset >= CurOffset -> + {true, true}; +needs_seek(true, _CurOffset, {cur, DesiredOffset}) + when DesiredOffset > 0 -> + {true, true}; +needs_seek(true, CurOffset, DesiredOffset) %% same as {bof, DO} + when is_integer(DesiredOffset) andalso DesiredOffset >= CurOffset -> + {true, true}; +%% because we can't really track size, we could well end up at EoF and not know +needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> + {false, true}. + write_to_buffer(Data, Handle = #handle { hdl = Hdl, offset = Offset, write_buffer_size_limit = 0 }) -> Offset1 = Offset + iolist_size(Data), @@ -535,39 +566,6 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, {Error, Handle} end. -is_reader(Mode) -> lists:member(read, Mode). - -is_writer(Mode) -> lists:member(write, Mode). - -is_appender(Mode) -> lists:member(append, Mode). - -needs_seek(AtEof, _CurOffset, DesiredOffset) - when DesiredOffset == cur orelse DesiredOffset == {cur, 0} -> - {AtEof, false}; -needs_seek(true, _CurOffset, DesiredOffset) - when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> - {true, false}; -needs_seek(false, _CurOffset, DesiredOffset) - when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> - {true, true}; -needs_seek(AtEof, 0, DesiredOffset) - when DesiredOffset == bof orelse DesiredOffset == {bof, 0} -> - {AtEof, false}; -needs_seek(AtEof, CurOffset, CurOffset) -> - {AtEof, false}; -needs_seek(true, CurOffset, {bof, DesiredOffset}) - when DesiredOffset >= CurOffset -> - {true, true}; -needs_seek(true, _CurOffset, {cur, DesiredOffset}) - when DesiredOffset > 0 -> - {true, true}; -needs_seek(true, CurOffset, DesiredOffset) %% same as {bof, DO} - when is_integer(DesiredOffset) andalso DesiredOffset >= CurOffset -> - {true, true}; -%% because we can't really track size, we could well end up at EoF and not know -needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> - {false, true}. - %%---------------------------------------------------------------------------- %% gen_server %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 2bd05368128432123bd8c151649017da0871c672 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 24 Nov 2009 20:06:48 +0000 Subject: more cosmetic --- src/file_handle_cache.erl | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index ac86d35e..a966d286 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -507,27 +507,22 @@ maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, {Error, Handle} end. 
-needs_seek(AtEof, _CurOffset, DesiredOffset) - when DesiredOffset == cur orelse DesiredOffset == {cur, 0} -> - {AtEof, false}; -needs_seek(true, _CurOffset, DesiredOffset) - when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> - {true, false}; -needs_seek(false, _CurOffset, DesiredOffset) - when DesiredOffset == eof orelse DesiredOffset == {eof, 0} -> - {true, true}; -needs_seek(AtEof, 0, DesiredOffset) - when DesiredOffset == bof orelse DesiredOffset == {bof, 0} -> - {AtEof, false}; -needs_seek(AtEof, CurOffset, CurOffset) -> - {AtEof, false}; -needs_seek(true, CurOffset, {bof, DesiredOffset}) +needs_seek( AtEoF, _CurOffset, cur ) -> {AtEoF, false}; +needs_seek( AtEoF, _CurOffset, {cur, 0}) -> {AtEoF, false}; +needs_seek( true, _CurOffset, eof ) -> {true , false}; +needs_seek( true, _CurOffset, {eof, 0}) -> {true , false}; +needs_seek( false, _CurOffset, eof ) -> {true , true }; +needs_seek( false, _CurOffset, {eof, 0}) -> {true , true }; +needs_seek( AtEoF, 0, bof ) -> {AtEoF, false}; +needs_seek( AtEoF, 0, {bof, 0}) -> {AtEoF, false}; +needs_seek( AtEoF, CurOffset, CurOffset) -> {AtEoF, false}; +needs_seek( true, CurOffset, {bof, DesiredOffset}) when DesiredOffset >= CurOffset -> {true, true}; -needs_seek(true, _CurOffset, {cur, DesiredOffset}) +needs_seek( true, _CurOffset, {cur, DesiredOffset}) when DesiredOffset > 0 -> {true, true}; -needs_seek(true, CurOffset, DesiredOffset) %% same as {bof, DO} +needs_seek( true, CurOffset, DesiredOffset) %% same as {bof, DO} when is_integer(DesiredOffset) andalso DesiredOffset >= CurOffset -> {true, true}; %% because we can't really track size, we could well end up at EoF and not know -- cgit v1.2.1 From d52b6b73dbe6c1ee402d20b04cf5e0548498d46a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 24 Nov 2009 23:55:18 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index a966d286..df7e6564 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -644,10 +644,12 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, maybe_reduce(State) -> State. -%% Googling around suggests that Windows has a limit somewhere around 16M. -%% eg http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx -%% For everything else, assume ulimit exists. Further googling suggests that -%% BSDs (incl OS X), solaris and linux all agree that ulimit -n is file handles +%% Googling around suggests that Windows has a limit somewhere around +%% 16M, eg +%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx +%% For everything else, assume ulimit exists. Further googling +%% suggests that BSDs (incl OS X), solaris and linux all agree that +%% ulimit -n is file handles ulimit() -> try %% under Linux, Solaris and FreeBSD, ulimit is a shell @@ -665,12 +667,10 @@ ulimit() -> [String]), throw({unexpected_result, String}) end - catch - throw:_ -> - case os:type() of - {win32, _OsName} -> - ?FILE_HANDLES_LIMIT_WINDOWS; - _ -> - ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS - end + catch _ -> case os:type() of + {win32, _OsName} -> + ?FILE_HANDLES_LIMIT_WINDOWS; + _ -> + ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS + end end. 
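
The needs_seek/3 clause table a couple of hunks up reads as a small truth
table. A few concrete rows, written out as an illustrative sketch
(needs_seek/3 is not exported, so exercising it like this assumes the
clauses are copied into a scratch module):

needs_seek_examples() ->
    %% already at the desired offset: no seek needed
    {false, false} = needs_seek(false, 100, 100),
    %% already at eof and asked for eof: nothing to do
    {true, false}  = needs_seek(true, 100, eof),
    %% not at eof but asked for eof: seek, and we will then be at eof
    {true, true}   = needs_seek(false, 100, eof),
    %% at offset 0 and asked for bof: no seek needed
    {false, false} = needs_seek(false, 0, bof),
    %% a backwards seek falls through to the catch-all: play safe
    {false, true}  = needs_seek(false, 100, {bof, 50}),
    ok.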
-- cgit v1.2.1 From dc9d84497253be6a1ff4ab59f7667e9a30c22217 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 24 Nov 2009 23:55:55 +0000 Subject: minor refactoring --- src/file_handle_cache.erl | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index df7e6564..d4a2b00f 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -238,9 +238,24 @@ internal_append(_Data, [#handle { is_write = false }]) -> {error, not_open_for_writing}; internal_append(Data, [Handle]) -> case maybe_seek(eof, Handle) of - {{ok, _Offset}, Handle1 = #handle { at_eof = true }} -> - {Result, Handle2} = write_to_buffer(Data, Handle1), - {Result, [Handle2]}; + {{ok, _Offset}, Handle1 = #handle { hdl = Hdl, offset = Offset, + write_buffer_size_limit = 0, + at_eof = true }} -> + Offset1 = Offset + iolist_size(Data), + {file:write(Hdl, Data), + [Handle1 #handle { is_dirty = true, offset = Offset1 }]}; + {{ok, _Offset}, Handle1 = #handle { write_buffer = WriteBuffer, + write_buffer_size = Size, + write_buffer_size_limit = Limit, + at_eof = true }} -> + Size1 = Size + iolist_size(Data), + Handle2 = Handle1 #handle { write_buffer = [ Data | WriteBuffer ], + write_buffer_size = Size1 }, + case Limit /= infinity andalso Size1 > Limit of + true -> {Result, Handle3} = write_buffer(Handle2), + {Result, [Handle3]}; + false -> {ok, [Handle2]} + end; {{error, _} = Error, Handle1} -> {Error, [Handle1]} end. @@ -529,23 +544,6 @@ needs_seek( true, CurOffset, DesiredOffset) %% same as {bof, DO} needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> {false, true}. -write_to_buffer(Data, Handle = #handle { hdl = Hdl, offset = Offset, - write_buffer_size_limit = 0 }) -> - Offset1 = Offset + iolist_size(Data), - {file:write(Hdl, Data), - Handle #handle { is_dirty = true, offset = Offset1 }}; -write_to_buffer(Data, Handle = - #handle { write_buffer = WriteBuffer, - write_buffer_size = Size, - write_buffer_size_limit = Limit }) -> - Size1 = Size + iolist_size(Data), - Handle1 = Handle #handle { write_buffer = [ Data | WriteBuffer ], - write_buffer_size = Size1 }, - case Limit /= infinity andalso Size1 > Limit of - true -> write_buffer(Handle1); - false -> {ok, Handle1} - end. - write_buffer(Handle = #handle { write_buffer = [] }) -> {ok, Handle}; write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, -- cgit v1.2.1 From bc739724501c978c6c9fb9429c6131121371e691 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 11:16:48 +0000 Subject: Remove the double indirection of fhc_file and fhc_path; make open non-recursive --- src/file_handle_cache.erl | 84 ++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index d4a2b00f..26cedca7 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -52,8 +52,7 @@ -record(file, { reader_count, - has_writer, - path + has_writer }). -record(handle, @@ -69,7 +68,7 @@ is_read, mode, options, - global_key, + path, last_used_at }). 
@@ -123,37 +122,13 @@ open(Path, Mode, Options) -> true -> {error, append_not_supported}; false -> Path1 = filename:absname(Path), - case get({Path1, fhc_path}) of - {gref, GRef} -> - #file { reader_count = RCount, has_writer = HasWriter } - = File = get({GRef, fhc_file}), - Mode1 = lists:usort(Mode), - IsWriter = is_writer(Mode1), - case IsWriter andalso HasWriter of - true -> - {error, writer_exists}; - false -> - RCount1 = case is_reader(Mode1) of - true -> RCount + 1; - false -> RCount - end, - put({GRef, fhc_file}, - File #file { - reader_count = RCount1, - has_writer = HasWriter orelse IsWriter }), - Ref = make_ref(), - case open1(Path1, Mode1, Options, Ref, GRef, bof) of - {ok, _Handle} -> {ok, Ref}; - Error -> Error - end - end; + case get({Path1, fhc_file}) of + File = #file {} -> + open_new_record(Path1, Mode, Options, File); undefined -> - GRef = make_ref(), - put({Path1, fhc_path}, {gref, GRef}), - put({GRef, fhc_file}, - #file { reader_count = 0, has_writer = false, - path = Path1 }), - open(Path, Mode, Options) + File = #file { reader_count = 0, has_writer = false }, + put({Path1, fhc_file}, File), + open_new_record(Path1, Mode, Options, File) end end. @@ -399,10 +374,9 @@ get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of undefined -> {error, not_open, Ref}; - #handle { hdl = closed, mode = Mode, global_key = GRef, - options = Options, offset = Offset } -> - #file { path = Path } = get({GRef, fhc_file}), - open1(Path, Mode, Options, Ref, GRef, Offset); + #handle { hdl = closed, mode = Mode, options = Options, + offset = Offset, path = Path } -> + open1(Path, Mode, Options, Ref, Offset); Handle -> {ok, Handle} end. @@ -422,7 +396,28 @@ put_handle(Ref, Handle = #handle { last_used_at = Then }) -> fun (Tree) -> gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) end), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). -open1(Path, Mode, Options, Ref, GRef, Offset) -> +open_new_record(Path, Mode, Options, File = + #file { reader_count = RCount, has_writer = HasWriter }) -> + IsWriter = is_writer(Mode), + case IsWriter andalso HasWriter of + true -> + {error, writer_exists}; + false -> + RCount1 = case is_reader(Mode) of + true -> RCount + 1; + false -> RCount + end, + put({Path, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter orelse IsWriter }), + Ref = make_ref(), + case open1(Path, Mode, Options, Ref, bof) of + {ok, _Handle} -> {ok, Ref}; + Error -> Error + end + end. 
+ +open1(Path, Mode, Options, Ref, Offset) -> case file:open(Path, Mode) of {ok, Hdl} -> WriteBufferSize = @@ -438,7 +433,7 @@ open1(Path, Mode, Options, Ref, GRef, Offset) -> write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), - global_key = GRef, last_used_at = Now, + path = Path, last_used_at = Now, is_dirty = false }, {{ok, _Offset}, Handle1} = maybe_seek(Offset, Handle), put({Ref, fhc_handle}, Handle1), @@ -456,7 +451,7 @@ open1(Path, Mode, Options, Ref, GRef, Offset) -> close1(Ref, Handle, SoftOrHard) -> case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, global_key = GRef, is_dirty = IsDirty, + {ok, #handle { hdl = Hdl, path = Path, is_dirty = IsDirty, is_read = IsReader, is_write = IsWriter, last_used_at = Then } = Handle1 } -> case Hdl of @@ -485,17 +480,16 @@ close1(Ref, Handle, SoftOrHard) -> end, case SoftOrHard of hard -> - #file { reader_count = RCount, has_writer = HasWriter, - path = Path } = File = get({GRef, fhc_file}), + #file { reader_count = RCount, has_writer = HasWriter } = + File = get({Path, fhc_file}), RCount1 = case IsReader of true -> RCount - 1; false -> RCount end, HasWriter1 = HasWriter andalso not IsWriter, case RCount1 =:= 0 andalso not HasWriter1 of - true -> erase({GRef, fhc_file}), - erase({Path, fhc_path}); - false -> put({GRef, fhc_file}, + true -> erase({Path, fhc_file}); + false -> put({Path, fhc_file}, File #file { reader_count = RCount1, has_writer = HasWriter1 }) end, -- cgit v1.2.1 From 97badabb5c3697c5925fe80e39a9161dd50f66d5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 11:30:40 +0000 Subject: corrected order of cast tuple; improved set_maximum_since_use; corrected bug that led to a /0, due to exhausting fhs just through calls to inc and dec (from sockets - only happens when the limit is very very low) --- src/file_handle_cache.erl | 51 ++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 26cedca7..53c3e6a2 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -170,21 +170,26 @@ copy(Src, Dest, Count) -> set_maximum_since_use(MaximumAge) -> Now = now(), - lists:foreach( - fun ({{Ref, fhc_handle}, Handle = #handle { hdl = Hdl, - last_used_at = Then }}) -> + Report = + lists:foldl( + fun ({{Ref, fhc_handle}, + Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> Age = timer:now_diff(Now, Then), case Hdl /= closed andalso Age >= MaximumAge of true -> case close1(Ref, Handle, soft) of - {ok, Handle1} -> put({Ref, fhc_handle}, Handle1); - _ -> ok + {ok, Handle1} -> put({Ref, fhc_handle}, Handle1), + false; + _ -> Rep end; - false -> ok + false -> Rep end; - (_KeyValuePair) -> - ok - end, get()), - report_eldest(). + (_KeyValuePair, Rep) -> + Rep + end, true, get()), + case Report of + true -> report_eldest(); + false -> ok + end. decrement() -> gen_server2:cast(?SERVER, decrement). 
@@ -323,7 +328,7 @@ report_eldest() -> case gb_trees:is_empty(Tree) of true -> Tree; false -> {Oldest, _Ref} = gb_trees:smallest(Tree), - gen_server2:cast(?SERVER, {self(), update, Oldest}) + gen_server2:cast(?SERVER, {update, self(), Oldest}) end, Tree end), @@ -441,7 +446,7 @@ open1(Path, Mode, Options, Ref, Offset) -> Tree1 = gb_trees:insert(Now, Ref, Tree), {Oldest, _Ref} = gb_trees:smallest(Tree1), gen_server2:cast(?SERVER, - {self(), open, Oldest}), + {open, self(), Oldest}), Tree1 end), {ok, Handle1}; @@ -474,7 +479,7 @@ close1(Ref, Handle, SoftOrHard) -> Oldest1 end, gen_server2:cast( - ?SERVER, {self(), close, Oldest}), + ?SERVER, {close, self(), Oldest}), Tree1 end) end, @@ -571,20 +576,20 @@ init([]) -> handle_call(_Msg, _From, State) -> {reply, message_not_understood, State}. -handle_cast({Pid, open, EldestUnusedSince}, State = +handle_cast({open, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), {noreply, maybe_reduce(State #fhc_state { elders = Elders1, count = Count + 1 })}; -handle_cast({Pid, update, EldestUnusedSince}, State = +handle_cast({update, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders }) -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), %% don't call maybe_reduce from here otherwise we can create a %% storm of messages {noreply, State #fhc_state { elders = Elders1 }}; -handle_cast({Pid, close, EldestUnusedSince}, State = +handle_cast({close, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> Elders1 = case EldestUnusedSince of undefined -> dict:erase(Pid, Elders); @@ -625,11 +630,15 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, {[Pid|PidsAcc], SumAcc + timer:now_diff(Now, Eldest), CountAcc + 1} end, {[], 0, 0}, Elders), - %% ClientCount can't be 0. - AverageAge = Sum / ClientCount, - lists:foreach(fun (Pid) -> - Pid ! {?MODULE, maximum_eldest_since_use, AverageAge} - end, Pids), + case Pids of + [] -> ok; + _ -> + %% ClientCount can't be 0 if we have some pids + AverageAge = Sum / ClientCount, + lists:foreach( + fun (Pid) -> Pid ! {?MODULE, maximum_eldest_since_use, AverageAge} + end, Pids) + end, {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server2, cast, [?SERVER, check_counts]), State; -- cgit v1.2.1 From abb15269f2527ea4fe0053224b565b0187b41ebb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 11:33:56 +0000 Subject: removed calls to rabbit_log, using error_logger instead. Still one call to rabbit_misc:cmd. Not sure quite what to do about that... --- src/file_handle_cache.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 53c3e6a2..cf278179 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -570,7 +570,7 @@ init([]) -> _ -> ulimit() end, - rabbit_log:info("Limiting to approx ~p file handles~n", [Limit]), + error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0}}. 
handle_call(_Msg, _From, State) -> @@ -664,8 +664,8 @@ ulimit() -> String)) - ?RESERVED_FOR_OTHERS, lists:max([1, Num]); String -> - rabbit_log:warning("Unexpected result of \"ulimit -n\": ~p~n", - [String]), + error_logger:warning_msg( + "Unexpected result of \"ulimit -n\": ~p~n", [String]), throw({unexpected_result, String}) end catch _ -> case os:type() of -- cgit v1.2.1 From 872772f6b6576da4cad68f77a72a6d39ff77408b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 13:07:29 +0000 Subject: Corrected spec for open and inlined the internal versions. Also used the with_flushed_handles for truncate and position. It breaks sync if you do *both* the flushed form and then the obvious refactoring within it (i.e. pull the Handle1 up) and I can't quite see why --- src/file_handle_cache.erl | 248 ++++++++++++++++++++++------------------------ 1 file changed, 121 insertions(+), 127 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index cf278179..bbc09b8a 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -90,7 +90,10 @@ -type(position() :: ('bof' | 'eof' | {'bof',integer()} | {'eof',integer()} | {'cur',integer()} | integer())). --spec(open/3 :: (string(), [any()], [any()]) -> ({'ok', ref()} | error())). +-spec(open/3 :: + (string(), [any()], + [{'write_buffer', (non_neg_integer()|'infinity'|'unbuffered')}]) -> + ({'ok', ref()} | error())). -spec(close/1 :: (ref()) -> ('ok' | error())). -spec(read/2 :: (ref(), integer()) -> ({'ok', ([char()]|binary())} | eof | error())). @@ -139,34 +142,135 @@ close(Ref) -> end. read(Ref, Count) -> - with_flushed_handles([Ref], Count, fun internal_read/2). + with_flushed_handles( + [Ref], fun ([#handle { is_read = false }]) -> + {error, not_open_for_reading}; + ([Handle = #handle { hdl = Hdl, offset = Offset }]) -> + case file:read(Hdl, Count) of + {ok, Data} = Obj -> + Size = iolist_size(Data), + {Obj, [Handle #handle { offset = Offset + Size }]}; + eof -> + {eof, [Handle #handle { at_eof = true }]}; + Error -> + {Error, [Handle]} + end + end). append(Ref, Data) -> - with_handles([Ref], Data, fun internal_append/2). + with_handles( + [Ref], + fun ([#handle { is_write = false }]) -> + {error, not_open_for_writing}; + ([Handle]) -> + case maybe_seek(eof, Handle) of + {{ok, _Offset}, #handle { hdl = Hdl, offset = Offset, + write_buffer_size_limit = 0, + at_eof = true } = Handle1} -> + Offset1 = Offset + iolist_size(Data), + {file:write(Hdl, Data), + [Handle1 #handle { is_dirty = true, offset = Offset1 }]}; + {{ok, _Offset}, #handle { write_buffer = WriteBuffer, + write_buffer_size = Size, + write_buffer_size_limit = Limit, + at_eof = true } = Handle1} -> + Size1 = Size + iolist_size(Data), + Handle2 = Handle1 #handle { write_buffer = [ Data | WriteBuffer ], + write_buffer_size = Size1 }, + case Limit /= infinity andalso Size1 > Limit of + true -> {Result, Handle3} = write_buffer(Handle2), + {Result, [Handle3]}; + false -> {ok, [Handle2]} + end; + {{error, _} = Error, Handle1} -> + {Error, [Handle1]} + end + end). sync(Ref) -> - with_handles([Ref], ok, fun internal_sync/2). 
+ with_handles( + [Ref], + fun ([#handle { is_dirty = false, write_buffer = [] }]) -> + ok; + ([Handle]) -> + %% write_buffer will set is_dirty, or leave it set if buffer empty + case write_buffer(Handle) of + {ok, Handle1 = #handle { hdl = Hdl, offset = Offset, + is_dirty = true }} -> + case file:sync(Hdl) of + ok -> + {ok, [Handle1 #handle { trusted_offset = Offset, + is_dirty = false }]}; + Error -> + {Error, [Handle1]} + end; + {Error, Handle1} -> + {Error, [Handle1]} + end + end). position(Ref, NewOffset) -> - with_handles([Ref], NewOffset, fun internal_position/2). + with_flushed_handles( + [Ref], fun ([Handle]) -> + {Result, Handle1} = maybe_seek(NewOffset, Handle), + {Result, [Handle1]} + end). truncate(Ref) -> - with_handles([Ref], ok, fun internal_truncate/2). + with_flushed_handles( + [Ref], + fun ([Handle1 = #handle { hdl = Hdl, offset = Offset, + trusted_offset = TrustedOffset }]) -> + case file:truncate(Hdl) of + ok -> + {ok, [Handle1 #handle { + at_eof = true, + trusted_offset = lists:min([Offset, + TrustedOffset]) + }]}; + Error -> + {Error, [Handle1]} + end + end). last_sync_offset(Ref) -> - with_handles([Ref], ok, fun internal_last_sync_offset/2). + with_handles([Ref], fun ([#handle { trusted_offset = TrustedOffset }]) -> + {ok, TrustedOffset} + end). current_virtual_offset(Ref) -> - with_handles([Ref], ok, fun internal_current_virtual_offset/2). + with_handles([Ref], + fun ([#handle { at_eof = true, is_write = true, + offset = Offset, + write_buffer_size = Size }]) -> + {ok, Offset + Size}; + ([#handle { offset = Offset }]) -> + {ok, Offset} + end). current_raw_offset(Ref) -> - with_handles([Ref], ok, fun internal_current_raw_offset/2). + with_handles([Ref], fun ([Handle]) -> {ok, Handle #handle.offset} end). append_write_buffer(Ref) -> - with_flushed_handles([Ref], ok, fun internal_append_write_buffer/2). + with_flushed_handles([Ref], fun ([Handle]) -> {ok, [Handle]} end). copy(Src, Dest, Count) -> - with_flushed_handles([Src, Dest], Count, fun internal_copy/2). + with_flushed_handles( + [Src, Dest], + fun ([SHandle = #handle { is_read = true, hdl = SHdl, offset = SOffset }, + DHandle = #handle { is_write = true, hdl = DHdl, offset = DOffset }] + ) -> + case file:copy(SHdl, DHdl, Count) of + {ok, Count1} = Result1 -> + {Result1, + [SHandle #handle { offset = SOffset + Count1 }, + DHandle #handle { offset = DOffset + Count1 }]}; + Error -> + {Error, [SHandle, DHandle]} + end; + (_Handles) -> + {error, incorrect_handle_modes} + end). set_maximum_since_use(MaximumAge) -> Now = now(), @@ -201,116 +305,6 @@ increment() -> %% Internal versions for the above %%---------------------------------------------------------------------------- -internal_read(_Count, [#handle { is_read = false }]) -> - {error, not_open_for_reading}; -internal_read(Count, [Handle = #handle { hdl = Hdl, offset = Offset }]) -> - case file:read(Hdl, Count) of - {ok, Data} = Obj -> - Size = iolist_size(Data), - {Obj, [Handle #handle { offset = Offset + Size }]}; - eof -> - {eof, [Handle #handle { at_eof = true }]}; - Error -> - {Error, [Handle]} - end. 
- -internal_append(_Data, [#handle { is_write = false }]) -> - {error, not_open_for_writing}; -internal_append(Data, [Handle]) -> - case maybe_seek(eof, Handle) of - {{ok, _Offset}, Handle1 = #handle { hdl = Hdl, offset = Offset, - write_buffer_size_limit = 0, - at_eof = true }} -> - Offset1 = Offset + iolist_size(Data), - {file:write(Hdl, Data), - [Handle1 #handle { is_dirty = true, offset = Offset1 }]}; - {{ok, _Offset}, Handle1 = #handle { write_buffer = WriteBuffer, - write_buffer_size = Size, - write_buffer_size_limit = Limit, - at_eof = true }} -> - Size1 = Size + iolist_size(Data), - Handle2 = Handle1 #handle { write_buffer = [ Data | WriteBuffer ], - write_buffer_size = Size1 }, - case Limit /= infinity andalso Size1 > Limit of - true -> {Result, Handle3} = write_buffer(Handle2), - {Result, [Handle3]}; - false -> {ok, [Handle2]} - end; - {{error, _} = Error, Handle1} -> - {Error, [Handle1]} - end. - -internal_sync(ok, [#handle { is_dirty = false, write_buffer = [] }]) -> - ok; -internal_sync(ok, [Handle]) -> - %% write_buffer will set is_dirty, or leave it set if buffer empty - case write_buffer(Handle) of - {ok, Handle1 = #handle {hdl = Hdl, offset = Offset, is_dirty = true}} -> - case file:sync(Hdl) of - ok -> {ok, [Handle1 #handle { trusted_offset = Offset, - is_dirty = false }]}; - Error -> {Error, [Handle1]} - end; - {Error, Handle1} -> - {Error, [Handle1]} - end. - -internal_position(NewOffset, [Handle]) -> - case write_buffer(Handle) of - {ok, Handle1} -> {Result, Handle2} = maybe_seek(NewOffset, Handle1), - {Result, [Handle2]}; - {Error, Handle1} -> {Error, [Handle1]} - end. - -internal_truncate(ok, [#handle { is_write = false }]) -> - {error, not_open_for_writing}; -internal_truncate(ok, [Handle]) -> - case write_buffer(Handle) of - {ok, Handle1 = #handle { hdl = Hdl, offset = Offset, - trusted_offset = TrustedOffset }} -> - case file:truncate(Hdl) of - ok -> - {ok, [Handle1 #handle { - at_eof = true, - trusted_offset = lists:min([Offset, TrustedOffset]) - }]}; - Error -> - {Error, [Handle1]} - end; - {Error, Handle1} -> - {Error, Handle1} - end. - -internal_last_sync_offset(ok, [#handle { trusted_offset = TrustedOffset }]) -> - {ok, TrustedOffset}. - -internal_current_virtual_offset(ok, [#handle { at_eof = true, is_write = true, - offset = Offset, - write_buffer_size = Size }]) -> - {ok, Offset + Size}; -internal_current_virtual_offset(ok, [#handle { offset = Offset }]) -> - {ok, Offset}. - -internal_current_raw_offset(ok, [#handle { offset = Offset }]) -> - {ok, Offset}. - -internal_append_write_buffer(ok, [Handle]) -> - {ok, [Handle]}. - -internal_copy(Count, [SHandle = #handle { is_read = true, hdl = SHdl, - offset = SOffset }, - DHandle = #handle { is_write = true, hdl = DHdl, - offset = DOffset }]) -> - case file:copy(SHdl, DHdl, Count) of - {ok, Count1} = Result1 -> - {Result1, - [SHandle #handle { offset = SOffset + Count1 }, - DHandle #handle { offset = DOffset + Count1 }]}; - Error -> - {Error, [SHandle, DHandle]} - end; -internal_copy(_Count, _Handles) -> - {error, incorrect_handle_modes}. %%---------------------------------------------------------------------------- %% Internal functions @@ -334,7 +328,7 @@ report_eldest() -> end), ok. 
-with_handles(Refs, Args, Fun) -> +with_handles(Refs, Fun) -> ResHandles = lists:foldl( fun (Ref, {ok, HandlesAcc}) -> case get_or_reopen(Ref) of @@ -346,7 +340,7 @@ with_handles(Refs, Args, Fun) -> end, {ok, []}, Refs), case ResHandles of {ok, Handles} -> - case erlang:apply(Fun, [Args, lists:reverse(Handles)]) of + case erlang:apply(Fun, [lists:reverse(Handles)]) of {Result, Handles1} when is_list(Handles1) -> lists:zipwith(fun put_handle/2, Refs, Handles1), Result; @@ -357,10 +351,10 @@ with_handles(Refs, Args, Fun) -> Error end. -with_flushed_handles(Refs, Args, Fun) -> +with_flushed_handles(Refs, Fun) -> with_handles( - Refs, Args, - fun (Args1, Handles) -> + Refs, + fun (Handles) -> case lists:foldl( fun (Handle, {ok, HandlesAcc}) -> {Res, Handle1} = write_buffer(Handle), @@ -369,7 +363,7 @@ with_flushed_handles(Refs, Args, Fun) -> {Error, [Handle | HandlesAcc]} end, {ok, []}, Handles) of {ok, Handles1} -> - erlang:apply(Fun, [Args1, lists:reverse(Handles1)]); + erlang:apply(Fun, [lists:reverse(Handles1)]); {Error, Handles1} -> {Error, lists:reverse(Handles1)} end -- cgit v1.2.1 From 0b09b4863e8b3a17f311733d26ddc3138c3e7bfc Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 25 Nov 2009 13:18:18 +0000 Subject: inlining --- src/file_handle_cache.erl | 55 ++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index bbc09b8a..85a1e2d1 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -122,16 +122,34 @@ start_link() -> open(Path, Mode, Options) -> case is_appender(Mode) of - true -> {error, append_not_supported}; + true -> + {error, append_not_supported}; false -> Path1 = filename:absname(Path), - case get({Path1, fhc_file}) of - File = #file {} -> - open_new_record(Path1, Mode, Options, File); - undefined -> - File = #file { reader_count = 0, has_writer = false }, - put({Path1, fhc_file}, File), - open_new_record(Path1, Mode, Options, File) + File1 = #file { reader_count = RCount, has_writer = HasWriter } = + case get({Path1, fhc_file}) of + File = #file {} -> File; + undefined -> File = #file { reader_count = 0, + has_writer = false }, + put({Path1, fhc_file}, File), + File + end, + IsWriter = is_writer(Mode), + case IsWriter andalso HasWriter of + true -> {error, writer_exists}; + false -> RCount1 = case is_reader(Mode) of + true -> RCount + 1; + false -> RCount + end, + HasWriter1 = HasWriter orelse IsWriter, + put({Path1, fhc_file}, + File1 #file { reader_count = RCount1, + has_writer = HasWriter1}), + Ref = make_ref(), + case open1(Path1, Mode, Options, Ref, bof) of + {ok, _Handle} -> {ok, Ref}; + Error -> Error + end end end. @@ -395,27 +413,6 @@ put_handle(Ref, Handle = #handle { last_used_at = Then }) -> fun (Tree) -> gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) end), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). -open_new_record(Path, Mode, Options, File = - #file { reader_count = RCount, has_writer = HasWriter }) -> - IsWriter = is_writer(Mode), - case IsWriter andalso HasWriter of - true -> - {error, writer_exists}; - false -> - RCount1 = case is_reader(Mode) of - true -> RCount + 1; - false -> RCount - end, - put({Path, fhc_file}, - File #file { reader_count = RCount1, - has_writer = HasWriter orelse IsWriter }), - Ref = make_ref(), - case open1(Path, Mode, Options, Ref, bof) of - {ok, _Handle} -> {ok, Ref}; - Error -> Error - end - end. 
- open1(Path, Mode, Options, Ref, Offset) -> case file:open(Path, Mode) of {ok, Hdl} -> -- cgit v1.2.1 From 68ec26fb9dc35742ca188cfbd4f21cefc7df41e8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 25 Nov 2009 13:26:18 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 80 ++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 85a1e2d1..3aace20a 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -161,19 +161,18 @@ close(Ref) -> read(Ref, Count) -> with_flushed_handles( - [Ref], fun ([#handle { is_read = false }]) -> - {error, not_open_for_reading}; - ([Handle = #handle { hdl = Hdl, offset = Offset }]) -> - case file:read(Hdl, Count) of - {ok, Data} = Obj -> - Size = iolist_size(Data), - {Obj, [Handle #handle { offset = Offset + Size }]}; - eof -> - {eof, [Handle #handle { at_eof = true }]}; - Error -> - {Error, [Handle]} - end - end). + [Ref], + fun ([#handle { is_read = false }]) -> + {error, not_open_for_reading}; + ([Handle = #handle { hdl = Hdl, offset = Offset }]) -> + case file:read(Hdl, Count) of + {ok, Data} = Obj -> Offset1 = Offset + iolist_size(Data), + {Obj, + [Handle #handle { offset = Offset1 }]}; + eof -> {eof, [Handle #handle { at_eof = true }]}; + Error -> {Error, [Handle]} + end + end). append(Ref, Data) -> with_handles( @@ -192,8 +191,9 @@ append(Ref, Data) -> write_buffer_size = Size, write_buffer_size_limit = Limit, at_eof = true } = Handle1} -> + WriteBuffer1 = [Data | WriteBuffer], Size1 = Size + iolist_size(Data), - Handle2 = Handle1 #handle { write_buffer = [ Data | WriteBuffer ], + Handle2 = Handle1 #handle { write_buffer = WriteBuffer1, write_buffer_size = Size1 }, case Limit /= infinity andalso Size1 > Limit of true -> {Result, Handle3} = write_buffer(Handle2), @@ -211,16 +211,16 @@ sync(Ref) -> fun ([#handle { is_dirty = false, write_buffer = [] }]) -> ok; ([Handle]) -> - %% write_buffer will set is_dirty, or leave it set if buffer empty + %% write_buffer will set is_dirty, or leave it set if + %% buffer empty case write_buffer(Handle) of {ok, Handle1 = #handle { hdl = Hdl, offset = Offset, is_dirty = true }} -> case file:sync(Hdl) of - ok -> - {ok, [Handle1 #handle { trusted_offset = Offset, - is_dirty = false }]}; - Error -> - {Error, [Handle1]} + ok -> {ok, [Handle1 #handle { + trusted_offset = Offset, + is_dirty = false }]}; + Error -> {Error, [Handle1]} end; {Error, Handle1} -> {Error, [Handle1]} @@ -229,10 +229,10 @@ sync(Ref) -> position(Ref, NewOffset) -> with_flushed_handles( - [Ref], fun ([Handle]) -> - {Result, Handle1} = maybe_seek(NewOffset, Handle), - {Result, [Handle1]} - end). + [Ref], + fun ([Handle]) -> {Result, Handle1} = maybe_seek(NewOffset, Handle), + {Result, [Handle1]} + end). truncate(Ref) -> with_flushed_handles( @@ -240,14 +240,11 @@ truncate(Ref) -> fun ([Handle1 = #handle { hdl = Hdl, offset = Offset, trusted_offset = TrustedOffset }]) -> case file:truncate(Hdl) of - ok -> - {ok, [Handle1 #handle { - at_eof = true, - trusted_offset = lists:min([Offset, - TrustedOffset]) - }]}; - Error -> - {Error, [Handle1]} + ok -> TrustedOffset1 = lists:min([Offset, TrustedOffset]), + {ok, [Handle1 #handle { + at_eof = true, + trusted_offset = TrustedOffset1 }]}; + Error -> {Error, [Handle1]} end end). @@ -257,20 +254,19 @@ last_sync_offset(Ref) -> end). 
current_virtual_offset(Ref) -> - with_handles([Ref], - fun ([#handle { at_eof = true, is_write = true, - offset = Offset, - write_buffer_size = Size }]) -> - {ok, Offset + Size}; - ([#handle { offset = Offset }]) -> - {ok, Offset} - end). + with_handles([Ref], fun ([#handle { at_eof = true, is_write = true, + offset = Offset, + write_buffer_size = Size }]) -> + {ok, Offset + Size}; + ([#handle { offset = Offset }]) -> + {ok, Offset} + end). current_raw_offset(Ref) -> - with_handles([Ref], fun ([Handle]) -> {ok, Handle #handle.offset} end). + with_handles([Ref], fun ([Handle]) -> {ok, Handle #handle.offset} end). append_write_buffer(Ref) -> - with_flushed_handles([Ref], fun ([Handle]) -> {ok, [Handle]} end). + with_flushed_handles([Ref], fun ([Handle]) -> {ok, [Handle]} end). copy(Src, Dest, Count) -> with_flushed_handles( -- cgit v1.2.1 From 5f0681a59506c73f9960de8536251498d99969cf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 13:28:31 +0000 Subject: Utterly baffled. I thought this was the obvious refactoring I was doing earlier, but apparently not. Anyway, this works. --- src/file_handle_cache.erl | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index bbc09b8a..740ae169 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -188,24 +188,18 @@ append(Ref, Data) -> end). sync(Ref) -> - with_handles( + with_flushed_handles( [Ref], fun ([#handle { is_dirty = false, write_buffer = [] }]) -> ok; - ([Handle]) -> - %% write_buffer will set is_dirty, or leave it set if buffer empty - case write_buffer(Handle) of - {ok, Handle1 = #handle { hdl = Hdl, offset = Offset, - is_dirty = true }} -> - case file:sync(Hdl) of - ok -> - {ok, [Handle1 #handle { trusted_offset = Offset, - is_dirty = false }]}; - Error -> - {Error, [Handle1]} - end; - {Error, Handle1} -> - {Error, [Handle1]} + ([Handle = #handle { hdl = Hdl, offset = Offset, + is_dirty = true, write_buffer = [] }]) -> + case file:sync(Hdl) of + ok -> + {ok, [Handle #handle { trusted_offset = Offset, + is_dirty = false }]}; + Error -> + {Error, [Handle]} end end). 
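
As to why this version holds together where the earlier attempt did not, a
plausible reading (an assumption; the commit above is candid that the
author could not see it at the time): with_flushed_handles/2 runs
write_buffer/1 before applying the fun, so the fun only ever sees
write_buffer = [], and exactly the two handle shapes below can reach it.
An annotated sketch of that dispatch, assuming the module's #handle record:

sync_after_flush(#handle { is_dirty = false, write_buffer = [] }) ->
    %% nothing has been written since the last sync; returning a bare
    %% 'ok' is safe because the no-op flush left the handle unchanged,
    %% so there is no updated state to store back via put_handle/2
    ok;
sync_after_flush(Handle = #handle { hdl = Hdl, offset = Offset,
                                    is_dirty = true, write_buffer = [] }) ->
    %% the flush (or an earlier unbuffered write) dirtied the fd, so a
    %% real file:sync/1 is owed, after which the current offset can be
    %% trusted to have reached disk
    case file:sync(Hdl) of
        ok    -> {ok, [Handle #handle { trusted_offset = Offset,
                                        is_dirty = false }]};
        Error -> {Error, [Handle]}
    end.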
-- cgit v1.2.1 From c80c3f4d5055f215459f994b89903c5a03fb3be6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 13:52:38 +0000 Subject: Make sure that trusted_offset in open1 is set to an absolute offset --- src/file_handle_cache.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index a3ebd7a5..dbfffff4 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -414,15 +414,16 @@ open1(Path, Mode, Options, Ref, Offset) -> end, Now = now(), Handle = - #handle { hdl = Hdl, offset = 0, trusted_offset = Offset, + #handle { hdl = Hdl, offset = 0, trusted_offset = 0, write_buffer_size = 0, options = Options, write_buffer_size_limit = WriteBufferSize, write_buffer = [], at_eof = false, mode = Mode, is_write = is_writer(Mode), is_read = is_reader(Mode), path = Path, last_used_at = Now, is_dirty = false }, - {{ok, _Offset}, Handle1} = maybe_seek(Offset, Handle), - put({Ref, fhc_handle}, Handle1), + {{ok, Offset1}, Handle1} = maybe_seek(Offset, Handle), + Handle2 = Handle1 #handle { trusted_offset = Offset1 }, + put({Ref, fhc_handle}, Handle2), with_age_tree(fun (Tree) -> Tree1 = gb_trees:insert(Now, Ref, Tree), {Oldest, _Ref} = gb_trees:smallest(Tree1), @@ -430,7 +431,7 @@ open1(Path, Mode, Options, Ref, Offset) -> {open, self(), Oldest}), Tree1 end), - {ok, Handle1}; + {ok, Handle2}; {error, Reason} -> {error, Reason} end. -- cgit v1.2.1 From bffc3862bedef2436b900189135422f99d76af2e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 25 Nov 2009 14:25:29 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 173 ++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 92 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index dbfffff4..d67fd365 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -213,11 +213,9 @@ sync(Ref) -> ([Handle = #handle { hdl = Hdl, offset = Offset, is_dirty = true, write_buffer = [] }]) -> case file:sync(Hdl) of - ok -> - {ok, [Handle #handle { trusted_offset = Offset, - is_dirty = false }]}; - Error -> - {Error, [Handle]} + ok -> {ok, [Handle #handle { trusted_offset = Offset, + is_dirty = false }]}; + Error -> {Error, [Handle]} end end). @@ -282,24 +280,35 @@ copy(Src, Dest, Count) -> set_maximum_since_use(MaximumAge) -> Now = now(), - Report = - lists:foldl( - fun ({{Ref, fhc_handle}, - Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> - Age = timer:now_diff(Now, Then), - case Hdl /= closed andalso Age >= MaximumAge of - true -> case close1(Ref, Handle, soft) of - {ok, Handle1} -> put({Ref, fhc_handle}, Handle1), - false; - _ -> Rep - end; - false -> Rep - end; - (_KeyValuePair, Rep) -> - Rep - end, true, get()), - case Report of - true -> report_eldest(); + case lists:foldl( + fun ({{Ref, fhc_handle}, + Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> + Age = timer:now_diff(Now, Then), + case Hdl /= closed andalso Age >= MaximumAge of + true -> case close1(Ref, Handle, soft) of + {ok, Handle1} -> + put({Ref, fhc_handle}, Handle1), + false; + _ -> + Rep + end; + false -> Rep + end; + (_KeyValuePair, Rep) -> + Rep + end, true, get()) of + true -> with_age_tree( + fun (Tree) -> + case gb_trees:is_empty(Tree) of + true -> Tree; + false -> {Oldest, _Ref} = + gb_trees:smallest(Tree), + gen_server2:cast( + ?SERVER, {update, self(), Oldest}) + end, + Tree + end), + ok; false -> ok end. 
@@ -309,11 +318,6 @@ decrement() -> increment() -> gen_server2:cast(?SERVER, increment). -%%---------------------------------------------------------------------------- -%% Internal versions for the above -%%---------------------------------------------------------------------------- - - %%---------------------------------------------------------------------------- %% Internal functions %%---------------------------------------------------------------------------- @@ -324,18 +328,6 @@ is_writer(Mode) -> lists:member(write, Mode). is_appender(Mode) -> lists:member(append, Mode). -report_eldest() -> - with_age_tree( - fun (Tree) -> - case gb_trees:is_empty(Tree) of - true -> Tree; - false -> {Oldest, _Ref} = gb_trees:smallest(Tree), - gen_server2:cast(?SERVER, {update, self(), Oldest}) - end, - Tree - end), - ok. - with_handles(Refs, Fun) -> ResHandles = lists:foldl( fun (Ref, {ok, HandlesAcc}) -> @@ -442,47 +434,46 @@ close1(Ref, Handle, SoftOrHard) -> is_read = IsReader, is_write = IsWriter, last_used_at = Then } = Handle1 } -> case Hdl of - closed -> - ok; - _ -> - ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - ok = file:close(Hdl), - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:delete(Then, Tree), - Oldest = - case gb_trees:is_empty(Tree1) of - true -> undefined; - false -> {Oldest1, _Ref} = - gb_trees:smallest(Tree1), - Oldest1 - end, - gen_server2:cast( - ?SERVER, {close, self(), Oldest}), - Tree1 - end) + closed -> ok; + _ -> ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete(Then, Tree), + Oldest = + case gb_trees:is_empty(Tree1) of + true -> + undefined; + false -> + {Oldest1, _Ref} = + gb_trees:smallest(Tree1), + Oldest1 + end, + gen_server2:cast( + ?SERVER, {close, self(), Oldest}), + Tree1 + end) end, case SoftOrHard of - hard -> - #file { reader_count = RCount, has_writer = HasWriter } = - File = get({Path, fhc_file}), - RCount1 = case IsReader of - true -> RCount - 1; - false -> RCount - end, - HasWriter1 = HasWriter andalso not IsWriter, - case RCount1 =:= 0 andalso not HasWriter1 of - true -> erase({Path, fhc_file}); - false -> put({Path, fhc_file}, - File #file { reader_count = RCount1, - has_writer = HasWriter1 }) - end, - ok; - soft -> - {ok, Handle1 #handle { hdl = closed }} + hard -> #file { reader_count = RCount, + has_writer = HasWriter } = File = + get({Path, fhc_file}), + RCount1 = case IsReader of + true -> RCount - 1; + false -> RCount + end, + HasWriter1 = HasWriter andalso not IsWriter, + case RCount1 =:= 0 andalso not HasWriter1 of + true -> erase({Path, fhc_file}); + false -> put({Path, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter1 }) + end, + ok; + soft -> {ok, Handle1 #handle { hdl = closed }} end; {Error, Handle1} -> put_handle(Ref, Handle1), @@ -492,12 +483,11 @@ close1(Ref, Handle, SoftOrHard) -> maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, offset = Offset }) -> {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), - Result = case NeedsSeek of - true -> file:position(Hdl, NewOffset); - false -> {ok, Offset} - end, - case Result of - {ok, Offset1} -> + case (case NeedsSeek of + true -> file:position(Hdl, NewOffset); + false -> {ok, Offset} + end) of + {ok, Offset1} = Result -> {Result, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; {error, _} = Error -> {Error, Handle} @@ -541,7 +531,7 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, end. 
%%---------------------------------------------------------------------------- -%% gen_server +%% gen_server callbacks %%---------------------------------------------------------------------------- init([]) -> @@ -614,12 +604,11 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, end, {[], 0, 0}, Elders), case Pids of [] -> ok; - _ -> - %% ClientCount can't be 0 if we have some pids - AverageAge = Sum / ClientCount, - lists:foreach( - fun (Pid) -> Pid ! {?MODULE, maximum_eldest_since_use, AverageAge} - end, Pids) + _ -> AverageAge = Sum / ClientCount, + lists:foreach(fun (Pid) -> Pid ! {?MODULE, + maximum_eldest_since_use, + AverageAge} + end, Pids) end, {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server2, cast, [?SERVER, check_counts]), -- cgit v1.2.1 From 7ecbe3542af2d58ca7e202dc309f236e4b3a47d1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 25 Nov 2009 14:36:29 +0000 Subject: use plain, old gen_server instead of gen_server2 - for better compatibility --- src/file_handle_cache.erl | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index d67fd365..aa2ced5b 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -31,7 +31,7 @@ -module(file_handle_cache). --behaviour(gen_server2). +-behaviour(gen_server). -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, @@ -117,8 +117,7 @@ %%---------------------------------------------------------------------------- start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], - [{timeout, infinity}]). + gen_server:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). open(Path, Mode, Options) -> case is_appender(Mode) of @@ -303,7 +302,7 @@ set_maximum_since_use(MaximumAge) -> true -> Tree; false -> {Oldest, _Ref} = gb_trees:smallest(Tree), - gen_server2:cast( + gen_server:cast( ?SERVER, {update, self(), Oldest}) end, Tree @@ -313,10 +312,10 @@ set_maximum_since_use(MaximumAge) -> end. decrement() -> - gen_server2:cast(?SERVER, decrement). + gen_server:cast(?SERVER, decrement). increment() -> - gen_server2:cast(?SERVER, increment). + gen_server:cast(?SERVER, increment). 
%%---------------------------------------------------------------------------- %% Internal functions @@ -419,8 +418,8 @@ open1(Path, Mode, Options, Ref, Offset) -> with_age_tree(fun (Tree) -> Tree1 = gb_trees:insert(Now, Ref, Tree), {Oldest, _Ref} = gb_trees:smallest(Tree1), - gen_server2:cast(?SERVER, - {open, self(), Oldest}), + gen_server:cast(?SERVER, + {open, self(), Oldest}), Tree1 end), {ok, Handle2}; @@ -452,7 +451,7 @@ close1(Ref, Handle, SoftOrHard) -> gb_trees:smallest(Tree1), Oldest1 end, - gen_server2:cast( + gen_server:cast( ?SERVER, {close, self(), Oldest}), Tree1 end) @@ -610,7 +609,7 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, AverageAge} end, Pids) end, - {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server2, + {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server, cast, [?SERVER, check_counts]), State; maybe_reduce(State) -> -- cgit v1.2.1 From e357fb82bdb6182fdc3d178f23c0f8f35e0f10ef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 14:47:40 +0000 Subject: Stop using _misc:cmd and use os:cmd instead --- src/file_handle_cache.erl | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index d67fd365..a299fc1e 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -623,26 +623,27 @@ maybe_reduce(State) -> %% suggests that BSDs (incl OS X), solaris and linux all agree that %% ulimit -n is file handles ulimit() -> - try - %% under Linux, Solaris and FreeBSD, ulimit is a shell - %% builtin, not a command. In OS X, it's a command, but it's - %% still safe to call it this way: - case rabbit_misc:cmd("sh -c \"ulimit -n\"") of - "unlimited" -> infinity; - String = [C|_] when $0 =< C andalso C =< $9 -> - Num = list_to_integer( - lists:takewhile(fun (D) -> $0 =< D andalso D =< $9 end, - String)) - ?RESERVED_FOR_OTHERS, - lists:max([1, Num]); - String -> - error_logger:warning_msg( - "Unexpected result of \"ulimit -n\": ~p~n", [String]), - throw({unexpected_result, String}) - end - catch _ -> case os:type() of - {win32, _OsName} -> - ?FILE_HANDLES_LIMIT_WINDOWS; - _ -> - ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS - end + case os:type() of + {win32, _OsName} -> + ?FILE_HANDLES_LIMIT_WINDOWS; + {unix, _OsName} -> + %% Under Linux, Solaris and FreeBSD, ulimit is a shell + %% builtin, not a command. In OS X, it's a command. + %% Fortunately, os:cmd invokes the cmd in a shell env, so + %% we're safe in all cases. + case os:cmd("ulimit -n") of + "unlimited" -> infinity; + String = [C|_] when $0 =< C andalso C =< $9 -> + Num = list_to_integer( + lists:takewhile( + fun (D) -> $0 =< D andalso D =< $9 end, String)) - + ?RESERVED_FOR_OTHERS, + lists:max([1, Num]); + _ -> + %% probably a variant of + %% "/bin/sh: line 1: ulimit: command not found\n" + ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS + end; + _ -> + ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS end. 
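
The os:cmd-based detection above is easy to exercise on its own. A
standalone sketch of the same parse (a hypothetical helper, not repository
code: Reserved stands in for ?RESERVED_FOR_OTHERS, Default for the per-OS
fallback; matching "unlimited" as a prefix also tolerates the trailing
newline that os:cmd/1 returns):

ulimit_sketch(Reserved, Default) ->
    case os:cmd("ulimit -n") of
        "unlimited" ++ _ ->
            infinity;
        String = [C|_] when $0 =< C andalso C =< $9 ->
            Digits = lists:takewhile(
                       fun (D) -> $0 =< D andalso D =< $9 end, String),
            %% never report fewer than one usable handle
            lists:max([1, list_to_integer(Digits) - Reserved]);
        _ ->
            %% e.g. "/bin/sh: line 1: ulimit: command not found\n"
            Default
    end.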
-- cgit v1.2.1 From b2af8a24e4b3da928c4163ffec5e125632c0d96b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 25 Nov 2009 15:11:45 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 2f4c3bc0..1e00b5a8 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -631,7 +631,8 @@ ulimit() -> %% Fortunately, os:cmd invokes the cmd in a shell env, so %% we're safe in all cases. case os:cmd("ulimit -n") of - "unlimited" -> infinity; + "unlimited" -> + infinity; String = [C|_] when $0 =< C andalso C =< $9 -> Num = list_to_integer( lists:takewhile( -- cgit v1.2.1 From 547a3203d811a661c513d126f4304c3cf6849f87 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 15:46:26 +0000 Subject: A novel --- src/file_handle_cache.erl | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 2f4c3bc0..9786fb06 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -31,6 +31,86 @@ -module(file_handle_cache). +%% A File Handle Cache +%% +%% Some constraints +%% 1) This supports 1 writer, multiple readers per file. Nothing else. +%% 2) Writes are all appends. You can not write to the middle of a +%% file, although you can truncate and then append if you want. +%% 3) Although there is a write buffer, there is no read buffer. Feel +%% free to use the read_ahead mode, but beware of the interaction +%% between that buffer and the write buffer. +%% +%% Some benefits +%% 1) You don't have to remember to call sync before close +%% 2) Buffering is much more flexible than with plain file module, and +%% you can control when the buffer gets flushed out. This means that +%% you can rely on reads-after-writes working, without having to call +%% the expensive sync. +%% 3) Unnecessary calls to position and sync get optimised out. +%% 4) You can find out what your 'real' offset is, and what your +%% 'virtual' offset is (i.e. where the hdl really is, and where it +%% would be after the write buffer is written out). +%% 5) You can find out what the offset was when you last sync'd. +%% +%% In general, it mirrors exactly the common API with the file module. +%% +%% There is also a server component which serves to limit the number +%% of open file handles in a "soft" way. By "soft", I mean that the +%% server will never prevent a client from opening a handle, but may +%% immediately tell it close the handle. Thus you can set the limit to +%% zero and it will still all work correctly, it's just that +%% effectively no caching will take place. The operation of limiting +%% is as follows: +%% +%% On open and close, the client sends messages to the server +%% informing it of opens and closes. This allows the server to keep +%% track of the number of open handles. The client also keeps a +%% gb_tree which is updated on every use of a file handle, mapping the +%% time at which the file handle was last used (timestamp) to the +%% handle. Thus the smallest key in this tree maps to the file handle +%% that has not been used for the longest amount of time. This +%% smallest key is included in the messages to the server. As such, +%% the server keeps track of which file handle has least recently been +%% used *at the point of the most recent open or close from each +%% client*. 
+%%
+%% Note that this data can go very out of date, by the client using
+%% the least recently used handle.
+%%
+%% When the limit is reached, the server calculates the average age of
+%% the last reported least recently used file handle of all the
+%% clients. It then tells all the clients to close any handles not
+%% used for longer than this average. The client should call this back
+%% into set_maximum_since_use/1. However, it's highly possible this
+%% age will be too big because the client has used its file handles in
+%% the mean time. Thus at this point it reports to the server the
+%% current timestamp at which its least recently used file handle was
+%% last used. The server will check two seconds later that either it's
+%% back under the limit, in which case all is well again, or if not,
+%% it will calculate a new average age. Its data will be much more
+%% recent now, and so it's very likely that when this is communicated
+%% to the clients, the clients will close file handles.
+%%
+%% The advantage of this scheme is that there is only communication
+%% from the client to the server on open, close, and when in the
+%% process of trying to reduce file handle usage. There is no
+%% communication from the client to the server on normal file handle
+%% operations. This scheme forms a feed back loop - the server doesn't
+%% care which file handles are close, just that some are, and it
+%% checks this repeatedly when over the limit. Given the guarantees of
+%% now(), even if there is just one file handle open, a limit of 1,
+%% and one client, it is certain that when the client calculates the
+%% age of the handle, it'll be greater than when the server calculated
+%% it, hence it should be closed.
+%%
+%% Handles which are closed as a result of the server are put into a
+%% "soft-closed" state in which the handle is closed (data flushed out
+%% and sync'd first) but the state is maintained. The handle will be
+%% fully reopened again as soon as needed, thus users of this library
+%% do not need to worry about their handles being closed by the server
+%% - reopening them when necessary is handled transparently.
+
 -behaviour(gen_server).
 
 -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1,
-- cgit v1.2.1


From 53d88243ea6eaa956564c39991561b917faed07f Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 25 Nov 2009 15:56:04 +0000
Subject: typos to novel

---
 src/file_handle_cache.erl | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl
index c76dbccd..2a3f1ded 100644
--- a/src/file_handle_cache.erl
+++ b/src/file_handle_cache.erl
@@ -33,11 +33,16 @@
 
 %% A File Handle Cache
 %%
+%% This extends a subset of the functionality of the Erlang file
+%% module.
+%%
 %% Some constraints
 %% 1) This supports 1 writer, multiple readers per file. Nothing else.
-%% 2) Writes are all appends. You can not write to the middle of a
+%% 2) Do not open the same file from different processes. Bad things
+%% may happen.
+%% 3) Writes are all appends. You cannot write to the middle of a
 %% file, although you can truncate and then append if you want.
-%% 3) Although there is a write buffer, there is no read buffer. Feel
+%% 4) Although there is a write buffer, there is no read buffer. Feel
 %% free to use the read_ahead mode, but beware of the interaction
 %% between that buffer and the write buffer.
 %%
@@ -53,13 +58,11 @@
 %% would be after the write buffer is written out).
%% 5) You can find out what the offset was when you last sync'd. %% -%% In general, it mirrors exactly the common API with the file module. -%% %% There is also a server component which serves to limit the number %% of open file handles in a "soft" way. By "soft", I mean that the %% server will never prevent a client from opening a handle, but may -%% immediately tell it close the handle. Thus you can set the limit to -%% zero and it will still all work correctly, it's just that +%% immediately tell it to close the handle. Thus you can set the limit +%% to zero and it will still all work correctly, it's just that %% effectively no caching will take place. The operation of limiting %% is as follows: %% @@ -71,9 +74,9 @@ %% handle. Thus the smallest key in this tree maps to the file handle %% that has not been used for the longest amount of time. This %% smallest key is included in the messages to the server. As such, -%% the server keeps track of which file handle has least recently been -%% used *at the point of the most recent open or close from each -%% client*. +%% the server keeps track of when the least recently used file handle +%% was used *at the point of the most recent open or close* by each +%% client. %% %% Note that this data can go very out of date, by the client using %% the least recently used handle. @@ -81,23 +84,24 @@ %% When the limit is reached, the server calculates the average age of %% the last reported least recently used file handle of all the %% clients. It then tells all the clients to close any handles not -%% used for longer than this average. The client should call this back -%% into set_maximum_since_use/1. However, it's highly possible this -%% age will be too big because the client has used its file handles in -%% the mean time. Thus at this point it reports to the server the -%% current timestamp at which its least recently used file handle was -%% last used. The server will check two seconds later that either it's -%% back under the limit, in which case all is well again, or if not, -%% it will calculate a new average age. Its data will be much more -%% recent now, and so it's very likely that when this is communicated -%% to the clients, the clients will close file handles. +%% used for longer than this average. The client should receive this +%% message and pass it into set_maximum_since_use/1. However, it's +%% highly possible this age will be greater than the ages of all the +%% handles the client knows of because the client has used its file +%% handles in the mean time. Thus at this point it reports to the +%% server the current timestamp at which its least recently used file +%% handle was last used. The server will check two seconds later that +%% either it's back under the limit, in which case all is well again, +%% or if not, it will calculate a new average age. Its data will be +%% much more recent now, and so it's very likely that when this is +%% communicated to the clients, the clients will close file handles. %% %% The advantage of this scheme is that there is only communication %% from the client to the server on open, close, and when in the %% process of trying to reduce file handle usage. There is no %% communication from the client to the server on normal file handle %% operations. This scheme forms a feed back loop - the server doesn't -%% care which file handles are close, just that some are, and it +%% care which file handles are closed, just that some are, and it %% checks this repeatedly when over the limit. 
Given the guarantees of %% now(), even if there is just one file handle open, a limit of 1, %% and one client, it is certain that when the client calculates the -- cgit v1.2.1 From 2e909e2c79e0b83a43141b924841ffcdc6a3ae23 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 Nov 2009 16:34:08 +0000 Subject: (kinda bug 22042) inline rabbit_misc:cmd seeing as it's not used elsewhere and we want to allow vm_memory_monitor to be used outside of rabbit --- src/rabbit_misc.erl | 10 +--------- src/vm_memory_monitor.erl | 12 +++++++++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index b9f92442..97c96fc7 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -55,7 +55,7 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). --export([unfold/2, ceil/1, cmd/1]). +-export([unfold/2, ceil/1]). -export([sort_field_table/1]). -import(mnesia). @@ -127,7 +127,6 @@ -spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> number()). --spec(cmd/1 :: (string()) -> string()). -spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()). -endif. @@ -496,10 +495,3 @@ ceil(N) -> %% Sorts a list of AMQP table fields as per the AMQP spec sort_field_table(Arguments) -> lists:keysort(1, Arguments). - -cmd(Command) -> - Exec = hd(string:tokens(Command, " ")), - case os:find_executable(Exec) of - false -> throw({command_not_found, Exec}); - _ -> os:cmd(Command) - end. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index 9eee0c0b..65d4a451 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -223,13 +223,19 @@ get_mem_limit(MemFraction, TotalMemory) -> %%---------------------------------------------------------------------------- %% Internal Helpers %%---------------------------------------------------------------------------- +cmd(Command) -> + Exec = hd(string:tokens(Command, " ")), + case os:find_executable(Exec) of + false -> throw({command_not_found, Exec}); + _ -> os:cmd(Command) + end. %% get_total_memory(OS) -> Total %% Windows and Freebsd code based on: memsup:get_memory_usage/1 %% Original code was part of OTP and released under "Erlang Public License". get_total_memory({unix,darwin}) -> - File = rabbit_misc:cmd("/usr/bin/vm_stat"), + File = cmd("/usr/bin/vm_stat"), Lines = string:tokens(File, "\n"), Dict = dict:from_list(lists:map(fun parse_line_mach/1, Lines)), [PageSize, Inactive, Active, Free, Wired] = @@ -257,7 +263,7 @@ get_total_memory({unix, linux}) -> dict:fetch('MemTotal', Dict); get_total_memory({unix, sunos}) -> - File = rabbit_misc:cmd("/usr/sbin/prtconf"), + File = cmd("/usr/sbin/prtconf"), Lines = string:tokens(File, "\n"), Dict = dict:from_list(lists:map(fun parse_line_sunos/1, Lines)), dict:fetch('Memory size', Dict); @@ -308,7 +314,7 @@ parse_line_sunos(Line) -> end. freebsd_sysctl(Def) -> - list_to_integer(rabbit_misc:cmd("/sbin/sysctl -n " ++ Def) -- "\n"). + list_to_integer(cmd("/sbin/sysctl -n " ++ Def) -- "\n"). %% file:read_file does not work on files in /proc as it seems to get %% the size of the file first and then read that many bytes. 
But files -- cgit v1.2.1 From 071ed83294cf83a4bc614f919f8386a242c6f17d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 11:16:08 +0000 Subject: Added deregister and stop to MM --- src/rabbit_amqqueue_process.erl | 4 +++- src/rabbit_memory_monitor.erl | 51 ++++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b1982d30..945cd8bd 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -92,7 +92,7 @@ transactions, memory ]). - + %%---------------------------------------------------------------------------- start_link(Q) -> @@ -121,8 +121,10 @@ init(Q = #amqqueue { name = QName }) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(shutdown, #q{variable_queue_state = VQS}) -> + ok = rabbit_memory_monitor:deregister(self()), _VQS = rabbit_variable_queue:terminate(VQS); terminate(_Reason, State = #q{variable_queue_state = VQS}) -> + ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? %% Ensure that any persisted tx messages are removed. %% TODO: wait for all in flight tx_commits to complete diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 7237b825..3f2c02f4 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -39,7 +39,8 @@ -behaviour(gen_server2). --export([start_link/0, update/0, register/2, report_queue_duration/2]). +-export([start_link/0, update/0, register/2, deregister/1, + report_queue_duration/2, stop/0]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -49,7 +50,6 @@ queue_duration_sum, %% sum of all queue_durations queue_duration_count, %% number of elements in sum memory_limit, %% how much memory we intend to use - memory_ratio, %% limit / used desired_duration, %% the desired queue duration callbacks %% a dict of qpid -> {M,F,A}s }). @@ -69,7 +69,9 @@ -spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). +-spec(deregister/1 :: (pid()) -> 'ok'). -spec(report_queue_duration/2 :: (pid(), float() | 'infinity') -> number()). +-spec(stop/0 :: () -> 'ok'). -endif. @@ -86,10 +88,15 @@ update() -> register(Pid, MFA = {_M, _F, _A}) -> gen_server2:call(?SERVER, {register, Pid, MFA}, infinity). +deregister(Pid) -> + gen_server2:cast(?SERVER, {deregister, Pid}). + report_queue_duration(Pid, QueueDuration) -> gen_server2:call(rabbit_memory_monitor, {report_queue_duration, Pid, QueueDuration}, infinity). +stop() -> + gen_server2:cast(?SERVER, stop). %%---------------------------------------------------------------------------- %% Gen_server callbacks @@ -109,7 +116,6 @@ init([]) -> queue_duration_sum = 0.0, queue_duration_count = 0, memory_limit = MemoryLimit, - memory_ratio = 1.0, desired_duration = infinity, callbacks = dict:new()})}. @@ -156,23 +162,17 @@ handle_call(_Request, _From, State) -> handle_cast(update, State) -> {noreply, internal_update(State)}; +handle_cast({deregister, Pid}, State) -> + {noreply, internal_deregister(Pid, State)}; + +handle_cast(stop, State) -> + {stop, normal, State}; + handle_cast(_Request, State) -> {noreply, State}. 
-handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #state{queue_duration_sum = Sum, - queue_duration_count = Count, - queue_durations = Durations, - callbacks = Callbacks}) -> - [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid), - Sum1 = case PrevQueueDuration of - infinity -> Sum; - _ -> Sum - PrevQueueDuration - end, - true = ets:delete(State#state.queue_durations, Pid), - {noreply, State#state{queue_duration_sum = Sum1, - queue_duration_count = Count-1, - callbacks = dict:erase(Pid, Callbacks)}}; +handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> + {noreply, internal_deregister(Pid, State)}; handle_info(_Info, State) -> {noreply, State}. @@ -189,6 +189,20 @@ code_change(_OldVsn, State, _Extra) -> %% Internal functions %%---------------------------------------------------------------------------- +internal_deregister(Pid, State = #state{queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations, + callbacks = Callbacks}) -> + [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid), + Sum1 = case PrevQueueDuration of + infinity -> Sum; + _ -> Sum - PrevQueueDuration + end, + true = ets:delete(State#state.queue_durations, Pid), + State#state{queue_duration_sum = Sum1, + queue_duration_count = Count-1, + callbacks = dict:erase(Pid, Callbacks)}. + internal_update(State = #state{memory_limit = Limit, queue_durations = Durations, desired_duration = DesiredDurationAvg, @@ -213,8 +227,7 @@ internal_update(State = #state{memory_limit = Limit, true -> infinity; false -> lists:max([0, AvgDuration * MemoryRatio]) end, - State1 = State#state{memory_ratio = MemoryRatio, - desired_duration = DesiredDurationAvg1}, + State1 = State#state{desired_duration = DesiredDurationAvg1}, %% only inform queues immediately if the desired duration has %% decreased -- cgit v1.2.1 From e5ad9476b79c82edfb292b2942841a8f31b0eb3d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 11:52:32 +0000 Subject: Lots of improvements: o) merge the dict and ets; use a record there; o) drop record memory_ratio; o) corrected where we clamp to non-neg numbers; o) correct conditional in internal update --- src/rabbit_memory_monitor.erl | 83 +++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 3f2c02f4..6359ecc9 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -45,13 +45,14 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). +-record(process, {pid, reported, sent, callback}). + -record(state, {timer, %% 'internal_update' timer - queue_durations, %% ets, (qpid, last_reported, last_sent) + queue_durations, %% ets #process queue_duration_sum, %% sum of all queue_durations queue_duration_count, %% number of elements in sum memory_limit, %% how much memory we intend to use - desired_duration, %% the desired queue duration - callbacks %% a dict of qpid -> {M,F,A}s + desired_duration %% the desired queue duration }). -define(SERVER, ?MODULE). 
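
Aside for readers following the refactoring above: the monitor's bookkeeping moves from plain {Pid, Reported, Sent} tuples plus a separate callbacks dict into a single ets table of #process records, keyed directly on a record field via the keypos option (visible in the init hunk just below). A minimal, self-contained sketch of that pattern; the module and table names here are invented for illustration and are not part of the patch:

-module(keypos_sketch).
-export([demo/0]).

%% Illustrative record; shaped like #process in the patch above.
-record(process, {pid, reported, sent, callback}).

demo() ->
    %% {keypos, N} makes field N the table key; the expression
    %% #process.pid evaluates to that field's tuple position.
    Table = ets:new(keypos_sketch_table,
                    [set, private, {keypos, #process.pid}]),
    Pid = self(),
    true = ets:insert(Table, #process { pid = Pid, reported = infinity,
                                        sent = infinity,
                                        callback = undefined }),
    %% lookup is by the keypos field, i.e. the pid itself
    [#process { reported = infinity }] = ets:lookup(Table, Pid),
    true = ets:delete(Table, Pid),
    true = ets:delete(Table),
    ok.
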
@@ -110,14 +111,16 @@ init([]) -> {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, ?SERVER, update, []), + + Ets = ets:new(?TABLE_NAME, [set, private, {keypos, #process.pid}]), + {ok, internal_update( #state{timer = TRef, - queue_durations = ets:new(?TABLE_NAME, [set, private]), + queue_durations = Ets, queue_duration_sum = 0.0, queue_duration_count = 0, memory_limit = MemoryLimit, - desired_duration = infinity, - callbacks = dict:new()})}. + desired_duration = infinity})}. handle_call({report_queue_duration, Pid, QueueDuration}, From, State = #state{queue_duration_sum = Sum, @@ -130,7 +133,8 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, true -> infinity; false -> QueueDuration end, - [{_Pid, PrevQueueDuration, PrevSendDuration}] = ets:lookup(Durations, Pid), + [Proc = #process{reported = PrevQueueDuration, sent = PrevSendDuration}] = + ets:lookup(Durations, Pid), SendDuration1 = case QueueDuration1 < 1 andalso PrevSendDuration == infinity of @@ -146,15 +150,17 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; {_, _} -> {Sum - PrevQueueDuration + QueueDuration1, Count} end, - true = ets:insert(Durations, {Pid, QueueDuration1, SendDuration1}), - {noreply, State#state{queue_duration_sum = Sum1, + true = ets:insert(Durations, Proc#process{reported = QueueDuration1, + sent = SendDuration1}), + {noreply, State#state{queue_duration_sum = lists:max([0, Sum1]), queue_duration_count = Count1}}; -handle_call({register, Pid, MFA}, _From, State = - #state{queue_durations = Durations, callbacks = Callbacks}) -> +handle_call({register, Pid, MFA}, _From, + State = #state{queue_durations = Durations}) -> _MRef = erlang:monitor(process, Pid), - true = ets:insert(Durations, {Pid, infinity, infinity}), - {reply, ok, State#state{callbacks = dict:store(Pid, MFA, Callbacks)}}; + true = ets:insert(Durations, #process{pid = Pid, reported = infinity, + sent = infinity, callback = MFA}), + {reply, ok, State}; handle_call(_Request, _From, State) -> {noreply, State}. @@ -191,24 +197,24 @@ code_change(_OldVsn, State, _Extra) -> internal_deregister(Pid, State = #state{queue_duration_sum = Sum, queue_duration_count = Count, - queue_durations = Durations, - callbacks = Callbacks}) -> - [{_Pid, PrevQueueDuration, _PrevSendDuration}] = ets:lookup(Durations, Pid), - Sum1 = case PrevQueueDuration of - infinity -> Sum; - _ -> Sum - PrevQueueDuration - end, - true = ets:delete(State#state.queue_durations, Pid), - State#state{queue_duration_sum = Sum1, - queue_duration_count = Count-1, - callbacks = dict:erase(Pid, Callbacks)}. + queue_durations = Durations}) -> + case ets:lookup(Durations, Pid) of + [] -> State; + [#process{reported = PrevQueueDuration}] -> + Sum1 = case PrevQueueDuration of + infinity -> Sum; + _ -> lists:max([0, Sum - PrevQueueDuration]) + end, + true = ets:delete(State#state.queue_durations, Pid), + State#state{queue_duration_sum = Sum1, + queue_duration_count = Count-1} + end. internal_update(State = #state{memory_limit = Limit, queue_durations = Durations, desired_duration = DesiredDurationAvg, queue_duration_sum = Sum, - queue_duration_count = Count, - callbacks = Callbacks}) -> + queue_duration_count = Count}) -> %% available memory / used memory MemoryRatio = Limit / erlang:memory(total), %% if all queues are pushed to disk, then Sum will be 0. 
If memory @@ -225,30 +231,30 @@ internal_update(State = #state{memory_limit = Limit, DesiredDurationAvg1 = case AvgDuration == infinity orelse MemoryRatio > 2 of true -> infinity; - false -> lists:max([0, AvgDuration * MemoryRatio]) + false -> AvgDuration * MemoryRatio end, State1 = State#state{desired_duration = DesiredDurationAvg1}, %% only inform queues immediately if the desired duration has %% decreased - case (DesiredDurationAvg == infinity andalso DesiredDurationAvg /= infinity) - orelse (DesiredDurationAvg1 /= infinity andalso - DesiredDurationAvg1 < DesiredDurationAvg) of + case (DesiredDurationAvg == infinity andalso DesiredDurationAvg1 /= infinity) + orelse (DesiredDurationAvg /= infinity andalso + DesiredDurationAvg > DesiredDurationAvg1) of true -> %% If we have pessimistic information, we need to inform %% queues to reduce it's memory usage when needed. This %% sometimes wakes up queues from hibernation. true = ets:foldl( - fun ({Pid, QueueDuration, PrevSendDuration}, true) -> + fun (Proc = #process{reported = QueueDuration, + sent = PrevSendDuration}, true) -> case DesiredDurationAvg1 < lists:min([PrevSendDuration, QueueDuration]) of true -> - ok = - set_queue_duration( - Pid, DesiredDurationAvg1, Callbacks), - ets:insert(Durations, - {Pid, QueueDuration, - DesiredDurationAvg1}); + ok = set_queue_duration( + Proc, DesiredDurationAvg1), + ets:insert( + Durations, + Proc#process{sent=DesiredDurationAvg1}); false -> true end end, true, Durations); @@ -262,6 +268,5 @@ get_memory_limit() -> A -> A end. -set_queue_duration(Pid, QueueDuration, Callbacks) -> - {M,F,A} = dict:fetch(Pid, Callbacks), +set_queue_duration(#process{callback={M,F,A}}, QueueDuration) -> ok = erlang:apply(M, F, A++[QueueDuration]). -- cgit v1.2.1 From 18eb4aa8bdb74f1bdf212fe25f751e64887708ef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 12:40:48 +0000 Subject: Further correction to conditional; line lengths; extract all magic numbers and -define them and document them. --- src/rabbit_memory_monitor.erl | 57 ++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 6359ecc9..d6f0c600 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -60,9 +60,36 @@ -define(TABLE_NAME, ?MODULE). -define(MAX_QUEUE_DURATION, 86400). %% 60*60*24 i.e. 1 day +-define(LIMIT_THRESHOLD, 0.5). %% don't limit queues when mem use is < this + +%% If all queues are pushed to disk (duration 0), then the sum of +%% their reported lengths will be 0. If memory then becomes available, +%% unless we manually intervene, the sum will remain 0, and the queues +%% will never get a non-zero duration. Thus when the mem use is < +%% SUM_INC_THRESHOLD, increase the sum artificially by SUM_INC_AMOUNT. +-define(SUM_INC_THRESHOLD, 0.95). +-define(SUM_INC_AMOUNT, 1.0). + +%% Queues which are empty will report a duration of 0. This may result +%% in the memory-monitor deciding that the desired duration should +%% also be 0, which is a disaster for fast moving queues. A fast +%% moving queue may well oscillate between reporting 0 and a small +%% number close to 0. Thus if the number reported is under +%% SMALL_INFINITY_OSCILLATION_DURATION and the last value we sent it +%% was infinity, then send it infinity again. Thus its duration must +%% rise to above SMALL_INFINITY_OSCILLATION_DURATION before we start +%% sending it durations /= infinity. 
+-define(SMALL_INFINITY_OSCILLATION_DURATION, 1.0). + %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). +%% Because we have a feedback loop here, we need to ensure that we +%% have some space for when the queues don't quite respond as fast as +%% we would like, or when there is buffering going on in other parts +%% of the system. In short, we aim to stay some distance away from +%% when the memory alarms will go off, which cause channel.flow. +-define(MEMORY_LIMIT_SCALING, 0.6). %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -104,10 +131,7 @@ stop() -> %%---------------------------------------------------------------------------- init([]) -> - %% We should never use more memory than user requested. As the memory - %% manager doesn't really know how much memory queues are using, we shall - %% try to remain safe distance from real throttle limit. - MemoryLimit = trunc(get_memory_limit() * 0.6), + MemoryLimit = trunc(get_memory_limit() * ?MEMORY_LIMIT_SCALING), {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, ?SERVER, update, []), @@ -137,7 +161,8 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, ets:lookup(Durations, Pid), SendDuration1 = - case QueueDuration1 < 1 andalso PrevSendDuration == infinity of + case QueueDuration1 < ?SMALL_INFINITY_OSCILLATION_DURATION andalso + PrevSendDuration == infinity of true -> infinity; false -> SendDuration end, @@ -152,7 +177,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, end, true = ets:insert(Durations, Proc#process{reported = QueueDuration1, sent = SendDuration1}), - {noreply, State#state{queue_duration_sum = lists:max([0, Sum1]), + {noreply, State#state{queue_duration_sum = lists:max([0.0, Sum1]), queue_duration_count = Count1}}; handle_call({register, Pid, MFA}, _From, @@ -203,7 +228,7 @@ internal_deregister(Pid, State = #state{queue_duration_sum = Sum, [#process{reported = PrevQueueDuration}] -> Sum1 = case PrevQueueDuration of infinity -> Sum; - _ -> lists:max([0, Sum - PrevQueueDuration]) + _ -> lists:max([0.0, Sum - PrevQueueDuration]) end, true = ets:delete(State#state.queue_durations, Pid), State#state{queue_duration_sum = Sum1, @@ -215,13 +240,9 @@ internal_update(State = #state{memory_limit = Limit, desired_duration = DesiredDurationAvg, queue_duration_sum = Sum, queue_duration_count = Count}) -> - %% available memory / used memory - MemoryRatio = Limit / erlang:memory(total), - %% if all queues are pushed to disk, then Sum will be 0. If memory - %% then becomes available, unless we do the following, we will - %% never allow queues to come off disk. 
- Sum1 = case MemoryRatio > 1.05 of - true -> Sum + 1; + MemoryRatio = erlang:memory(total) / Limit, + Sum1 = case MemoryRatio < ?SUM_INC_THRESHOLD of + true -> Sum + ?SUM_INC_AMOUNT; false -> Sum end, AvgDuration = case Count == 0 of @@ -229,16 +250,18 @@ internal_update(State = #state{memory_limit = Limit, false -> Sum1 / Count end, DesiredDurationAvg1 = - case AvgDuration == infinity orelse MemoryRatio > 2 of + case AvgDuration == infinity orelse MemoryRatio < ?LIMIT_THRESHOLD of true -> infinity; - false -> AvgDuration * MemoryRatio + false -> AvgDuration / MemoryRatio end, State1 = State#state{desired_duration = DesiredDurationAvg1}, %% only inform queues immediately if the desired duration has %% decreased - case (DesiredDurationAvg == infinity andalso DesiredDurationAvg1 /= infinity) + case (DesiredDurationAvg == infinity andalso + DesiredDurationAvg1 /= infinity) orelse (DesiredDurationAvg /= infinity andalso + DesiredDurationAvg1 /= infinity andalso DesiredDurationAvg > DesiredDurationAvg1) of true -> %% If we have pessimistic information, we need to inform -- cgit v1.2.1 From 607fee460dfa16d221ceaa6f821497d3e9f34215 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 13:16:36 +0000 Subject: Improvements to documentation, further improvements to conditional and refactorings. Removal of max queue duration because the rate calculated by the queue doesn't decay in the way necessary to make the max queue duration necessary --- src/rabbit_memory_monitor.erl | 65 +++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index d6f0c600..2fc983b4 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -58,7 +58,14 @@ -define(SERVER, ?MODULE). -define(DEFAULT_UPDATE_INTERVAL, 2500). -define(TABLE_NAME, ?MODULE). --define(MAX_QUEUE_DURATION, 86400). %% 60*60*24 i.e. 1 day + +%% Because we have a feedback loop here, we need to ensure that we +%% have some space for when the queues don't quite respond as fast as +%% we would like, or when there is buffering going on in other parts +%% of the system. In short, we aim to stay some distance away from +%% when the memory alarms will go off, which cause channel.flow. +%% Note that all other Thresholds are relative to this scaling. +-define(MEMORY_LIMIT_SCALING, 0.6). -define(LIMIT_THRESHOLD, 0.5). %% don't limit queues when mem use is < this @@ -70,11 +77,11 @@ -define(SUM_INC_THRESHOLD, 0.95). -define(SUM_INC_AMOUNT, 1.0). -%% Queues which are empty will report a duration of 0. This may result -%% in the memory-monitor deciding that the desired duration should -%% also be 0, which is a disaster for fast moving queues. A fast -%% moving queue may well oscillate between reporting 0 and a small -%% number close to 0. Thus if the number reported is under +%% Queues which are empty will report a duration of 0. If all queues +%% are empty then the memory-monitor may decide that the desired +%% duration should also be 0, which is a disaster for fast moving +%% queues. A fast moving queue may well oscillate between reporting 0 +%% and a small number close to 0. Thus if the number reported is under %% SMALL_INFINITY_OSCILLATION_DURATION and the last value we sent it %% was infinity, then send it infinity again. Thus its duration must %% rise to above SMALL_INFINITY_OSCILLATION_DURATION before we start @@ -84,12 +91,6 @@ %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. 
-define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). -%% Because we have a feedback loop here, we need to ensure that we -%% have some space for when the queues don't quite respond as fast as -%% we would like, or when there is buffering going on in other parts -%% of the system. In short, we aim to stay some distance away from -%% when the memory alarms will go off, which cause channel.flow. --define(MEMORY_LIMIT_SCALING, 0.6). %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -152,8 +153,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, queue_durations = Durations, desired_duration = SendDuration}) -> - QueueDuration1 = case infinity == QueueDuration orelse - QueueDuration > ?MAX_QUEUE_DURATION of + QueueDuration1 = case infinity == QueueDuration of true -> infinity; false -> QueueDuration end, @@ -241,29 +241,29 @@ internal_update(State = #state{memory_limit = Limit, queue_duration_sum = Sum, queue_duration_count = Count}) -> MemoryRatio = erlang:memory(total) / Limit, - Sum1 = case MemoryRatio < ?SUM_INC_THRESHOLD of - true -> Sum + ?SUM_INC_AMOUNT; - false -> Sum - end, - AvgDuration = case Count == 0 of - true -> infinity; - false -> Sum1 / Count - end, DesiredDurationAvg1 = - case AvgDuration == infinity orelse MemoryRatio < ?LIMIT_THRESHOLD of - true -> infinity; - false -> AvgDuration / MemoryRatio + case MemoryRatio < ?LIMIT_THRESHOLD of + true -> + infinity; + false -> + Sum1 = case MemoryRatio < ?SUM_INC_THRESHOLD of + true -> Sum + ?SUM_INC_AMOUNT; + false -> Sum + end, + case Count == 0 of + true -> infinity; + false -> (Sum1 / Count) / MemoryRatio + end end, State1 = State#state{desired_duration = DesiredDurationAvg1}, %% only inform queues immediately if the desired duration has %% decreased - case (DesiredDurationAvg == infinity andalso - DesiredDurationAvg1 /= infinity) - orelse (DesiredDurationAvg /= infinity andalso - DesiredDurationAvg1 /= infinity andalso - DesiredDurationAvg > DesiredDurationAvg1) of - true -> + case DesiredDurationAvg1 == infinity orelse + (DesiredDurationAvg /= infinity andalso + DesiredDurationAvg1 >= DesiredDurationAvg) of + true -> ok; + false -> %% If we have pessimistic information, we need to inform %% queues to reduce it's memory usage when needed. This %% sometimes wakes up queues from hibernation. @@ -280,8 +280,7 @@ internal_update(State = #state{memory_limit = Limit, Proc#process{sent=DesiredDurationAvg1}); false -> true end - end, true, Durations); - false -> ok + end, true, Durations) end, State1. 
-- cgit v1.2.1 From e36a9b724097e8c26a41f536779838f37b726efb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 13:20:59 +0000 Subject: beautifuler --- src/rabbit_memory_monitor.erl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 2fc983b4..f2d1e9ca 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -242,7 +242,7 @@ internal_update(State = #state{memory_limit = Limit, queue_duration_count = Count}) -> MemoryRatio = erlang:memory(total) / Limit, DesiredDurationAvg1 = - case MemoryRatio < ?LIMIT_THRESHOLD of + case MemoryRatio < ?LIMIT_THRESHOLD orelse Count == 0 of true -> infinity; false -> @@ -250,10 +250,7 @@ internal_update(State = #state{memory_limit = Limit, true -> Sum + ?SUM_INC_AMOUNT; false -> Sum end, - case Count == 0 of - true -> infinity; - false -> (Sum1 / Count) / MemoryRatio - end + (Sum1 / Count) / MemoryRatio end, State1 = State#state{desired_duration = DesiredDurationAvg1}, -- cgit v1.2.1 From 902624e3220d1ec8ea4111b277ce14f7c8558be7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 15:04:36 +0000 Subject: Improved documentation and code regarding fast moving mainly empty queues and the SMALL_INFINITY_OSCILLATION_DURATION definition. Oh, and clamped zero. --- src/rabbit_memory_monitor.erl | 94 +++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index f2d1e9ca..1e84d64d 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -77,20 +77,31 @@ -define(SUM_INC_THRESHOLD, 0.95). -define(SUM_INC_AMOUNT, 1.0). -%% Queues which are empty will report a duration of 0. If all queues -%% are empty then the memory-monitor may decide that the desired -%% duration should also be 0, which is a disaster for fast moving -%% queues. A fast moving queue may well oscillate between reporting 0 -%% and a small number close to 0. Thus if the number reported is under -%% SMALL_INFINITY_OSCILLATION_DURATION and the last value we sent it -%% was infinity, then send it infinity again. Thus its duration must -%% rise to above SMALL_INFINITY_OSCILLATION_DURATION before we start -%% sending it durations /= infinity. +%% A queue may report a duration of 0, or close to zero, and may be +%% told a duration of infinity (eg if less than LIMIT_THRESHOLD memory +%% is being used). Subsequently, the memory-monitor can calculate the +%% desired duration as zero, or close to zero (eg now more memory is +%% being used, but the sum of durations is very small). If it is a +%% fast moving queue, telling it a very small value will badly hurt +%% it, unnecessarily: a fast moving queue will often oscillate between +%% being empty and having a few thousand msgs in it, representing a +%% few hundred milliseconds. SMALL_INFINITY_OSCILLATION_DURATION is a +%% threshold: if a queue has been told a duration of infinity last +%% time, and it's reporting a value < +%% SMALL_INFINITY_OSCILLATION_DURATION then we send it back a duration +%% of infinity, even if the current desired duration /= infinity. Thus +%% for a queue which has been told infinity, it must report a duration +%% >= SMALL_INFINITY_OSCILLATION_DURATION before it is told a +%% non-infinity duration. 
This basically forms a threshold which +%% effects faster queues more than slower queues and which accounts +%% for natural fluctuations occurring in the queue length. -define(SMALL_INFINITY_OSCILLATION_DURATION, 1.0). %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). +-define(EPSILON, 0.000001). %% less than this and we clamp to 0 + %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -153,31 +164,27 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, queue_durations = Durations, desired_duration = SendDuration}) -> - QueueDuration1 = case infinity == QueueDuration of - true -> infinity; - false -> QueueDuration - end, [Proc = #process{reported = PrevQueueDuration, sent = PrevSendDuration}] = ets:lookup(Durations, Pid), SendDuration1 = - case QueueDuration1 < ?SMALL_INFINITY_OSCILLATION_DURATION andalso - PrevSendDuration == infinity of + case QueueDuration /= infinity andalso PrevSendDuration == infinity + andalso QueueDuration < ?SMALL_INFINITY_OSCILLATION_DURATION of true -> infinity; false -> SendDuration end, gen_server2:reply(From, SendDuration1), {Sum1, Count1} = - case {PrevQueueDuration, QueueDuration1} of + case {PrevQueueDuration, QueueDuration} of {infinity, infinity} -> {Sum, Count}; - {infinity, _} -> {Sum + QueueDuration1, Count + 1}; + {infinity, _} -> {Sum + QueueDuration, Count + 1}; {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; - {_, _} -> {Sum - PrevQueueDuration + QueueDuration1, Count} + {_, _} -> {Sum - PrevQueueDuration + QueueDuration, Count} end, - true = ets:insert(Durations, Proc#process{reported = QueueDuration1, + true = ets:insert(Durations, Proc#process{reported = QueueDuration, sent = SendDuration1}), - {noreply, State#state{queue_duration_sum = lists:max([0.0, Sum1]), + {noreply, State#state{queue_duration_sum = zero_clamp(Sum1), queue_duration_count = Count1}}; handle_call({register, Pid, MFA}, _From, @@ -220,6 +227,12 @@ code_change(_OldVsn, State, _Extra) -> %% Internal functions %%---------------------------------------------------------------------------- +zero_clamp(Sum) -> + case Sum < ?EPSILON of + true -> 0.0; + false -> Sum + end. + internal_deregister(Pid, State = #state{queue_duration_sum = Sum, queue_duration_count = Count, queue_durations = Durations}) -> @@ -228,7 +241,7 @@ internal_deregister(Pid, State = #state{queue_duration_sum = Sum, [#process{reported = PrevQueueDuration}] -> Sum1 = case PrevQueueDuration of infinity -> Sum; - _ -> lists:max([0.0, Sum - PrevQueueDuration]) + _ -> zero_clamp(Sum - PrevQueueDuration) end, true = ets:delete(State#state.queue_durations, Pid), State#state{queue_duration_sum = Sum1, @@ -264,20 +277,31 @@ internal_update(State = #state{memory_limit = Limit, %% If we have pessimistic information, we need to inform %% queues to reduce it's memory usage when needed. This %% sometimes wakes up queues from hibernation. 
- true = ets:foldl( - fun (Proc = #process{reported = QueueDuration, - sent = PrevSendDuration}, true) -> - case DesiredDurationAvg1 < - lists:min([PrevSendDuration, QueueDuration]) of - true -> - ok = set_queue_duration( - Proc, DesiredDurationAvg1), - ets:insert( - Durations, - Proc#process{sent=DesiredDurationAvg1}); - false -> true - end - end, true, Durations) + true = + ets:foldl( + fun (Proc = #process{reported = QueueDuration, + sent = PrevSendDuration}, true) -> + Send = + case {QueueDuration, PrevSendDuration} of + {infinity, infinity} -> + true; + {infinity, B} -> + DesiredDurationAvg1 < B; + {A, infinity} -> + DesiredDurationAvg1 < A andalso A >= + ?SMALL_INFINITY_OSCILLATION_DURATION; + {A, B} -> + DesiredDurationAvg1 < lists:min([A,B]) + end, + case Send of + true -> + ok = set_queue_duration(Proc, DesiredDurationAvg1), + ets:insert( + Durations, + Proc#process{sent=DesiredDurationAvg1}); + false -> true + end + end, true, Durations) end, State1. -- cgit v1.2.1 From 2cbdb487dcd7e0b7f0445c2e628ea87a181e04e3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 15:06:01 +0000 Subject: s/b(.)t/s\1m/ --- src/rabbit_memory_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 1e84d64d..03892193 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -81,7 +81,7 @@ %% told a duration of infinity (eg if less than LIMIT_THRESHOLD memory %% is being used). Subsequently, the memory-monitor can calculate the %% desired duration as zero, or close to zero (eg now more memory is -%% being used, but the sum of durations is very small). If it is a +%% being used, and the sum of durations is very small). If it is a %% fast moving queue, telling it a very small value will badly hurt %% it, unnecessarily: a fast moving queue will often oscillate between %% being empty and having a few thousand msgs in it, representing a -- cgit v1.2.1 From 692d61310130cabd954a4cf742bb04a956859694 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 15:29:56 +0000 Subject: SMALL_INFINITY_OSCILLATION_DURATION => SMALL_DURATION_THRESHOLD --- src/rabbit_memory_monitor.erl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 03892193..9689994a 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -85,17 +85,16 @@ %% fast moving queue, telling it a very small value will badly hurt %% it, unnecessarily: a fast moving queue will often oscillate between %% being empty and having a few thousand msgs in it, representing a -%% few hundred milliseconds. SMALL_INFINITY_OSCILLATION_DURATION is a -%% threshold: if a queue has been told a duration of infinity last -%% time, and it's reporting a value < -%% SMALL_INFINITY_OSCILLATION_DURATION then we send it back a duration -%% of infinity, even if the current desired duration /= infinity. Thus -%% for a queue which has been told infinity, it must report a duration -%% >= SMALL_INFINITY_OSCILLATION_DURATION before it is told a +%% few hundred milliseconds. SMALL_DURATION_THRESHOLD is a threshold: +%% if a queue has been told a duration of infinity last time, and it's +%% reporting a value < SMALL_DURATION_THRESHOLD then we send it back a +%% duration of infinity, even if the current desired duration /= +%% infinity. 
Thus for a queue which has been told infinity, it must +%% report a duration >= SMALL_DURATION_THRESHOLD before it is told a %% non-infinity duration. This basically forms a threshold which %% effects faster queues more than slower queues and which accounts %% for natural fluctuations occurring in the queue length. --define(SMALL_INFINITY_OSCILLATION_DURATION, 1.0). +-define(SMALL_DURATION_THRESHOLD, 1.0). %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). @@ -169,7 +168,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, SendDuration1 = case QueueDuration /= infinity andalso PrevSendDuration == infinity - andalso QueueDuration < ?SMALL_INFINITY_OSCILLATION_DURATION of + andalso QueueDuration < ?SMALL_DURATION_THRESHOLD of true -> infinity; false -> SendDuration end, -- cgit v1.2.1 From 2fcb3ed2aab1ef3e4df8b14fb4143baadbd57c88 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 16:28:46 +0000 Subject: Remove the small_duration_threshold, and instead smooth the ram_msg_count over the last two periods. This seems to work reasonably well. --- src/rabbit_memory_monitor.erl | 39 +++++---------------------------------- src/rabbit_variable_queue.erl | 15 +++++++++++---- 2 files changed, 16 insertions(+), 38 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 9689994a..8266845f 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -77,25 +77,6 @@ -define(SUM_INC_THRESHOLD, 0.95). -define(SUM_INC_AMOUNT, 1.0). -%% A queue may report a duration of 0, or close to zero, and may be -%% told a duration of infinity (eg if less than LIMIT_THRESHOLD memory -%% is being used). Subsequently, the memory-monitor can calculate the -%% desired duration as zero, or close to zero (eg now more memory is -%% being used, and the sum of durations is very small). If it is a -%% fast moving queue, telling it a very small value will badly hurt -%% it, unnecessarily: a fast moving queue will often oscillate between -%% being empty and having a few thousand msgs in it, representing a -%% few hundred milliseconds. SMALL_DURATION_THRESHOLD is a threshold: -%% if a queue has been told a duration of infinity last time, and it's -%% reporting a value < SMALL_DURATION_THRESHOLD then we send it back a -%% duration of infinity, even if the current desired duration /= -%% infinity. Thus for a queue which has been told infinity, it must -%% report a duration >= SMALL_DURATION_THRESHOLD before it is told a -%% non-infinity duration. This basically forms a threshold which -%% effects faster queues more than slower queues and which accounts -%% for natural fluctuations occurring in the queue length. --define(SMALL_DURATION_THRESHOLD, 1.0). - %% If user disabled vm_memory_monitor, let's assume 1GB of memory we can use. -define(MEMORY_SIZE_FOR_DISABLED_VMM, 1073741824). 
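
Pausing between hunks: the desired-duration calculation itself is untouched by this patch, and by now it is compact enough to restate on its own. A sketch with the earlier -define'd constants inlined (0.5 is ?LIMIT_THRESHOLD, 0.95 is ?SUM_INC_THRESHOLD, 1.0 is ?SUM_INC_AMOUNT; MemoryRatio is used/limit, as in internal_update/1). For example, four queues whose durations sum to 10s with memory at 80% of the scaled limit are handed ((10 + 1.0) / 4) / 0.8, roughly 3.4s:

%% Sketch of the desired-duration calculation as it stands at this
%% point in the series; constants inlined from the -define's above.
desired_duration_sketch(MemoryRatio, _Sum, Count)
  when MemoryRatio < 0.5; Count == 0 ->          %% ?LIMIT_THRESHOLD
    infinity;
desired_duration_sketch(MemoryRatio, Sum, Count) ->
    Sum1 = case MemoryRatio < 0.95 of            %% ?SUM_INC_THRESHOLD
               true  -> Sum + 1.0;               %% ?SUM_INC_AMOUNT
               false -> Sum
           end,
    (Sum1 / Count) / MemoryRatio.
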
@@ -166,13 +147,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, [Proc = #process{reported = PrevQueueDuration, sent = PrevSendDuration}] = ets:lookup(Durations, Pid), - SendDuration1 = - case QueueDuration /= infinity andalso PrevSendDuration == infinity - andalso QueueDuration < ?SMALL_DURATION_THRESHOLD of - true -> infinity; - false -> SendDuration - end, - gen_server2:reply(From, SendDuration1), + gen_server2:reply(From, SendDuration), {Sum1, Count1} = case {PrevQueueDuration, QueueDuration} of @@ -182,7 +157,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, {_, _} -> {Sum - PrevQueueDuration + QueueDuration, Count} end, true = ets:insert(Durations, Proc#process{reported = QueueDuration, - sent = SendDuration1}), + sent = SendDuration}), {noreply, State#state{queue_duration_sum = zero_clamp(Sum1), queue_duration_count = Count1}}; @@ -282,13 +257,9 @@ internal_update(State = #state{memory_limit = Limit, sent = PrevSendDuration}, true) -> Send = case {QueueDuration, PrevSendDuration} of - {infinity, infinity} -> - true; - {infinity, B} -> - DesiredDurationAvg1 < B; - {A, infinity} -> - DesiredDurationAvg1 < A andalso A >= - ?SMALL_INFINITY_OSCILLATION_DURATION; + {infinity, infinity} -> true; + {infinity, B} -> DesiredDurationAvg1 < B; + {A, infinity} -> DesiredDurationAvg1 < A; {A, B} -> DesiredDurationAvg1 < lists:min([A,B]) end, diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c3ad5463..6806a0cd 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -49,6 +49,7 @@ duration_target, target_ram_msg_count, ram_msg_count, + ram_msg_count_prev, queue, index_state, next_seq_id, @@ -110,6 +111,8 @@ q4 :: queue(), duration_target :: non_neg_integer(), target_ram_msg_count :: non_neg_integer(), + ram_msg_count :: non_neg_integer(), + ram_msg_count_prev :: non_neg_integer(), queue :: queue_name(), index_state :: any(), next_seq_id :: seq_id(), @@ -174,9 +177,10 @@ init(QueueName) -> #vqstate { q1 = queue:new(), q2 = queue:new(), gamma = Gamma, q3 = queue:new(), q4 = queue:new(), - target_ram_msg_count = undefined, duration_target = undefined, + target_ram_msg_count = undefined, ram_msg_count = 0, + ram_msg_count_prev = 0, queue = QueueName, index_state = IndexState1, next_seq_id = NextSeqId, @@ -243,6 +247,7 @@ remeasure_rates(State = #vqstate { egress_rate = Egress, rate_timestamp = Timestamp, in_counter = InCount, out_counter = OutCount, + ram_msg_count = RamMsgCount, duration_target = DurationTarget }) -> Now = now(), {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), @@ -255,15 +260,17 @@ remeasure_rates(State = #vqstate { egress_rate = Egress, ingress_rate = Ingress1, avg_ingress_rate = AvgIngressRate, rate_timestamp = Now, + ram_msg_count_prev = RamMsgCount, out_counter = 0, in_counter = 0 }). ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, - ram_msg_count = RamMsgCount }) -> + ram_msg_count = RamMsgCount, + ram_msg_count_prev = RamMsgCountPrev }) -> %% msgs / (msgs/sec) == sec case AvgEgressRate == 0 andalso AvgIngressRate == 0 of true -> infinity; - false -> RamMsgCount / (AvgEgressRate + AvgIngressRate) + false -> (RamMsgCountPrev + RamMsgCount) / (2 * (AvgEgressRate + AvgIngressRate)) end. 
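
To put invented numbers on the smoothing just introduced: a fast queue that held 2000 messages in RAM last period but is empty now, with ingress plus egress averaging 100 msg/s, reports (2000 + 0) / (2 * 100) = 10 seconds, rather than the 0 it would previously have oscillated down to. Restated standalone (a sketch mirroring the patched ram_duration/1, not the module itself):

%% Sketch of the smoothed duration calculation from the hunk above.
%% Averaging the current and previous RAM message counts damps the
%% empty/full oscillation of fast-moving queues.
ram_duration_sketch(RamMsgCount, RamMsgCountPrev,
                    AvgEgressRate, AvgIngressRate) ->
    case AvgEgressRate == 0 andalso AvgIngressRate == 0 of
        true  -> infinity;
        false -> (RamMsgCountPrev + RamMsgCount) /
                     (2 * (AvgEgressRate + AvgIngressRate))
    end.
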
fetch(State = @@ -463,7 +470,7 @@ full_flush_journal(State = #vqstate { index_state = IndexState }) -> status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, len = Len, on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, + ram_msg_count = RamMsgCount, avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate }) -> [ {q1, queue:len(Q1)}, -- cgit v1.2.1 From 7fd1c0c92b8c3fe8c975ced752c2f32d2ee4802d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 17:10:27 +0000 Subject: Don't need this variable anymore --- src/rabbit_memory_monitor.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 8266845f..5f39f7a9 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -144,8 +144,7 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, queue_durations = Durations, desired_duration = SendDuration}) -> - [Proc = #process{reported = PrevQueueDuration, sent = PrevSendDuration}] = - ets:lookup(Durations, Pid), + [Proc = #process{reported=PrevQueueDuration}] = ets:lookup(Durations, Pid), gen_server2:reply(From, SendDuration), -- cgit v1.2.1 From 79a9e65a65e34fa95dd8bea88cf65f946fb8c83c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 17:19:42 +0000 Subject: Added demonitoring, corrected counting on deregistering, and got fed up with the lack of whitespace --- src/rabbit_memory_monitor.erl | 92 +++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 5f39f7a9..1b2c6982 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -45,7 +45,7 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --record(process, {pid, reported, sent, callback}). +-record(process, {pid, reported, sent, callback, monitor}). -record(state, {timer, %% 'internal_update' timer queue_durations, %% ets #process @@ -131,20 +131,21 @@ init([]) -> Ets = ets:new(?TABLE_NAME, [set, private, {keypos, #process.pid}]), {ok, internal_update( - #state{timer = TRef, - queue_durations = Ets, - queue_duration_sum = 0.0, - queue_duration_count = 0, - memory_limit = MemoryLimit, - desired_duration = infinity})}. + #state { timer = TRef, + queue_durations = Ets, + queue_duration_sum = 0.0, + queue_duration_count = 0, + memory_limit = MemoryLimit, + desired_duration = infinity })}. 
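
For orientation, this is how a client is evidently meant to drive the monitor at this point in the series: register once with an {M, F, A} callback, then periodically report a measured duration and act on the desired duration the call returns. A hypothetical client sketch; measure_duration/0 and set_target_duration/2 are invented stand-ins, not part of RabbitMQ:

-module(monitor_client_sketch).
-export([start_monitoring/0, set_target_duration/2]).

%% Invented stand-ins: a real queue process would measure its own
%% RAM duration and shrink or grow towards the target it is given.
measure_duration() -> 10.0.
set_target_duration(_Pid, _Duration) -> ok.

start_monitoring() ->
    %% The monitor calls erlang:apply(M, F, A ++ [Duration]), so this
    %% callback arrives as set_target_duration(OurPid, Duration).
    ok = rabbit_memory_monitor:register(
           self(), {?MODULE, set_target_duration, [self()]}),
    report_loop().

report_loop() ->
    Reported = measure_duration(),
    Desired = rabbit_memory_monitor:report_queue_duration(self(), Reported),
    ok = set_target_duration(self(), Desired),
    %% the server re-averages every ?DEFAULT_UPDATE_INTERVAL (2500ms)
    timer:sleep(2500),
    report_loop().
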
handle_call({report_queue_duration, Pid, QueueDuration}, From, - State = #state{queue_duration_sum = Sum, - queue_duration_count = Count, - queue_durations = Durations, - desired_duration = SendDuration}) -> + State = #state { queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations, + desired_duration = SendDuration }) -> - [Proc = #process{reported=PrevQueueDuration}] = ets:lookup(Durations, Pid), + [Proc = #process { reported = PrevQueueDuration }] = + ets:lookup(Durations, Pid), gen_server2:reply(From, SendDuration), @@ -155,16 +156,17 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; {_, _} -> {Sum - PrevQueueDuration + QueueDuration, Count} end, - true = ets:insert(Durations, Proc#process{reported = QueueDuration, - sent = SendDuration}), - {noreply, State#state{queue_duration_sum = zero_clamp(Sum1), - queue_duration_count = Count1}}; + true = ets:insert(Durations, Proc #process { reported = QueueDuration, + sent = SendDuration }), + {noreply, State #state { queue_duration_sum = zero_clamp(Sum1), + queue_duration_count = Count1 }}; handle_call({register, Pid, MFA}, _From, - State = #state{queue_durations = Durations}) -> - _MRef = erlang:monitor(process, Pid), - true = ets:insert(Durations, #process{pid = Pid, reported = infinity, - sent = infinity, callback = MFA}), + State = #state { queue_durations = Durations }) -> + MRef = erlang:monitor(process, Pid), + true = ets:insert(Durations, #process { pid = Pid, reported = infinity, + sent = infinity, callback = MFA, + monitor = MRef }), {reply, ok, State}; handle_call(_Request, _From, State) -> @@ -188,7 +190,7 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> handle_info(_Info, State) -> {noreply, State}. -terminate(_Reason, #state{timer = TRef}) -> +terminate(_Reason, #state { timer = TRef }) -> timer:cancel(TRef), ok. @@ -206,26 +208,29 @@ zero_clamp(Sum) -> false -> Sum end. -internal_deregister(Pid, State = #state{queue_duration_sum = Sum, - queue_duration_count = Count, - queue_durations = Durations}) -> +internal_deregister(Pid, State = #state { queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations }) -> case ets:lookup(Durations, Pid) of [] -> State; - [#process{reported = PrevQueueDuration}] -> - Sum1 = case PrevQueueDuration of - infinity -> Sum; - _ -> zero_clamp(Sum - PrevQueueDuration) - end, - true = ets:delete(State#state.queue_durations, Pid), - State#state{queue_duration_sum = Sum1, - queue_duration_count = Count-1} + [#process { reported = PrevQueueDuration, monitor = MRef }] -> + true = erlang:demonitor(MRef), + {Sum1, Count1} = + case PrevQueueDuration of + infinity -> {Sum, Count}; + _ -> {zero_clamp(Sum - PrevQueueDuration), + Count - 1} + end, + true = ets:delete(State #state.queue_durations, Pid), + State #state { queue_duration_sum = Sum1, + queue_duration_count = Count1 } end. 
-internal_update(State = #state{memory_limit = Limit, - queue_durations = Durations, - desired_duration = DesiredDurationAvg, - queue_duration_sum = Sum, - queue_duration_count = Count}) -> +internal_update(State = #state { memory_limit = Limit, + queue_durations = Durations, + desired_duration = DesiredDurationAvg, + queue_duration_sum = Sum, + queue_duration_count = Count }) -> MemoryRatio = erlang:memory(total) / Limit, DesiredDurationAvg1 = case MemoryRatio < ?LIMIT_THRESHOLD orelse Count == 0 of @@ -238,7 +243,7 @@ internal_update(State = #state{memory_limit = Limit, end, (Sum1 / Count) / MemoryRatio end, - State1 = State#state{desired_duration = DesiredDurationAvg1}, + State1 = State #state { desired_duration = DesiredDurationAvg1 }, %% only inform queues immediately if the desired duration has %% decreased @@ -252,8 +257,8 @@ internal_update(State = #state{memory_limit = Limit, %% sometimes wakes up queues from hibernation. true = ets:foldl( - fun (Proc = #process{reported = QueueDuration, - sent = PrevSendDuration}, true) -> + fun (Proc = #process { reported = QueueDuration, + sent = PrevSendDuration }, true) -> Send = case {QueueDuration, PrevSendDuration} of {infinity, infinity} -> true; @@ -264,10 +269,11 @@ internal_update(State = #state{memory_limit = Limit, end, case Send of true -> - ok = set_queue_duration(Proc, DesiredDurationAvg1), + ok = set_queue_duration(Proc, + DesiredDurationAvg1), ets:insert( Durations, - Proc#process{sent=DesiredDurationAvg1}); + Proc #process {sent = DesiredDurationAvg1}); false -> true end end, true, Durations) @@ -280,5 +286,5 @@ get_memory_limit() -> A -> A end. -set_queue_duration(#process{callback={M,F,A}}, QueueDuration) -> +set_queue_duration(#process { callback = {M, F, A} }, QueueDuration) -> ok = erlang:apply(M, F, A++[QueueDuration]). -- cgit v1.2.1 From 2e6822de01dfc024bab05bf48461ad6b2562a317 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 17:21:50 +0000 Subject: remove unnecessary comment --- src/rabbit_memory_monitor.erl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 1b2c6982..664e2348 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -252,9 +252,6 @@ internal_update(State = #state { memory_limit = Limit, DesiredDurationAvg1 >= DesiredDurationAvg) of true -> ok; false -> - %% If we have pessimistic information, we need to inform - %% queues to reduce it's memory usage when needed. This - %% sometimes wakes up queues from hibernation. true = ets:foldl( fun (Proc = #process { reported = QueueDuration, -- cgit v1.2.1 From e249cf68dc3dfc8ab772ac975200f001f508dee9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 26 Nov 2009 17:48:21 +0000 Subject: Don't demonitor if we received a DOWN msg. --- src/rabbit_memory_monitor.erl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 664e2348..43de768e 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -176,7 +176,7 @@ handle_cast(update, State) -> {noreply, internal_update(State)}; handle_cast({deregister, Pid}, State) -> - {noreply, internal_deregister(Pid, State)}; + {noreply, internal_deregister(Pid, true, State)}; handle_cast(stop, State) -> {stop, normal, State}; @@ -185,7 +185,7 @@ handle_cast(_Request, State) -> {noreply, State}. 
handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> - {noreply, internal_deregister(Pid, State)}; + {noreply, internal_deregister(Pid, false, State)}; handle_info(_Info, State) -> {noreply, State}. @@ -208,13 +208,17 @@ zero_clamp(Sum) -> false -> Sum end. -internal_deregister(Pid, State = #state { queue_duration_sum = Sum, - queue_duration_count = Count, - queue_durations = Durations }) -> +internal_deregister(Pid, Demonitor, + State = #state { queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations }) -> case ets:lookup(Durations, Pid) of [] -> State; [#process { reported = PrevQueueDuration, monitor = MRef }] -> - true = erlang:demonitor(MRef), + true = case Demonitor of + true -> erlang:demonitor(MRef); + false -> true + end, {Sum1, Count1} = case PrevQueueDuration of infinity -> {Sum, Count}; -- cgit v1.2.1 From 0c6604e251ade6ca210dba328bf23314fa647b46 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 26 Nov 2009 18:20:13 +0000 Subject: use ?SERVER wherever possible --- src/rabbit_memory_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 43de768e..63f33836 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -112,7 +112,7 @@ deregister(Pid) -> gen_server2:cast(?SERVER, {deregister, Pid}). report_queue_duration(Pid, QueueDuration) -> - gen_server2:call(rabbit_memory_monitor, + gen_server2:call(?SERVER, {report_queue_duration, Pid, QueueDuration}, infinity). stop() -> -- cgit v1.2.1 From e5be2a2b45ac92310ee736a8fcf838f188b83c81 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 26 Nov 2009 18:20:36 +0000 Subject: cosmetic and inlining --- src/rabbit_memory_monitor.erl | 66 +++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 63f33836..b94badc4 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -123,7 +123,11 @@ stop() -> %%---------------------------------------------------------------------------- init([]) -> - MemoryLimit = trunc(get_memory_limit() * ?MEMORY_LIMIT_SCALING), + MemoryLimit = trunc(?MEMORY_LIMIT_SCALING * + (case vm_memory_monitor:get_memory_limit() of + undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; + Limit -> Limit + end)), {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, ?SERVER, update, []), @@ -150,12 +154,13 @@ handle_call({report_queue_duration, Pid, QueueDuration}, From, gen_server2:reply(From, SendDuration), {Sum1, Count1} = - case {PrevQueueDuration, QueueDuration} of - {infinity, infinity} -> {Sum, Count}; - {infinity, _} -> {Sum + QueueDuration, Count + 1}; - {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; - {_, _} -> {Sum - PrevQueueDuration + QueueDuration, Count} - end, + case {PrevQueueDuration, QueueDuration} of + {infinity, infinity} -> {Sum, Count}; + {infinity, _} -> {Sum + QueueDuration, Count + 1}; + {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; + {_, _} -> {Sum - PrevQueueDuration + QueueDuration, + Count} + end, true = ets:insert(Durations, Proc #process { reported = QueueDuration, sent = SendDuration }), {noreply, State #state { queue_duration_sum = zero_clamp(Sum1), @@ -216,7 +221,7 @@ internal_deregister(Pid, Demonitor, [] -> State; [#process { reported = PrevQueueDuration, monitor = MRef }] -> true = case Demonitor of - true -> erlang:demonitor(MRef); + true -> erlang:demonitor(MRef); 
false -> true end, {Sum1, Count1} = @@ -225,7 +230,7 @@ internal_deregister(Pid, Demonitor, _ -> {zero_clamp(Sum - PrevQueueDuration), Count - 1} end, - true = ets:delete(State #state.queue_durations, Pid), + true = ets:delete(Durations, Pid), State #state { queue_duration_sum = Sum1, queue_duration_count = Count1 } end. @@ -242,7 +247,7 @@ internal_update(State = #state { memory_limit = Limit, infinity; false -> Sum1 = case MemoryRatio < ?SUM_INC_THRESHOLD of - true -> Sum + ?SUM_INC_AMOUNT; + true -> Sum + ?SUM_INC_AMOUNT; false -> Sum end, (Sum1 / Count) / MemoryRatio @@ -254,38 +259,33 @@ internal_update(State = #state { memory_limit = Limit, case DesiredDurationAvg1 == infinity orelse (DesiredDurationAvg /= infinity andalso DesiredDurationAvg1 >= DesiredDurationAvg) of - true -> ok; + true -> + ok; false -> true = ets:foldl( fun (Proc = #process { reported = QueueDuration, - sent = PrevSendDuration }, true) -> - Send = - case {QueueDuration, PrevSendDuration} of - {infinity, infinity} -> true; - {infinity, B} -> DesiredDurationAvg1 < B; - {A, infinity} -> DesiredDurationAvg1 < A; - {A, B} -> - DesiredDurationAvg1 < lists:min([A,B]) - end, - case Send of + sent = PrevSendDuration, + callback = {M, F, A} }, true) -> + case (case {QueueDuration, PrevSendDuration} of + {infinity, infinity} -> + true; + {infinity, B} -> + DesiredDurationAvg1 < B; + {A, infinity} -> + DesiredDurationAvg1 < A; + {A, B} -> + DesiredDurationAvg1 < lists:min([A,B]) + end) of true -> - ok = set_queue_duration(Proc, - DesiredDurationAvg1), + ok = erlang:apply( + M, F, A ++ [DesiredDurationAvg1]), ets:insert( Durations, Proc #process {sent = DesiredDurationAvg1}); - false -> true + false -> + true end end, true, Durations) end, State1. - -get_memory_limit() -> - case vm_memory_monitor:get_memory_limit() of - undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; - A -> A - end. - -set_queue_duration(#process { callback = {M, F, A} }, QueueDuration) -> - ok = erlang:apply(M, F, A++[QueueDuration]). 
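%% A minimal sketch (not RabbitMQ code) of the demonitor pattern
%% adopted in the deregister path above: an explicit deregister must
%% release the monitor it still holds, whereas handling a 'DOWN'
%% message means the monitor has already fired, so a second demonitor
%% is unnecessary work. Monitors is assumed to be a dict of
%% Pid -> MRef.
deregister_sketch(Pid, Demonitor, Monitors) ->
    case dict:find(Pid, Monitors) of
        error      -> Monitors;
        {ok, MRef} ->
            true = case Demonitor of
                       true  -> erlang:demonitor(MRef); %% explicit deregister
                       false -> true  %% 'DOWN' already consumed the monitor
                   end,
            dict:erase(Pid, Monitors)
    end.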
-- cgit v1.2.1 From ba5a0121e7e50646381223e5f621006594c5a4e3 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 26 Nov 2009 21:06:14 +0000 Subject: cosmetic --- src/rabbit_queue_index.erl | 74 +++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index abf8f57e..2f6b6c18 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -208,9 +208,9 @@ write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict }) -> sync_seq_ids(SeqIds, SyncAckJournal, State) -> State1 = case SyncAckJournal of - true -> {Hdl, State2} = get_journal_handle(State), - ok = file_handle_cache:sync(Hdl), - State2; + true -> {Hdl, State2} = get_journal_handle(State), + ok = file_handle_cache:sync(Hdl), + State2; false -> State end, SegNumsSet = @@ -386,7 +386,8 @@ rev_sort(List) -> get_journal_handle(State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> case dict:find(journal, SegHdls) of - {ok, Hdl} -> {Hdl, State}; + {ok, Hdl} -> + {Hdl, State}; error -> Path = filename:join(Dir, ?ACK_JOURNAL_FILENAME), Mode = [raw, binary, delayed_write, write, read, read_ahead], @@ -435,7 +436,8 @@ get_counted_handle(SegNumA, State = #qistate { partial_segments = Partials, get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> case dict:find(SegNum, SegHdls) of - {ok, Hdl} -> {Hdl, State}; + {ok, Hdl} -> + {Hdl, State}; error -> new_handle(SegNum, seg_num_to_path(Dir, SegNum), [binary, raw, read, write, @@ -449,7 +451,7 @@ delete_segment(SegNum, State = #qistate { dir = Dir, partial_segments = Partials }) -> State1 = close_handle(SegNum, State), ok = case file:delete(seg_num_to_path(Dir, SegNum)) of - ok -> ok; + ok -> ok; {error, enoent} -> ok end, State1 #qistate {seg_ack_counts = dict:erase(SegNum, AckCounts), @@ -464,7 +466,8 @@ close_handle(Key, State = #qistate { seg_num_handles = SegHdls }) -> {ok, Hdl} -> ok = file_handle_cache:close(Hdl), State #qistate { seg_num_handles = dict:erase(Key, SegHdls) }; - error -> State + error -> + State end. close_all_handles(State = #qistate { seg_num_handles = SegHdls }) -> @@ -484,7 +487,7 @@ reconstruct_seq_id(SegNum, RelSeq) -> seg_num_to_path(Dir, SegNum) -> SegName = integer_to_list(SegNum), - filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). + filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). delete_queue_directory(Dir) -> {ok, Entries} = file:list_dir(Dir), @@ -510,19 +513,19 @@ blank_state(QueueName) -> StrName = queue_name_to_dir_name(QueueName), Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - #qistate { dir = Dir, - seg_num_handles = dict:new(), - journal_count = 0, + #qistate { dir = Dir, + seg_num_handles = dict:new(), + journal_count = 0, journal_ack_dict = dict:new(), journal_del_dict = dict:new(), - seg_ack_counts = dict:new(), - publish_handle = undefined, + seg_ack_counts = dict:new(), + publish_handle = undefined, partial_segments = dict:new() }. detect_clean_shutdown(Dir) -> case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of - ok -> true; + ok -> true; {error, enoent} -> false end. @@ -534,10 +537,10 @@ store_clean_shutdown(Dir) -> seg_entries_from_dict(SegNum, Dict) -> case dict:find(SegNum, Dict) of - {ok, Entries} -> Entries; - error -> [] + {ok, Entries} -> Entries; + error -> [] end. 
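%% The clean-shutdown marker handled above is a simple crash
%% detector: write a marker file on orderly shutdown, then
%% delete-and-test it on the next start. A generic sketch of the same
%% idea, using plain file operations rather than file_handle_cache:
marker_path(Dir) -> filename:join(Dir, "clean.dot").

%% true iff the previous run terminated cleanly; either way the
%% marker is gone afterwards, so a subsequent crash is detectable.
detect_clean_shutdown_sketch(Dir) ->
    case file:delete(marker_path(Dir)) of
        ok              -> true;
        {error, enoent} -> false
    end.

store_clean_shutdown_sketch(Dir) ->
    ok = file:write_file(marker_path(Dir), <<>>).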
- + %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function @@ -567,7 +570,7 @@ queue_index_walker({[], State, SegNums, QueueNames}) -> queue_index_walker({[{_RelSeq, {MsgId, _IsDelivered, IsPersistent}} | Msgs], State, SegNums, QueueNames}) -> case IsPersistent of - true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}}; + true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}}; false -> queue_index_walker({Msgs, State, SegNums, QueueNames}) end. @@ -656,7 +659,8 @@ load_journal(Hdl, ADict, DDict) -> load_journal(Hdl, ADict, add_seqid_to_dict(SeqId, DDict)); {ok, <>} -> load_journal(Hdl, add_seqid_to_dict(SeqId, ADict), DDict); - _ErrOrEoF -> {ADict, DDict} + _ErrOrEoF -> + {ADict, DDict} end. replay_journal_to_segment(_SegNum, [], {TotalMsgCount, ADict, State}) -> @@ -669,18 +673,18 @@ replay_journal_to_segment(SegNum, Dels, {TotalMsgCount, ADict, State}) -> fun (RelSeq) -> case dict:find(RelSeq, SDict) of {ok, {_MsgId, false, _IsPersistent}} -> true; - _ -> false + _ -> false end end, sets:from_list(Dels))), State2 = append_dels_to_segment(SegNum, ValidDels, State1), Acks = seg_entries_from_dict(SegNum, ADict), case Acks of [] -> {TotalMsgCount, ADict, State2}; - _ -> - ADict1 = dict:erase(SegNum, ADict), - {Count, State3} = filter_acks_and_append_to_segment(SegNum, SDict, - Acks, State2), - {TotalMsgCount - Count, ADict1, State3} + _ -> ADict1 = dict:erase(SegNum, ADict), + {Count, State3} = + filter_acks_and_append_to_segment(SegNum, SDict, + Acks, State2), + {TotalMsgCount - Count, ADict1, State3} end. replay_journal_acks_to_segment(_SegNum, [], {TotalMsgCount, State}) -> @@ -740,10 +744,11 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, dir = Dir }) -> SegmentExists = case dict:find(SegNum, SegHdls) of {ok, _} -> true; - error -> filelib:is_file(seg_num_to_path(Dir, SegNum)) + error -> filelib:is_file(seg_num_to_path(Dir, SegNum)) end, case SegmentExists of - false -> {dict:new(), 0, 0, 0, State}; + false -> + {dict:new(), 0, 0, 0, State}; true -> {Hdl, State1 = #qistate { journal_del_dict = JDelDict, journal_ack_dict = JAckDict }} = @@ -763,12 +768,13 @@ load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls, fun (RelSeq, SDict4) -> case dict:find(RelSeq, SDict4) of {ok, {MsgId, false, IsPersistent}} -> - dict:store(RelSeq, {MsgId, true, IsPersistent}, + dict:store(RelSeq, + {MsgId, true, IsPersistent}, SDict4); - _ -> SDict4 + _ -> + SDict4 end end, SDict1, seg_entries_from_dict(SegNum, JDelDict)), - {SDict3, PubCount, AckCount1, HighRelSeq, State1} end. @@ -793,7 +799,8 @@ load_segment_entries(Hdl, SDict, PubCount, AckCount, HighRelSeq) -> load_segment_entries( Hdl, dict:store(RelSeq, {MsgId, false, 1 == IsPersistentNum}, SDict), PubCount + 1, AckCount, HighRelSeq1); - _ErrOrEoF -> {SDict, PubCount, AckCount, HighRelSeq} + _ErrOrEoF -> + {SDict, PubCount, AckCount, HighRelSeq} end. deliver_or_ack_msg(SDict, AckCount, RelSeq) -> @@ -818,7 +825,7 @@ append_acks_to_segment(SegNum, Acks, end, AckTarget = case dict:find(SegNum, Partials) of {ok, PubCount} -> PubCount; - error -> ?SEGMENT_ENTRIES_COUNT + error -> ?SEGMENT_ENTRIES_COUNT end, AckCount2 = AckCount + length(Acks), append_acks_to_segment(SegNum, AckCount2, Acks, AckTarget, State). 
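%% The ack counting above maintains one invariant: a segment file can
%% be deleted as soon as every message published into it has been
%% acked. For a fully written segment the target is
%% ?SEGMENT_ENTRIES_COUNT; for the trailing, partially filled segment
%% it is the publish count remembered in the partials dict. A reduced
%% sketch of that decision (delete_segment/2 being the real function
%% defined earlier in this file):
ack_target_sketch(SegNum, Partials) ->
    case dict:find(SegNum, Partials) of
        {ok, PubCount} -> PubCount;  %% partial (final) segment
        error          -> 16384      %% ?SEGMENT_ENTRIES_COUNT: full segment
    end.

maybe_delete_segment_sketch(SegNum, AckCount, AckTarget, State)
  when AckCount =:= AckTarget ->
    delete_segment(SegNum, State);   %% everything acked: reclaim the file
maybe_delete_segment_sketch(_SegNum, _AckCount, _AckTarget, State) ->
    State.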
@@ -832,7 +839,8 @@ append_acks_to_segment(SegNum, AckCount, _Acks, AckCount, State = {SegNum, Hdl, AckCount = ?SEGMENT_ENTRIES_COUNT} when Hdl /= undefined -> {SegNum + 1, undefined, 0}; - _ -> PubHdl + _ -> + PubHdl end, delete_segment(SegNum, State #qistate { publish_handle = PubHdl1 }); append_acks_to_segment(_SegNum, _AckCount, [], _AckTarget, State) -> -- cgit v1.2.1 From 2376c691437e7be04c0e9832a6d38952b8ce0652 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 26 Nov 2009 21:28:00 +0000 Subject: better const names --- src/rabbit_queue_index.erl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 2f6b6c18..6fe788f9 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -73,8 +73,8 @@ -define(CLEAN_FILENAME, "clean.dot"). --define(MAX_ACK_JOURNAL_ENTRY_COUNT, 32768). --define(ACK_JOURNAL_FILENAME, "journal.jif"). +-define(MAX_JOURNAL_ENTRY_COUNT, 32768). +-define(JOURNAL_FILENAME, "journal.jif"). -define(DEL_BIT, 0). -define(ACK_BIT, 1). @@ -84,7 +84,7 @@ -define(REL_SEQ_BITS, 14). -define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + 8 - (?REL_SEQ_BITS rem 8))). --define(SEGMENT_ENTRIES_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). +-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). %% seq only is binary 00 followed by 14 bits of rel seq id %% (range: 0 - 16383) @@ -103,7 +103,7 @@ -define(PUBLISH_RECORD_LENGTH_BYTES, ?MSG_ID_BYTES + 2). %% 1 publish, 1 deliver, 1 ack per msg --define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRIES_COUNT * +-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * (?PUBLISH_RECORD_LENGTH_BYTES + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). @@ -250,7 +250,7 @@ next_segment_boundary(SeqId) -> reconstruct_seq_id(SegNum + 1, 0). segment_size() -> - ?SEGMENT_ENTRIES_COUNT. + ?SEGMENT_ENTRY_COUNT. find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> SegNums = all_segment_nums(Dir), @@ -344,14 +344,14 @@ flush_journal(State = #qistate { journal_ack_dict = JAckDict, ok = file_handle_cache:truncate(Hdl), ok = file_handle_cache:sync(Hdl), State4; - JCount1 > ?MAX_ACK_JOURNAL_ENTRY_COUNT -> + JCount1 > ?MAX_JOURNAL_ENTRY_COUNT -> flush_journal(State3); true -> State3 end. maybe_full_flush(State = #qistate { journal_count = JCount }) -> - case JCount > ?MAX_ACK_JOURNAL_ENTRY_COUNT of + case JCount > ?MAX_JOURNAL_ENTRY_COUNT of true -> full_flush_journal(State); false -> State end. @@ -389,7 +389,7 @@ get_journal_handle(State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> {ok, Hdl} -> {Hdl, State}; error -> - Path = filename:join(Dir, ?ACK_JOURNAL_FILENAME), + Path = filename:join(Dir, ?JOURNAL_FILENAME), Mode = [raw, binary, delayed_write, write, read, read_ahead], new_handle(journal, Path, Mode, State) end. 
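%% For reference, the sizing arithmetic behind the constants renamed
%% above. One segment covers all 2^14 = 16384 relative sequence ids
%% (?REL_SEQ_BITS = 14). Each message contributes at most one publish
%% record (16-byte msg id + 2 bytes = 18 bytes) and two rel-seq-only
%% records of 2 bytes each (deliver and ack), hence:
segment_total_size_sketch() ->
    EntryCount  = 16384,  %% ?SEGMENT_ENTRY_COUNT
    PubBytes    = 16 + 2, %% ?PUBLISH_RECORD_LENGTH_BYTES
    RelSeqBytes = 2,      %% ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES
    EntryCount * (PubBytes + 2 * RelSeqBytes). %% = 360448 bytes,
    %% which is also the read-ahead window used when opening a segment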
@@ -412,9 +412,9 @@ get_counted_handle(SegNum, State = #qistate { partial_segments = Partials }, Count1 = Count + 1 + CountExtra, {State1 #qistate { partial_segments = Partials1 }, {SegNum, Hdl, Count1}}; get_counted_handle(SegNum, State, {SegNum, Hdl, Count}) - when Count < ?SEGMENT_ENTRIES_COUNT -> + when Count < ?SEGMENT_ENTRY_COUNT -> {State, {SegNum, Hdl, Count + 1}}; -get_counted_handle(SegNumA, State, {SegNumB, Hdl, ?SEGMENT_ENTRIES_COUNT}) +get_counted_handle(SegNumA, State, {SegNumB, Hdl, ?SEGMENT_ENTRY_COUNT}) when SegNumA == SegNumB + 1 -> ok = file_handle_cache:append_write_buffer(Hdl), get_counted_handle(SegNumA, State, undefined); @@ -480,10 +480,10 @@ bool_to_int(true ) -> 1; bool_to_int(false) -> 0. seq_id_to_seg_and_rel_seq_id(SeqId) -> - { SeqId div ?SEGMENT_ENTRIES_COUNT, SeqId rem ?SEGMENT_ENTRIES_COUNT }. + { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. reconstruct_seq_id(SegNum, RelSeq) -> - (SegNum * ?SEGMENT_ENTRIES_COUNT) + RelSeq. + (SegNum * ?SEGMENT_ENTRY_COUNT) + RelSeq. seg_num_to_path(Dir, SegNum) -> SegName = integer_to_list(SegNum), @@ -606,7 +606,7 @@ read_and_prune_segments(State = #qistate { dir = Dir }) -> %% the partial_segments dict {PublishHandle1, Partials1} = case PubCount of - ?SEGMENT_ENTRIES_COUNT -> + ?SEGMENT_ENTRY_COUNT -> {PublishHandle, Partials}; 0 -> {PublishHandle, Partials}; @@ -649,7 +649,7 @@ scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> {TotalMsgCount2, State4} = dict:fold(fun replay_journal_acks_to_segment/3, {TotalMsgCount1, State3}, ADict1), - JournalPath = filename:join(Dir, ?ACK_JOURNAL_FILENAME), + JournalPath = filename:join(Dir, ?JOURNAL_FILENAME), ok = file:delete(JournalPath), {TotalMsgCount2, State4}. @@ -825,7 +825,7 @@ append_acks_to_segment(SegNum, Acks, end, AckTarget = case dict:find(SegNum, Partials) of {ok, PubCount} -> PubCount; - error -> ?SEGMENT_ENTRIES_COUNT + error -> ?SEGMENT_ENTRY_COUNT end, AckCount2 = AckCount + length(Acks), append_acks_to_segment(SegNum, AckCount2, Acks, AckTarget, State). @@ -835,8 +835,8 @@ append_acks_to_segment(SegNum, AckCount, _Acks, AckCount, State = PubHdl1 = case PubHdl of %% If we're adjusting the pubhdl here then there %% will be no entry in partials, thus the target ack - %% count must be SEGMENT_ENTRIES_COUNT - {SegNum, Hdl, AckCount = ?SEGMENT_ENTRIES_COUNT} + %% count must be SEGMENT_ENTRY_COUNT + {SegNum, Hdl, AckCount = ?SEGMENT_ENTRY_COUNT} when Hdl /= undefined -> {SegNum + 1, undefined, 0}; _ -> -- cgit v1.2.1 From bc55d8d8b743c3e04d90ae79e3a88296f97e9a97 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 26 Nov 2009 21:51:35 +0000 Subject: s/full_flush_journal/flush_journal and some refactoring --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_queue_index.erl | 83 +++++++++++++++++------------------------ src/rabbit_tests.erl | 4 +- src/rabbit_variable_queue.erl | 8 ++-- 4 files changed, 41 insertions(+), 56 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 945cd8bd..7364c479 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -925,7 +925,7 @@ handle_info(Info, State) -> {stop, {unhandled_info, Info}, State}. 
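%% A quick check that the div/rem mapping above and
%% reconstruct_seq_id/2 are inverses (with ?SEGMENT_ENTRY_COUNT =
%% 16384): sequence id 16385 lives at relative position 1 of
%% segment 1, and reassembles exactly.
seq_id_roundtrip_sketch() ->
    SeqId = 16385,
    {Seg, RelSeq} = {SeqId div 16384, SeqId rem 16384}, %% {1, 1}
    SeqId = (Seg * 16384) + RelSeq,                     %% round-trips
    ok.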
handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> - VQS1 = rabbit_variable_queue:full_flush_journal(VQS), + VQS1 = rabbit_variable_queue:flush_journal(VQS), %% no activity for a while == 0 egress and ingress rates DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), infinity), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6fe788f9..6e1f496a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, sync_seq_ids/3, full_flush_journal/1, + write_delivered/2, write_acks/2, sync_seq_ids/3, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). @@ -149,7 +149,7 @@ -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). -spec(sync_seq_ids/3 :: ([seq_id()], boolean(), qistate()) -> qistate()). --spec(full_flush_journal/1 :: (qistate()) -> qistate()). +-spec(flush_journal/1 :: (qistate()) -> qistate()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). @@ -198,13 +198,13 @@ write_delivered(SeqId, State = #qistate { journal_del_dict = JDelDict }) -> {JDelDict1, State1} = write_to_journal([<>], [SeqId], JDelDict, State), - maybe_full_flush(State1 #qistate { journal_del_dict = JDelDict1 }). + maybe_flush(State1 #qistate { journal_del_dict = JDelDict1 }). write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict }) -> {JAckDict1, State1} = write_to_journal([<> || SeqId <- SeqIds], SeqIds, JAckDict, State), - maybe_full_flush(State1 #qistate { journal_ack_dict = JAckDict1 }). + maybe_flush(State1 #qistate { journal_ack_dict = JAckDict1 }). sync_seq_ids(SeqIds, SyncAckJournal, State) -> State1 = case SyncAckJournal of @@ -226,10 +226,31 @@ sync_seq_ids(SeqIds, SyncAckJournal, State) -> StateM end, State1, SegNumsSet). -full_flush_journal(State = #qistate { journal_count = 0 }) -> +flush_journal(State = #qistate { journal_count = 0 }) -> State; -full_flush_journal(State) -> - full_flush_journal(flush_journal(State)). +flush_journal(State = #qistate { journal_ack_dict = JAckDict, + journal_del_dict = JDelDict, + journal_count = JCount }) -> + SegNum = case dict:fetch_keys(JAckDict) of + [] -> hd(dict:fetch_keys(JDelDict)); + [N|_] -> N + end, + Dels = seg_entries_from_dict(SegNum, JDelDict), + Acks = seg_entries_from_dict(SegNum, JAckDict), + State1 = append_dels_to_segment(SegNum, Dels, State), + State2 = append_acks_to_segment(SegNum, Acks, State1), + JCount1 = JCount - length(Dels) - length(Acks), + State3 = State2 #qistate { journal_del_dict = dict:erase(SegNum, JDelDict), + journal_ack_dict = dict:erase(SegNum, JAckDict), + journal_count = JCount1 }, + case JCount1 of + 0 -> {Hdl, State4} = get_journal_handle(State3), + {ok, 0} = file_handle_cache:position(Hdl, bof), + ok = file_handle_cache:truncate(Hdl), + ok = file_handle_cache:sync(Hdl), + State4; + _ -> flush_journal(State3) + end. read_segment_entries(InitSeqId, State) -> {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), @@ -315,52 +336,16 @@ start_msg_store(DurableQueues) -> end, TransientDirs), ok. 
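%% The rewritten flush_journal/1 above drains the journal one segment
%% per iteration: pick any segment with pending dels or acks, append
%% those entries to its segment file, and recurse. The in-memory
%% journal count strictly decreases, and once it reaches zero the
%% journal file is truncated and synced. Entries normally arrive here
%% via maybe_flush (introduced just below), which only triggers a
%% drain once the count exceeds ?MAX_JOURNAL_ENTRY_COUNT (32768),
%% keeping the common publish/ack path to O(1) journal appends.
%% Schematically, with the file handling elided and the helper names
%% purely illustrative:
flush_journal_sketch(State) ->
    case journal_count(State) of
        0 -> truncate_and_sync_journal(State); %% backlog fully persisted
        _ -> Seg    = any_segment_with_entries(State),
             State1 = append_entries_to_segment(Seg, State),
             flush_journal_sketch(State1)      %% count strictly decreases
    end.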
- -%%---------------------------------------------------------------------------- -%% Journal Flushing -%%---------------------------------------------------------------------------- - -flush_journal(State = #qistate { journal_count = 0 }) -> - State; -flush_journal(State = #qistate { journal_ack_dict = JAckDict, - journal_del_dict = JDelDict, - journal_count = JCount }) -> - SegNum = case dict:fetch_keys(JAckDict) of - [] -> hd(dict:fetch_keys(JDelDict)); - [N|_] -> N - end, - Dels = seg_entries_from_dict(SegNum, JDelDict), - Acks = seg_entries_from_dict(SegNum, JAckDict), - State1 = append_dels_to_segment(SegNum, Dels, State), - State2 = append_acks_to_segment(SegNum, Acks, State1), - JCount1 = JCount - length(Dels) - length(Acks), - State3 = State2 #qistate { journal_del_dict = dict:erase(SegNum, JDelDict), - journal_ack_dict = dict:erase(SegNum, JAckDict), - journal_count = JCount1 }, - if - JCount1 == 0 -> - {Hdl, State4} = get_journal_handle(State3), - {ok, 0} = file_handle_cache:position(Hdl, bof), - ok = file_handle_cache:truncate(Hdl), - ok = file_handle_cache:sync(Hdl), - State4; - JCount1 > ?MAX_JOURNAL_ENTRY_COUNT -> - flush_journal(State3); - true -> - State3 - end. - -maybe_full_flush(State = #qistate { journal_count = JCount }) -> - case JCount > ?MAX_JOURNAL_ENTRY_COUNT of - true -> full_flush_journal(State); - false -> State - end. - - %%---------------------------------------------------------------------------- %% Minor Helpers %%---------------------------------------------------------------------------- +maybe_flush(State = #qistate { journal_count = JCount }) + when JCount > ?MAX_JOURNAL_ENTRY_COUNT -> + flush_journal(State); +maybe_flush(State) -> + State. + write_to_journal(BinList, SeqIds, Dict, State = #qistate { journal_count = JCount }) -> {Hdl, State1} = get_journal_handle(State), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 43fdaf3b..c931e0b0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1065,7 +1065,7 @@ queue_index_deliver(SeqIds, Qi) -> end, Qi, SeqIds). queue_index_flush_journal(Qi) -> - rabbit_queue_index:full_flush_journal(Qi). + rabbit_queue_index:flush_journal(Qi). verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; @@ -1230,7 +1230,7 @@ test_variable_queue_dynamic_duration_change() -> {_SeqIds1, VQ7} = variable_queue_publish(true, 20, VQ6), {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), - VQ10 = rabbit_variable_queue:full_flush_journal(VQ9), + VQ10 = rabbit_variable_queue:flush_journal(VQ9), {empty, VQ11} = rabbit_variable_queue:fetch(VQ10), rabbit_variable_queue:terminate(VQ11), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6806a0cd..3958216e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -36,7 +36,7 @@ ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, - full_flush_journal/1, status/1]). + flush_journal/1, status/1]). %%---------------------------------------------------------------------------- @@ -154,7 +154,7 @@ ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> vqstate()). -spec(tx_commit_from_vq/1 :: (vqstate()) -> vqstate()). -spec(needs_sync/1 :: (vqstate()) -> boolean()). --spec(full_flush_journal/1 :: (vqstate()) -> vqstate()). +-spec(flush_journal/1 :: (vqstate()) -> vqstate()). 
-spec(status/1 :: (vqstate()) -> [{atom(), any()}]).

 -endif.

@@ -463,9 +463,9 @@ needs_sync(#vqstate { on_sync = {_, _, []} }) ->
 needs_sync(_) ->
     true.

-full_flush_journal(State = #vqstate { index_state = IndexState }) ->
+flush_journal(State = #vqstate { index_state = IndexState }) ->
     State #vqstate { index_state =
-                     rabbit_queue_index:full_flush_journal(IndexState) }.
+                     rabbit_queue_index:flush_journal(IndexState) }.

 status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4,
                   len = Len, on_sync = {_, _, From},
-- cgit v1.2.1


From 995efb74d8a51834e72c952317cd375c8d99c59e Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Fri, 27 Nov 2009 09:05:01 +0000
Subject: unintended variable capture

---
 src/rabbit_memory_monitor.erl | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl
index b94badc4..5c0b2daa 100644
--- a/src/rabbit_memory_monitor.erl
+++ b/src/rabbit_memory_monitor.erl
@@ -270,12 +270,13 @@ internal_update(State = #state { memory_limit = Limit,
                           case (case {QueueDuration, PrevSendDuration} of
                                     {infinity, infinity} ->
                                         true;
-                                    {infinity, B} ->
-                                        DesiredDurationAvg1 < B;
-                                    {A, infinity} ->
-                                        DesiredDurationAvg1 < A;
-                                    {A, B} ->
-                                        DesiredDurationAvg1 < lists:min([A,B])
+                                    {infinity, D} ->
+                                        DesiredDurationAvg1 < D;
+                                    {D, infinity} ->
+                                        DesiredDurationAvg1 < D;
+                                    {D1, D2} ->
+                                        DesiredDurationAvg1 <
+                                            lists:min([D1,D2])
                                 end) of
                               true ->
                                   ok = erlang:apply(
-- cgit v1.2.1


From 5c1fb67a0619df6edd4da7727aa9e1d7eb3c725d Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Fri, 27 Nov 2009 09:24:22 +0000
Subject: storage_mode is long gone

---
 src/rabbit_control.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl
index 5e6229b1..a18f6534 100644
--- a/src/rabbit_control.erl
+++ b/src/rabbit_control.erl
@@ -174,7 +174,7 @@ virtual host parameter for which to display results. The default value is \"/\".
 must be a member of the list [name, durable, auto_delete, arguments,
 node, messages_ready, messages_unacknowledged, messages_uncommitted,
-messages, acks_uncommitted, consumers, transactions, memory, storage_mode]. The
+messages, acks_uncommitted, consumers, transactions, memory]. The
 default is to display name and (number of) messages.

 must be a member of the list [name, type, durable,
-- cgit v1.2.1


From 6bbc59b738a617b256349467e55bbad957ac44f2 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 27 Nov 2009 13:44:47 +0000
Subject: There was a bug. Now it has gone away. It arose when gamma has a
 partial segment, and then memory is made available, and the next msg is
 persistent. It will go into the partial segment in qi, but will also be in
 q1 in vq. This led to the msg being duplicated. Solution is to track the max
 seq id beyond the end of the gammas, and thus drop anything being returned in
 the segment from qi with a seq_id above this max seq id.

---
 include/rabbit_queue.hrl      | 10 +++--
 src/rabbit_tests.erl          | 53 ++++++++++++++++++++++++-
 src/rabbit_variable_queue.erl | 92 +++++++++++++++++++++++++++----------------
 3 files changed, 116 insertions(+), 39 deletions(-)

diff --git a/include/rabbit_queue.hrl b/include/rabbit_queue.hrl
index 165a7e7b..69ad7588 100644
--- a/include/rabbit_queue.hrl
+++ b/include/rabbit_queue.hrl
@@ -46,13 +46,15 @@
          }).

 -record(gamma,
-        { seq_id,
-          count
+        { start_seq_id,
+          count,
+          end_seq_id %% note the end_seq_id is always >, not >=
         }).

 -ifdef(use_specs).
--type(gamma() :: #gamma { seq_id :: non_neg_integer(), - count :: non_neg_integer () }). +-type(gamma() :: #gamma { start_seq_id :: non_neg_integer(), + count :: non_neg_integer (), + end_seq_id :: non_neg_integer() }). -endif. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c931e0b0..f84ba70a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -41,6 +41,7 @@ -import(lists). -include("rabbit.hrl"). +-include("rabbit_queue.hrl"). -include_lib("kernel/include/file.hrl"). test_content_prop_roundtrip(Datum, Binary) -> @@ -1201,13 +1202,16 @@ fresh_variable_queue() -> assert_prop(S0, len, 0), assert_prop(S0, q1, 0), assert_prop(S0, q2, 0), - assert_prop(S0, gamma, {gamma, undefined, 0}), + assert_prop(S0, gamma, #gamma { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }), assert_prop(S0, q3, 0), assert_prop(S0, q4, 0), VQ. test_variable_queue() -> passed = test_variable_queue_dynamic_duration_change(), + passed = test_variable_queue_partial_segments_gamma_thing(), passed. test_variable_queue_dynamic_duration_change() -> @@ -1260,3 +1264,50 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> after 0 -> test_variable_queue_dynamic_duration_change_f(Len, VQ3) end. + +test_variable_queue_partial_segments_gamma_thing() -> + SegmentSize = rabbit_queue_index:segment_size(), + HalfSegment = SegmentSize div 2, + VQ0 = fresh_variable_queue(), + {_SeqIds, VQ1} = + variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), + VQ2 = rabbit_variable_queue:remeasure_rates(VQ1), + VQ3 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ2), + %% one segment in q3 as betas, and half a segment in gamma + S3 = rabbit_variable_queue:status(VQ3), + io:format("~p~n", [S3]), + assert_prop(S3, gamma, #gamma { start_seq_id = SegmentSize, + count = HalfSegment, + end_seq_id = SegmentSize + HalfSegment }), + assert_prop(S3, q3, SegmentSize), + assert_prop(S3, len, SegmentSize + HalfSegment), + VQ4 = rabbit_variable_queue:set_queue_ram_duration_target(infinity, VQ3), + {[_SeqId], VQ5} = variable_queue_publish(true, 1, VQ4), + %% should have 1 alpha, but it's in the same segment as the gammas + S5 = rabbit_variable_queue:status(VQ5), + io:format("~p~n", [S5]), + assert_prop(S5, q1, 1), + assert_prop(S5, gamma, #gamma { start_seq_id = SegmentSize, + count = HalfSegment, + end_seq_id = SegmentSize + HalfSegment }), + assert_prop(S5, q3, SegmentSize), + assert_prop(S5, len, SegmentSize + HalfSegment + 1), + {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, + SegmentSize + HalfSegment + 1, VQ5), + %% the half segment should now be in q3 as betas + S6 = rabbit_variable_queue:status(VQ6), + io:format("~p~n", [S6]), + assert_prop(S6, gamma, #gamma { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }), + assert_prop(S6, q1, 1), + assert_prop(S6, q3, HalfSegment), + assert_prop(S6, len, HalfSegment + 1), + {VQ7, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, + HalfSegment + 1, VQ6), + VQ8 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ7), + %% should be empty now + {empty, VQ9} = rabbit_variable_queue:fetch(VQ8), + rabbit_variable_queue:terminate(VQ9), + + passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 3958216e..6fc89cb4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -134,7 +134,7 @@ -spec(publish_delivered/2 :: (basic_message(), vqstate()) -> {ack(), vqstate()}). 
-spec(set_queue_ram_duration_target/2 :: - (('undefined' | number()), vqstate()) -> vqstate()). + (('undefined' | 'infinity' | number()), vqstate()) -> vqstate()). -spec(remeasure_rates/1 :: (vqstate()) -> vqstate()). -spec(ram_duration/1 :: (vqstate()) -> number()). -spec(fetch/1 :: (vqstate()) -> @@ -159,6 +159,10 @@ -endif. +-define(BLANK_GAMMA, #gamma { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }). + %%---------------------------------------------------------------------------- %% Public API %%---------------------------------------------------------------------------- @@ -169,8 +173,10 @@ init(QueueName) -> {GammaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), Gamma = case GammaCount of - 0 -> #gamma { seq_id = undefined, count = 0 }; - _ -> #gamma { seq_id = GammaSeqId, count = GammaCount } + 0 -> ?BLANK_GAMMA; + _ -> #gamma { start_seq_id = GammaSeqId, + count = GammaCount, + end_seq_id = NextSeqId } end, Now = now(), State = @@ -472,7 +478,8 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate }) -> + avg_ingress_rate = AvgIngressRate, + next_seq_id = NextSeqId }) -> [ {q1, queue:len(Q1)}, {q2, queue:len(Q2)}, {gamma, Gamma}, @@ -483,7 +490,8 @@ status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, {target_ram_msg_count, TargetRamMsgCount}, {ram_msg_count, RamMsgCount}, {avg_egress_rate, AvgEgressRate}, - {avg_ingress_rate, AvgIngressRate} ]. + {avg_ingress_rate, AvgIngressRate}, + {next_seq_id, NextSeqId} ]. %%---------------------------------------------------------------------------- %% Minor helpers @@ -508,12 +516,13 @@ entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, index_on_disk = IndexOnDisk }) -> {MsgId, SeqId, IsDelivered, true, IndexOnDisk}. -betas_from_segment_entries(List) -> +betas_from_segment_entries(List, SeqIdLimit) -> queue:from_list([#beta { msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, index_on_disk = true } - || {MsgId, SeqId, IsPersistent, IsDelivered} <- List]). + || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, + SeqId < SeqIdLimit ]). read_index_segment(SeqId, IndexState) -> SeqId1 = SeqId + rabbit_queue_index:segment_size(), @@ -527,15 +536,18 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> content = rabbit_binary_parser:clear_decoded_content( rabbit_binary_generator:ensure_content_encoded(Content)) }. -%% the first arg is the older gamma +%% the first arg is the older gamma combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> - #gamma { seq_id = undefined, count = 0 }; + ?BLANK_GAMMA; combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B; combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A; -combine_gammas(#gamma { seq_id = SeqIdLow, count = CountLow }, - #gamma { seq_id = SeqIdHigh, count = CountHigh}) -> +combine_gammas(#gamma { start_seq_id = SeqIdLow, count = CountLow}, + #gamma { start_seq_id = SeqIdHigh, count = CountHigh, + end_seq_id = SeqIdEnd }) -> true = SeqIdLow =< SeqIdHigh, %% ASSERTION - #gamma { seq_id = SeqIdLow, count = CountLow + CountHigh}. + Count = CountLow + CountHigh, + true = Count =< SeqIdEnd - SeqIdLow, %% ASSERTION + #gamma { start_seq_id = SeqIdLow, count = Count, end_seq_id = SeqIdEnd }. 
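%% A worked example of the gamma bookkeeping defined above. The
%% record now describes a half-open interval: start_seq_id inclusive,
%% end_seq_id exclusive, with count possibly smaller than the
%% interval width because acked messages leave gaps. combine_gammas/2
%% of an older and a newer gamma keeps the older start, the newer end
%% and the summed count, and its assertions guarantee both the
%% ordering of the inputs and count =< end - start. For instance:
%%
%%   Old = #gamma { start_seq_id = 0,     count = 100, end_seq_id = 16384 },
%%   New = #gamma { start_seq_id = 16384, count = 50,  end_seq_id = 16500 },
%%   combine_gammas(Old, New) =
%%         #gamma { start_seq_id = 0, count = 150, end_seq_id = 16500 }
%%
%% and the assertion holds since 150 =< 16500 - 0.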
%%---------------------------------------------------------------------------- %% Internal major helpers for Public API @@ -549,8 +561,8 @@ delete1(NextSeqId, Count, GammaSeqId, IndexState) -> case rabbit_queue_index:read_segment_entries(GammaSeqId, IndexState) of {[], IndexState1} -> delete1(NextSeqId, Count, Gamma1SeqId, IndexState1); - {List, IndexState1} -> - Q = betas_from_segment_entries(List), + {List, IndexState1} -> + Q = betas_from_segment_entries(List, Gamma1SeqId), {QCount, IndexState2} = remove_queue_entries(Q, IndexState1), delete1(NextSeqId, Count + QCount, Gamma1SeqId, IndexState2) end. @@ -748,7 +760,8 @@ publish(neither, Msg = #basic_message { guid = MsgId, %% or equal to seq_id GammaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - rabbit_queue_index:segment_size(), - Gamma1 = #gamma { seq_id = GammaSeqId, count = 1 }, + Gamma1 = #gamma { start_seq_id = GammaSeqId, count = 1, + end_seq_id = SeqId + 1 }, State #vqstate { index_state = IndexState1, gamma = combine_gammas(Gamma, Gamma1) }. @@ -818,8 +831,10 @@ maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 } }) -> maybe_gammas_to_betas(State = #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, target_ram_msg_count = TargetRamMsgCount, - gamma = #gamma { seq_id = GammaSeqId, - count = GammaCount }}) -> + gamma = #gamma { start_seq_id = GammaSeqId, + count = GammaCount, + end_seq_id = GammaSeqIdEnd }} + ) -> case (not queue:is_empty(Q3)) andalso 0 == TargetRamMsgCount of true -> State; @@ -832,20 +847,22 @@ maybe_gammas_to_betas(State = State1 = State #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because of acks. But %% it can't be [] - Q3a = queue:join(Q3, betas_from_segment_entries(List)), - case GammaCount - length(List) of + Q3b = betas_from_segment_entries(List, GammaSeqIdEnd), + Q3a = queue:join(Q3, Q3b), + case GammaCount - queue:len(Q3b) of 0 -> %% gamma is now empty, but it wasn't before, so %% can now join q2 onto q3 - State1 #vqstate { gamma = #gamma { seq_id = undefined, - count = 0 }, + State1 #vqstate { gamma = ?BLANK_GAMMA, q2 = queue:new(), q3 = queue:join(Q3a, Q2) }; N when N > 0 -> maybe_gammas_to_betas( - State1 #vqstate { q3 = Q3a, - gamma = #gamma { seq_id = Gamma1SeqId, - count = N } }) + State1 #vqstate { + q3 = Q3a, + gamma = #gamma { start_seq_id = Gamma1SeqId, + count = N, + end_seq_id = GammaSeqIdEnd } }) end end. @@ -897,13 +914,19 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, %% transfer from q2 to gamma. 
{HighSeqId, Len1, Q2a, IndexState1} = push_betas_to_gammas(fun queue:out/1, undefined, Q2, IndexState), - Gamma1 = #gamma { seq_id = Gamma1SeqId } = - combine_gammas(Gamma, #gamma { seq_id = HighSeqId, count = Len1 }), + EndSeqId = case queue:out_r(Q2) of + {empty, _Q2} -> undefined; + {{value, #beta { seq_id = EndSeqId1 }}, _Q2} -> EndSeqId1 + 1 + end, + Gamma1 = #gamma { start_seq_id = Gamma1SeqId } = + combine_gammas(Gamma, #gamma { start_seq_id = HighSeqId, + count = Len1, + end_seq_id = EndSeqId }), State1 = State #vqstate { q2 = Q2a, gamma = Gamma1, index_state = IndexState1 }, case queue:out(Q3) of {empty, _Q3} -> State1; - {{value, #beta { seq_id = SeqId }}, _Q3a} -> + {{value, #beta { seq_id = SeqId }}, _Q3a} -> {{value, #beta { seq_id = SeqIdMax }}, _Q3b} = queue:out_r(Q3), Limit = rabbit_queue_index:next_segment_boundary(SeqId), %% ASSERTION @@ -921,21 +944,22 @@ push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, _ -> (Gamma1SeqId - Limit) rem rabbit_queue_index:segment_size() end, - %% LowSeqId is low in the sense that it must be + %% SeqIdMax is low in the sense that it must be %% lower than the seq_id in gamma1, in fact either %% gamma1 has undefined as its seq_id or there - %% does not exist a seq_id X s.t. X > LowSeqId and + %% does not exist a seq_id X s.t. X > SeqIdMax and %% X < gamma1's seq_id (would be +1 if it wasn't %% for the possibility of gaps in the seq_ids). - %% But because we use queue:out_r, LowSeqId is + %% But because we use queue:out_r, SeqIdMax is %% actually also the highest seq_id of the betas we %% transfer from q3 to gammas. - {LowSeqId, Len2, Q3b, IndexState2} = + {SeqIdMax, Len2, Q3b, IndexState2} = push_betas_to_gammas(fun queue:out_r/1, Limit, Q3, IndexState1), - true = Gamma1SeqId > LowSeqId, %% ASSERTION - Gamma2 = combine_gammas( - #gamma { seq_id = Limit, count = Len2}, Gamma1), + Gamma2 = combine_gammas(#gamma { start_seq_id = Limit, + count = Len2, + end_seq_id = SeqIdMax+1 }, + Gamma1), State1 #vqstate { q3 = Q3b, gamma = Gamma2, index_state = IndexState2 } end -- cgit v1.2.1 From bb57a98915f4ff95e9738d7fa4be2bb320d948b9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 27 Nov 2009 14:45:30 +0000 Subject: Removed mnesia table and dq_msg_loc record that's no longer needed --- include/rabbit.hrl | 2 -- src/rabbit_mnesia.erl | 15 +++++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 330eef80..28cfb2d9 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -65,8 +65,6 @@ -record(basic_message, {exchange_name, routing_key, content, guid, is_persistent}). --record(dq_msg_loc, {queue_and_seq_id, is_delivered, is_persistent, msg_id}). - -record(ssl_socket, {tcp, ssl}). -record(delivery, {mandatory, immediate, txn, sender, message}). diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index a1d886bb..1443d769 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -55,8 +55,8 @@ -spec(cluster/1 :: ([erlang_node()]) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). --spec(is_clustered/0 :: () -> boolean()). --spec(empty_ram_only_tables/0 :: () -> 'ok'). +-spec(is_clustered/0 :: () -> boolean()). +-spec(empty_ram_only_tables/0 :: () -> 'ok'). -spec(create_tables/0 :: () -> 'ok'). -endif. 
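%% The heart of the duplication fix in the previous commit is the
%% SeqIdLimit filter in betas_from_segment_entries/2: when a segment
%% is read back from the queue index, any entry at or beyond gamma's
%% end_seq_id belongs to a message that is still held as an alpha in
%% q1, so it must not also be materialised as a beta. Reduced to its
%% essentials:
betas_below_limit_sketch(Entries, SeqIdLimit) ->
    [Entry || Entry = {_MsgId, SeqId, _IsPersistent, _IsDelivered} <- Entries,
              SeqId < SeqIdLimit].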
@@ -162,12 +162,7 @@ table_definitions() -> {disc_copies, [node()]}]}, {rabbit_queue, [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}]}, - {rabbit_disk_queue, - [{record_name, dq_msg_loc}, - {attributes, record_info(fields, dq_msg_loc)}, - {disc_copies, [node()]}, - {local_content, true}]} + {attributes, record_info(fields, amqqueue)}]} ]. table_names() -> @@ -179,7 +174,7 @@ replicated_table_names() -> ]. dir() -> mnesia:system_info(directory). - + ensure_mnesia_dir() -> MnesiaDir = dir() ++ "/", case filelib:ensure_dir(MnesiaDir) of @@ -396,7 +391,7 @@ wait_for_replicated_tables() -> wait_for_tables(replicated_table_names()). wait_for_tables() -> wait_for_tables(table_names()). -wait_for_tables(TableNames) -> +wait_for_tables(TableNames) -> case check_schema_integrity() of ok -> case mnesia:wait_for_tables(TableNames, 30000) of -- cgit v1.2.1 From a604110812ceea958f40d2da344cab3d2977671f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 27 Nov 2009 17:56:01 +0000 Subject: Removal of unnecessary clause in handle_info for queue process as prefetcher has gone. Bump rabbit supervisor to allow all processes to take as long as necessary to exit. This should ensure cached data gets flushed out. --- src/rabbit.erl | 3 ++- src/rabbit_amqqueue_process.erl | 10 ++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 405d170b..9ef49ac5 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -284,7 +284,8 @@ start_child(Mod) -> start_child(Mod, Args) -> {ok,_} = supervisor:start_child(rabbit_sup, {Mod, {Mod, start_link, Args}, - transient, 5000, worker, [Mod]}), + %% 4294967295 is 2^32 - 1, which is the highest value allowed + transient, 4294967295, worker, [Mod]}), ok. ensure_working_log_handlers() -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7364c479..9b9515c1 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -903,18 +903,12 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {ok, NewState} -> noreply(NewState); {stop, NewState} -> {stop, normal, NewState} end; -handle_info({'EXIT', _DownPid, normal}, State) -> - %% because we have trap_exit on, we'll pick up here the prefetcher - %% going down. We probably need to make sure that we really are - %% just picking up the prefetcher here. 
It's safe to ignore it - %% though, provided 'normal' - noreply(State); handle_info(timeout, State = #q{variable_queue_state = VQS}) -> noreply( run_message_queue( State#q{variable_queue_state = - rabbit_variable_queue:tx_commit_from_vq(VQS)})); + rabbit_variable_queue:tx_commit_from_vq(VQS)})); handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -927,7 +921,7 @@ handle_info(Info, State) -> handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> VQS1 = rabbit_variable_queue:flush_journal(VQS), %% no activity for a while == 0 egress and ingress rates - DesiredDuration = + DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), infinity), VQS2 = rabbit_variable_queue:set_queue_ram_duration_target( DesiredDuration, VQS1), -- cgit v1.2.1 From 2028c70d6258054b0a25ca75725ab4e2d35079aa Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 30 Nov 2009 16:57:44 +0000 Subject: Garbage collect on memory reduction; remove delayed_write in queue index as we're handling buffering ourselves; add a hack to get the raw vq status out of rabbitmqctl --- src/rabbit_amqqueue_process.erl | 5 ++++- src/rabbit_queue_index.erl | 3 +-- src/rabbit_variable_queue.erl | 4 +++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 9b9515c1..9cefa926 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -90,7 +90,8 @@ acks_uncommitted, consumers, transactions, - memory + memory, + raw_vq_status ]). %%---------------------------------------------------------------------------- @@ -585,6 +586,8 @@ i(transactions, _) -> i(memory, _) -> {memory, M} = process_info(self(), memory), M; +i(raw_vq_status, State) -> + rabbit_variable_queue:status(State#q.variable_queue_state); i(Item, _) -> throw({bad_argument, Item}). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6e1f496a..91ecd669 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -375,7 +375,7 @@ get_journal_handle(State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> {Hdl, State}; error -> Path = filename:join(Dir, ?JOURNAL_FILENAME), - Mode = [raw, binary, delayed_write, write, read, read_ahead], + Mode = [raw, binary, write, read, read_ahead], new_handle(journal, Path, Mode, State) end. @@ -426,7 +426,6 @@ get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls } error -> new_handle(SegNum, seg_num_to_path(Dir, SegNum), [binary, raw, read, write, - {delayed_write, ?SEGMENT_TOTAL_SIZE, 1000}, {read_ahead, ?SEGMENT_TOTAL_SIZE}], State) end. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6fc89cb4..919af6c1 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -245,7 +245,9 @@ set_queue_ram_duration_target( case TargetRamMsgCount1 == undefined orelse TargetRamMsgCount1 >= TargetRamMsgCount of true -> State1; - false -> reduce_memory_use(State1) + false -> State2 = reduce_memory_use(State1), + garbage_collect(self()), + State2 end. 
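%% The explicit garbage_collect(self()) added above appears to be a
%% promptness measure: reduce_memory_use/1 drops references to
%% message binaries, but that memory is only reclaimed once the
%% process heap is actually collected, so forcing a collection right
%% after a large drop makes the reduction visible immediately rather
%% than at the next natural GC. The shape of the pattern, in
%% isolation:
shrink_sketch(State) ->
    State1 = reduce_memory_use(State),
    garbage_collect(self()), %% release the dropped references promptly
    State1.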
remeasure_rates(State = #vqstate { egress_rate = Egress,
-- cgit v1.2.1


From 6795d834af68916e8301f23c72cf97ddc4e37cb1 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 30 Nov 2009 17:49:34 +0000
Subject: Remove the garbage_collect call as this doesn't seem to be necessary
 in R13B03

---
 src/rabbit_variable_queue.erl | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 919af6c1..6fc89cb4 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -245,9 +245,7 @@ set_queue_ram_duration_target(
     case TargetRamMsgCount1 == undefined orelse
         TargetRamMsgCount1 >= TargetRamMsgCount of
         true -> State1;
-        false -> State2 = reduce_memory_use(State1),
-                 garbage_collect(self()),
-                 State2
+        false -> reduce_memory_use(State1)
     end.

 remeasure_rates(State = #vqstate { egress_rate = Egress,
-- cgit v1.2.1


From b5fd98023e9940c2f1390c19abc22228201e90e1 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 30 Nov 2009 18:29:35 +0000
Subject: Only pull in one segment at a time from gamma into q3

---
 src/rabbit_variable_queue.erl | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 6fc89cb4..0a5909a0 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -702,8 +702,10 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount,
             msg;
         _ ->
             case queue:is_empty(Q1) of
-                true -> index;
-                false -> msg %% can push out elders to disk
+                true -> index;
+                %% Can push out elders (in q1) to disk. This may also
+                %% result in the msg itself going to disk and q2/q3.
+                false -> msg
             end
     end.

@@ -857,12 +859,11 @@ maybe_gammas_to_betas(State =
                                       q2 = queue:new(),
                                       q3 = queue:join(Q3a, Q2) };
                 N when N > 0 ->
-                    maybe_gammas_to_betas(
-                      State1 #vqstate {
-                        q3 = Q3a,
-                        gamma = #gamma { start_seq_id = Gamma1SeqId,
-                                         count = N,
-                                         end_seq_id = GammaSeqIdEnd } })
+                    State1 #vqstate {
+                      q3 = Q3a,
+                      gamma = #gamma { start_seq_id = Gamma1SeqId,
+                                       count = N,
+                                       end_seq_id = GammaSeqIdEnd } }
             end
     end.
-- cgit v1.2.1


From b5a4c07af200a4efb8819a5e1b5a8be6ff2d07db Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 1 Dec 2009 23:47:10 +0000
Subject: Wrote two key combinators which will be used by the new queue index
 shortly. qi3 is very temporary

---
 src/rabbit_queue_index3.erl | 159 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 src/rabbit_queue_index3.erl

diff --git a/src/rabbit_queue_index3.erl b/src/rabbit_queue_index3.erl
new file mode 100644
index 00000000..9a9a9c78
--- /dev/null
+++ b/src/rabbit_queue_index3.erl
@@ -0,0 +1,159 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_index3). + +%% Combine what we have just read from a segment file with what we're +%% holding for that segment in memory. There must be no +%% duplicates. Used when providing segment entries to the variable +%% queue. +journal_plus_segment(JEntries, SegDict) -> + dict:fold(fun (RelSeq, JObj, SegDictOut) -> + SegEntry = case dict:find(RelSeq, SegDictOut) of + error -> not_found; + {ok, SObj = {_, _, _}} -> SObj + end, + journal_plus_segment(JObj, SegEntry, RelSeq, SegDictOut) + end, SegDict, JEntries). + +%% Here, the OutDict is the SegDict which we may be adding to (for +%% items only in the journal), modifying (bits in both), or erasing +%% from (ack in journal, not segment). +journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); + +journal_plus_segment({no_pub, del, no_ack}, + {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, {PubRecord, del, no_ack}, OutDict); + +journal_plus_segment({no_pub, del, ack}, + {{_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:erase(RelSeq, OutDict); +journal_plus_segment({no_pub, no_del, ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict) -> + dict:erase(RelSeq, OutDict). + + +%% Remove from the journal entries for a segment, items that are +%% duplicates of entries found in the segment itself. Used on start up +%% to clean up the journal. +journal_minus_segment(JEntries, SegDict) -> + dict:fold(fun (RelSeq, JObj, JEntriesOut) -> + SegEntry = case dict:find(RelSeq, SegDict) of + error -> not_found; + {ok, SObj = {_, _, _}} -> SObj + end, + journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut) + end, dict:new(), JEntries). + +%% Here, the OutDict is a fresh journal that we're filling with valid +%% entries. 
+%% Both the same +journal_minus_segment(_RelSeq, Obj, Obj, OutDict) -> + OutDict; + +%% Just publish in journal +journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); + +%% Just deliver in journal +journal_minus_segment(Obj = {no_pub, del, no_ack}, + {{_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_minus_segment({no_pub, del, no_ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + _RelSeq, OutDict) -> + OutDict; + +%% Just ack in journal +journal_minus_segment(Obj = {no_pub, no_del, ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_minus_segment({no_pub, no_del, ack}, + {{_MsgId, _IsPersistent}, del, ack}, + _RelSeq, OutDict) -> + OutDict; + +%% Publish and deliver in journal +journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_minus_segment({PubRecord, del, no_ack}, + {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, {no_pub, del, no_ack}, OutDict); + +%% Deliver and ack in journal +journal_minus_segment(Obj = {no_pub, del, ack}, + {{_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_minus_segment({no_pub, del, ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, {no_pub, no_del, ack}, OutDict); +journal_minus_segment({no_pub, del, ack}, + {{_MsgId, _IsPersistent}, del, ack}, + _RelSeq, OutDict) -> + OutDict; + +%% Publish, deliver and ack in journal +journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, + not_found, + _RelSeq, OutDict) -> + OutDict; +journal_minus_segment({PubRecord, del, ack}, + {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, {no_pub, del, ack}, OutDict); +journal_minus_segment({PubRecord, del, ack}, + {PubRecord = {_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, {no_pub, no_del, ack}, OutDict). + -- cgit v1.2.1 From 4105585dcd57880660acece369fedb13f5d45015 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Dec 2009 13:04:46 +0000 Subject: Lots of good progress on qi3. The code almost looks pretty in places --- src/rabbit_queue_index3.erl | 413 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 377 insertions(+), 36 deletions(-) diff --git a/src/rabbit_queue_index3.erl b/src/rabbit_queue_index3.erl index 9a9a9c78..eeb38dd2 100644 --- a/src/rabbit_queue_index3.erl +++ b/src/rabbit_queue_index3.erl @@ -31,6 +31,337 @@ -module(rabbit_queue_index3). + +-define(CLEAN_FILENAME, "clean.dot"). + +%% ---- Journal details ---- + +-define(MAX_JOURNAL_ENTRY_COUNT, 32768). +-define(JOURNAL_FILENAME, "journal.jif"). + +-define(PUB_PERSIST_JPREFIX, 00). +-define(PUB_TRANS_JPREFIX, 01). +-define(DEL_JPREFIX, 10). +-define(ACK_JPREFIX, 11). +-define(JPREFIX_BITS, 2). +-define(SEQ_BYTES, 8). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). + +%% ---- Segment details ---- + +-define(SEGMENT_EXTENSION, ".idx"). + +-define(REL_SEQ_BITS, 14). +-define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + 8 - (?REL_SEQ_BITS rem 8))). +-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). + +%% seq only is binary 00 followed by 14 bits of rel seq id +%% (range: 0 - 16383) +-define(REL_SEQ_ONLY_PREFIX, 00). 
+-define(REL_SEQ_ONLY_PREFIX_BITS, 2). +-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). + +%% publish record is binary 1 followed by a bit for is_persistent, +%% then 14 bits of rel seq id, and 128 bits of md5sum msg id +-define(PUBLISH_PREFIX, 1). +-define(PUBLISH_PREFIX_BITS, 1). + +-define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). +%% 16 bytes for md5sum + 2 for seq, bits and prefix +-define(PUBLISH_RECORD_LENGTH_BYTES, ?MSG_ID_BYTES + 2). + +%% 1 publish, 1 deliver, 1 ack per msg +-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * + (?PUBLISH_RECORD_LENGTH_BYTES + + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). + +%%---------------------------------------------------------------------------- + +-record(qistate, + { dir, + segments, + journal_handle, + dirty_count + }). + +-record(segment, + { pubs, + acks, + handle, + journal_entries, + path, + num + }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +terminate(State = #qistate { segments = Segments, journal_handle = JournalHdl, + dir = Dir }) -> + ok = case JournalHdl of + undefined -> ok; + _ -> file_handle_cache:close(JournalHdl) + end, + ok = dict:fold( + fun (_Seg, #segment { handle = undefined }, ok) -> + ok; + (_Seg, #segment { handle = Hdl }, ok) -> + file_handle_cache:close(Hdl) + end, ok, Segments), + store_clean_shutdown(Dir), + State #qistate { journal_handle = undefined, segments = dict:new() }. + +terminate_and_erase(State) -> + State1 = terminate(State), + ok = delete_queue_directory(State1 #qistate.dir), + State1. + +%%---------------------------------------------------------------------------- +%% Minors +%%---------------------------------------------------------------------------- + +rev_sort(List) -> + lists:sort(fun (A, B) -> B < A end, List). + +seq_id_to_seg_and_rel_seq_id(SeqId) -> + { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. + +reconstruct_seq_id(SegNum, RelSeq) -> + (SegNum * ?SEGMENT_ENTRY_COUNT) + RelSeq. + +seg_num_to_path(Dir, SegNum) -> + SegName = integer_to_list(SegNum), + filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). + +delete_segment(#segment { handle = undefined }) -> + ok; +delete_segment(#segment { handle = Hdl, path = Path }) -> + ok = file_handle_cache:close(Hdl), + ok = file:delete(Path), + ok. + +detect_clean_shutdown(Dir) -> + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> true; + {error, enoent} -> false + end. + +store_clean_shutdown(Dir) -> + {ok, Hdl} = file_handle_cache:open(filename:join(Dir, ?CLEAN_FILENAME), + [write, raw, binary], + [{write_buffer, unbuffered}]), + ok = file_handle_cache:close(Hdl). + +queue_name_to_dir_name(Name = #resource { kind = queue }) -> + Bin = term_to_binary(Name), + Size = 8*size(Bin), + <> = Bin, + lists:flatten(io_lib:format("~.36B", [Num])). + +queues_dir() -> + filename:join(rabbit_mnesia:dir(), "queues"). + +delete_queue_directory(Dir) -> + {ok, Entries} = file:list_dir(Dir), + ok = lists:foldl(fun (Entry, ok) -> + file:delete(filename:join(Dir, Entry)) + end, ok, Entries), + ok = file:del_dir(Dir). + +get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> + {ok, Hdl} = file_handle_cache:open(Path, + [binary, raw, read, write, + {read_ahead, ?SEGMENT_TOTAL_SIZE}], + [{write_buffer, infinity}]), + {Hdl, Segment #segment { handle = Hdl }}; +get_segment_handle(Segment = #segment { handle = Hdl }) -> + {Hdl, Segment}. 
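%% queue_name_to_dir_name/1 above makes an arbitrary queue resource
%% name filesystem-safe by treating its term_to_binary encoding as
%% one large unsigned integer and printing it in base 36, which
%% yields only the characters 0-9 and A-Z. Traced at a shell (the
%% printed value is illustrative, not the real output):
%%   1> Bin = term_to_binary({resource, <<"/">>, queue, <<"test">>}).
%%   2> Size = 8 * size(Bin).
%%   3> <<Num:Size>> = Bin.
%%   4> lists:flatten(io_lib:format("~.36B", [Num])).
%%   "1HNO4PQ..."  %% safe as a directory name under queues_dir()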
+ +find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> + case dict:find(Seg, Segments) of + {ok, Segment = #segment{}} -> Segment; + error -> #segment { pubs = 0, + acks = 0, + handle = undefined, + journal_entries = dict:new(), + path = seg_num_to_path(Dir, Seg), + num = Seg + } + end. + +store_segment(Segment = #segment { num = Seg }, + State = #qistate { segments = Segments }) -> + State #qistate { segments = dict:store(Seg, Segment, Segments) }. + +get_journal_handle(State = + #qistate { journal_handle = undefined, dir = Dir }) -> + Path = filename:join(Dir, ?JOURNAL_FILENAME), + {ok, Hdl} = file_handle_cache:open(Path, + [binary, raw, read, write, + {read_ahead, ?SEGMENT_TOTAL_SIZE}], + [{write_buffer, infinity}]), + {Hdl, State #qistate { journal_handle = Hdl }}; +get_journal_handle(State = #qistate { journal_handle = Hdl }) -> + {Hdl, State}. + +%%---------------------------------------------------------------------------- +%% Majors +%%---------------------------------------------------------------------------- + +%% Loading segments + +%% Does not do any combining with the journal at all +load_segment(Seg, KeepAcks, State) -> + Segment = #segment { path = Path, handle = SegHdl } = + find_segment(Seg, State), + SegmentExists = case SegHdl of + undefined -> filelib:is_file(Path); + _ -> true + end, + case SegmentExists of + false -> + {dict:new(), 0, 0, State}; + true -> + {Hdl, Segment1} = get_segment_handle(Segment), + {ok, 0} = file_handle_cache:position(Hdl, bof), + {SegDict, PubCount, AckCount} = + load_segment_entries(KeepAcks, Hdl, dict:new(), 0, 0), + {SegDict, PubCount, AckCount, store_segment(Segment1, State)} + end. + +load_segment_entries(KeepAcks, Hdl, SegDict, PubCount, AckCount) -> + case file_handle_cache:read(Hdl, 1) of + {ok, <>} -> + {ok, LSB} = file_handle_cache:read( + Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), + <> = <>, + {AckCount1, SegDict1} = + deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict), + load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount, AckCount1); + {ok, <>} -> + %% because we specify /binary, and binaries are complete + %% bytes, the size spec is in bytes, not bits. + {ok, <>} = + file_handle_cache:read( + Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), + <> = <>, + SegDict1 = + dict:store(RelSeq, + {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, + SegDict), + load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount+1, AckCount); + _ErrOrEoF -> + {SegDict, PubCount, AckCount} + end. + +deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict) -> + case dict:find(RelSeq, SegDict) of + {ok, {PubRecord, no_del, no_ack}} -> + {AckCount, dict:store(RelSeq, {PubRecord, del, no_ack}, SegDict)}; + {ok, {PubRecord, del, no_ack}} when KeepAcks -> + {AckCount + 1, dict:store(RelSeq, {PubRecord, del, ack}, SegDict)}; + {ok, {_PubRecord, del, no_ack}} when KeepAcks -> + {AckCount + 1, dict:erase(RelSeq, SegDict)} + end. + +%% Loading Journal. This isn't idempotent and will mess up the counts +%% if you call it more than once on the same state. Assumes the counts +%% are 0 to start with. + +load_journal(State) -> + {JournalHdl, State1} = get_journal_handle(State), + {ok, 0} = file_handle_cache:position(JournalHdl, 0), + State1 = #qistate { segments = Segments } = load_journal_entries(State), + dict:fold( + fun (Seg, #segment { journal_entries = JEntries, + pubs = PubCountInJournal, + acks = AckCountInJournal }, StateN) -> + %% We want to keep acks in so that we can remove them if + %% duplicates are in the journal. 
The counts here are
+              %% purely from the segment itself.
+              {SegDict, PubCount, AckCount, StateN1} =
+                  load_segment(Seg, true, StateN),
+              %% Removed counts here are the number of pubs and acks
+              %% that are duplicates - i.e. found in both the segment
+              %% and journal.
+              {JEntries1, PubsRemoved, AcksRemoved} =
+                  journal_minus_segment(JEntries, SegDict),
+              Segment1 = find_segment(Seg, StateN1),
+              PubCount1 = PubCount + PubCountInJournal - PubsRemoved,
+              AckCount1 = AckCount + AckCountInJournal - AcksRemoved,
+              store_segment(Segment1 #segment { journal_entries = JEntries1,
+                                                pubs = PubCount1,
+                                                acks = AckCount1 }, StateN1)
+      end, State2, Segments).
+
+load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
+    case file_handle_cache:read(Hdl, ?SEQ_BYTES) of
+        {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} ->
+            case Prefix of
+                ?DEL_JPREFIX ->
+                    load_journal_entries(add_to_journal(SeqId, del, State));
+                ?ACK_JPREFIX ->
+                    load_journal_entries(add_to_journal(SeqId, ack, State));
+                _ ->
+                    case file_handle_cache:read(Hdl, ?MSG_ID_BYTES) of
+                        {ok, <<MsgIdNum:?MSG_ID_BITS>>} ->
+                            %% work around for binary data
+                            %% fragmentation. See
+                            %% rabbit_msg_file:read_next/2
+                            <<MsgId:?MSG_ID_BYTES/binary>> =
+                                <<MsgIdNum:?MSG_ID_BITS>>,
+                            Publish = {MsgId,
+                                       case Prefix of
+                                           ?PUB_PERSIST_JPREFIX -> true;
+                                           ?PUB_TRANS_JPREFIX   -> false
+                                       end},
+                            load_journal_entries(
+                              add_to_journal(SeqId, Publish, State));
+                        _ErrOrEoF -> %% err, we've lost at least a publish
+                            State
+                    end
+            end;
+        _ErrOrEoF -> State
+    end.
+
+add_to_journal(SeqId, Action, State = #qistate {}) ->
+    {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
+    Segment = #segment { journal_entries = SegJDict,
+                         pubs = PubCount, acks = AckCount } =
+        find_segment(Seg, State),
+    SegJDict1 = add_to_journal(RelSeq, Action, SegJDict),
+    Segment1 = Segment #segment { journal_entries = SegJDict1 },
+    Segment2 =
+        case Action of
+            del -> Segment1;
+            ack -> Segment1 #segment { acks = AckCount + 1 };
+            {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 }
+        end,
+    store_segment(Segment2, State);
+
+%% This is a more relaxed version of deliver_or_ack_msg because we can
+%% have dels or acks in the journal without the corresponding
+%% pub. Also, always want to keep acks. Things must occur in the right
+%% order though.
+add_to_journal(RelSeq, Action, SegJDict) ->
+    case dict:find(RelSeq, SegJDict) of
+        {ok, {PubRecord, no_del, no_ack}} when Action == del ->
+            dict:store(RelSeq, {PubRecord, del, no_ack}, SegJDict);
+        {ok, {PubRecord, DelRecord, no_ack}} when Action == ack ->
+            dict:store(RelSeq, {PubRecord, DelRecord, ack}, SegJDict);
+        error when Action == del ->
+            dict:store(RelSeq, {no_pub, del, no_ack}, SegJDict);
+        error when Action == ack ->
+            dict:store(RelSeq, {no_pub, no_del, ack}, SegJDict);
+        error ->
+            {_MsgId, _IsPersistent} = Action, %% ASSERTION
+            dict:store(RelSeq, {Action, no_del, no_ack}, SegJDict)
+    end.
+
 %% Combine what we have just read from a segment file with what we're
 %% holding for that segment in memory. There must be no
 %% duplicates.
Used when providing segment entries to the variable @@ -55,10 +386,10 @@ journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, not_found, RelSeq, OutDict) -> dict:store(RelSeq, Obj, OutDict); -journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, ack}, +journal_plus_segment({{_MsgId, _IsPersistent}, del, ack}, not_found, RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + dict:erase(RelSeq, OutDict); journal_plus_segment({no_pub, del, no_ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, @@ -79,81 +410,91 @@ journal_plus_segment({no_pub, no_del, ack}, %% duplicates of entries found in the segment itself. Used on start up %% to clean up the journal. journal_minus_segment(JEntries, SegDict) -> - dict:fold(fun (RelSeq, JObj, JEntriesOut) -> + dict:fold(fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> SegEntry = case dict:find(RelSeq, SegDict) of error -> not_found; {ok, SObj = {_, _, _}} -> SObj end, - journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut) - end, dict:new(), JEntries). + journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, + PubsRemoved, AcksRemoved) + end, {dict:new(), 0, 0}, JEntries). %% Here, the OutDict is a fresh journal that we're filling with valid -%% entries. -%% Both the same -journal_minus_segment(_RelSeq, Obj, Obj, OutDict) -> - OutDict; +%% entries. PubsRemoved and AcksRemoved only get increased when the a +%% publish or ack is in both the journal and the segment. + +%% Both the same. Must be at least the publish +journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, no_ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved + 1, AcksRemoved}; +journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved + 1, AcksRemoved + 1}; %% Just publish in journal journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, not_found, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; %% Just deliver in journal journal_minus_segment(Obj = {no_pub, del, no_ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, no_ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - _RelSeq, OutDict) -> - OutDict; + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved}; %% Just ack in journal journal_minus_segment(Obj = {no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutDict) -> - OutDict; + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved}; %% Publish and deliver in journal journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, not_found, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; journal_minus_segment({PubRecord, del, no_ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, 
{no_pub, del, no_ack}, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, del, no_ack}, OutDict), + PubsRemoved + 1, AcksRemoved}; %% Deliver and ack in journal journal_minus_segment(Obj = {no_pub, del, ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, {no_pub, no_del, ack}, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), + PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutDict) -> - OutDict; + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved + 1}; %% Publish, deliver and ack in journal journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, not_found, - _RelSeq, OutDict) -> - OutDict; + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved}; journal_minus_segment({PubRecord, del, ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, {no_pub, del, ack}, OutDict); + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, del, ack}, OutDict), + PubsRemoved + 1, AcksRemoved}; journal_minus_segment({PubRecord, del, ack}, {PubRecord = {_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, {no_pub, no_del, ack}, OutDict). - + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), + PubsRemoved + 1, AcksRemoved}. -- cgit v1.2.1 From d0f3f6898c09a1051cff000535cfe1ffd545e7b1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Dec 2009 14:56:32 +0000 Subject: More good progress on qi3. The code still almost looks pretty in places. Its prettiness is not diminishing. --- src/rabbit_queue_index3.erl | 182 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 175 insertions(+), 7 deletions(-) diff --git a/src/rabbit_queue_index3.erl b/src/rabbit_queue_index3.erl index eeb38dd2..01a7c748 100644 --- a/src/rabbit_queue_index3.erl +++ b/src/rabbit_queue_index3.erl @@ -31,9 +31,14 @@ -module(rabbit_queue_index3). +-export([init/1, terminate/1, terminate_and_erase/1, write_published/4, + write_delivered/2, write_acks/2, sync_seq_ids/3, flush_journal/1, + read_segment_entries/2, next_segment_boundary/1, segment_size/0, + find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). -define(CLEAN_FILENAME, "clean.dot"). +%%---------------------------------------------------------------------------- %% ---- Journal details ---- -define(MAX_JOURNAL_ENTRY_COUNT, 32768). @@ -98,6 +103,61 @@ %%---------------------------------------------------------------------------- +init(Name) -> + State = blank_state(Name), + %% 1. Load the journal completely. This will also load segments + %% which have entries in the journal and remove duplicates. + %% The counts will correctly reflect the combination of the + %% segment and the journal. + State1 = load_journal(State), + %% 2. Flush the journal. This makes life easier for everyone, as + %% it means there won't be any publishes in the journal alone. + State2 = #qistate { dir = Dir } = flush_journal(State1), + %% 3. 
Load each segment in turn and filter out messages that are + %% not in the msg_store, by adding acks to the journal. These + %% acks only go to the RAM journal as it doesn't matter if we + %% lose them. Also mark delivered if not clean shutdown. + AllSegs = all_segment_nums(Dir), + CleanShutdown = detect_clean_shutdown(Dir), + %% We know the journal is empty here, so we don't need to combine + %% with the journal, and we don't need to worry about messages + %% that have been acked. + State3 = + lists:foldl( + fun (Seg, StateN) -> + {SegDict, _PubCount, _AckCount, StateN1} = + load_segment(Seg, false, StateN), + dict:fold( + fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, + StateM) -> + SeqId = reconstruct_seq_id(Seg, RelSeq), + InMsgStore = rabbit_msg_store:contains(MsgId), + case {InMsgStore, CleanShutdown} of + {true, true} -> + StateM; + {true, false} when Del == del -> + StateM; + {true, false} -> + add_to_journal(SeqId, del, StateM); + {false, _} when Del == del -> + add_to_journal(SeqId, ack, StateM); + {false, _} -> + add_to_journal( + SeqId, ack, + add_to_journal(SeqId, del, StateM)) + end + end, StateN1, SegDict) + end, State2, AllSegs), + %% 4. Go through all segments and calculate the number of unacked + %% messages we have. + Count = lists:foldl( + fun (Seg, CountAcc) -> + #segment { pubs = PubCount, acks = AckCount } = + find_segment(Seg, State3), + CountAcc + PubCount - AckCount + end, 0, AllSegs), + {Count, State3}. + terminate(State = #qistate { segments = Segments, journal_handle = JournalHdl, dir = Dir }) -> ok = case JournalHdl of @@ -118,21 +178,100 @@ terminate_and_erase(State) -> ok = delete_queue_directory(State1 #qistate.dir), State1. +flush_journal(State = #qistate { dirty_count = 0 }) -> + State; +flush_journal(State = #qistate { segments = Segments }) -> + dict:fold( + fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, + acks = AckCount } = Segment, StateN) -> + case dict:is_empty(JEntries) of + true -> store_segment(Segment, StateN); + false when AckCount == PubCount -> + ok = delete_segment(Segment); + false -> + {Hdl, Segment1} = get_segment_handle(Segment), + dict:fold(fun write_entry_to_segment/3, + Hdl, JEntries), + ok = file_handle_cache:sync(Hdl), + store_segment( + Segment1 #segment { journal_entries = dict:new() }, + StateN) + end + end, State, Segments). + +read_segment_entries(InitSeqId, State) -> + {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), + {SegDict, _PubCount, _AckCount, State1} = + load_segment(Seg, false, State), + #segment { journal_entries = JEntries } = find_segment(Seg, State1), + SegDict1 = journal_plus_segment(JEntries, SegDict), + %% deliberately sort the list desc, because foldl will reverse it + RelSeqs = rev_sort(dict:fetch_keys(SegDict1)), + {lists:foldl(fun (RelSeq, Acc) -> + {{MsgId, IsPersistent}, IsDelivered, no_ack} = + dict:fetch(RelSeq, SegDict1), + [ {MsgId, reconstruct_seq_id(Seg, RelSeq), + IsPersistent, IsDelivered} | Acc ] + end, [], RelSeqs), + State1}. + +next_segment_boundary(SeqId) -> + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg + 1, 0). + +segment_size() -> + ?SEGMENT_ENTRY_COUNT. + +find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> + SegNums = all_segment_nums(Dir), + %% We don't want the lowest seq_id, merely the seq_id of the start + %% of the lowest segment. That seq_id may not actually exist, but + %% that's fine. The important thing is that the segment exists and + %% the seq_id reported is on a segment boundary. 
+ + %% We also don't really care about the max seq_id. Just start the + %% next segment: it makes life much easier. + + %% SegNums is sorted, ascending. + {LowSeqIdSeg, NextSeqId} = + case SegNums of + [] -> {0, 0}; + [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), + reconstruct_seq_id(lists:last(SegNums), 0)} + end, + {LowSeqIdSeg, NextSeqId, State}. + %%---------------------------------------------------------------------------- %% Minors %%---------------------------------------------------------------------------- +all_segment_nums(Dir) -> + lists:sort( + [list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) + || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]). + +blank_state(QueueName) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + #qistate { dir = Dir, + segments = dict:new(), + journal_handle = undefined, + dirty_count = 0 + }. + rev_sort(List) -> lists:sort(fun (A, B) -> B < A end, List). seq_id_to_seg_and_rel_seq_id(SeqId) -> { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. -reconstruct_seq_id(SegNum, RelSeq) -> - (SegNum * ?SEGMENT_ENTRY_COUNT) + RelSeq. +reconstruct_seq_id(Seg, RelSeq) -> + (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. -seg_num_to_path(Dir, SegNum) -> - SegName = integer_to_list(SegNum), +seg_num_to_path(Dir, Seg) -> + SegName = integer_to_list(Seg), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). delete_segment(#segment { handle = undefined }) -> @@ -206,13 +345,42 @@ get_journal_handle(State = get_journal_handle(State = #qistate { journal_handle = Hdl }) -> {Hdl, State}. +bool_to_int(true ) -> 1; +bool_to_int(false) -> 0. + +write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> + ok = case Publish of + no_pub -> + ok; + {MsgId, IsPersistent} -> + file_handle_cache:append( + Hdl, [<>, MsgId]) + end, + ok = case {Del, Ack} of + {no_del, no_ack} -> ok; + _ -> Binary = <>, + Data = case {Del, Ack} of + {del, ack} -> [Binary, Binary]; + _ -> Binary + end, + file_handle_cache:append(Hdl, Data) + end, + Hdl. + %%---------------------------------------------------------------------------- %% Majors %%---------------------------------------------------------------------------- %% Loading segments -%% Does not do any combining with the journal at all +%% Does not do any combining with the journal at all. The PubCount +%% that comes back is the number of publishes in the segment. The +%% number of unacked msgs is PubCount - AckCount. If KeepAcks is +%% false, then dict:size(SegDict) == PubCount - AckCount. If KeepAcks +%% is true, then dict:size(SegDict) == PubCount. load_segment(Seg, KeepAcks, State) -> Segment = #segment { path = Path, handle = SegHdl } = find_segment(Seg, State), @@ -328,7 +496,7 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> _ErrOrEoF -> State end. 
-add_to_journal(SeqId, Action, State = #qistate {}) -> +add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount }) -> {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), Segment = #segment { journal_entries = SegJDict, pubs = PubCount, acks = AckCount } = @@ -341,7 +509,7 @@ add_to_journal(SeqId, Action, State = #qistate {}) -> ack -> Segment1 #segment { acks = AckCount + 1 }; {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } end, - store_segment(Segment2, State); + store_segment(Segment2, State #qistate { dirty_count = DCount + 1 }); %% This is a more relaxed version of deliver_or_ack_msg because we can %% have dels or acks in the journal without the corresponding -- cgit v1.2.1 From fcb80173ce24878d5dfa01a4a64ad9f9ea93e1cc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Dec 2009 15:38:43 +0000 Subject: Finished. It might work - untested though --- src/rabbit_queue_index3.erl | 252 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 217 insertions(+), 35 deletions(-) diff --git a/src/rabbit_queue_index3.erl b/src/rabbit_queue_index3.erl index 01a7c748..43a210d9 100644 --- a/src/rabbit_queue_index3.erl +++ b/src/rabbit_queue_index3.erl @@ -32,7 +32,7 @@ -module(rabbit_queue_index3). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, sync_seq_ids/3, flush_journal/1, + write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). @@ -103,6 +103,41 @@ %%---------------------------------------------------------------------------- +-ifdef(use_specs). + +-type(hdl() :: ('undefined' | any())). +-type(msg_id() :: binary()). +-type(seq_id() :: integer()). +-type(qistate() :: #qistate { dir :: file_path(), + segments :: dict(), + journal_handle :: hdl(), + dirty_count :: integer() + }). + +-spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). +-spec(terminate/1 :: (qistate()) -> qistate()). +-spec(terminate_and_erase/1 :: (qistate()) -> qistate()). +-spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) + -> qistate()). +-spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). +-spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(sync_seq_ids/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(flush_journal/1 :: (qistate()) -> qistate()). +-spec(read_segment_entries/2 :: (seq_id(), qistate()) -> + {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). +-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). +-spec(segment_size/0 :: () -> non_neg_integer()). +-spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> + {non_neg_integer(), non_neg_integer(), qistate()}). +-spec(start_msg_store/1 :: ([amqqueue()]) -> 'ok'). + +-endif. + + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + init(Name) -> State = blank_state(Name), %% 1. Load the journal completely. This will also load segments @@ -158,46 +193,82 @@ init(Name) -> end, 0, AllSegs), {Count, State3}. 
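
Step 3 of init/1 above is the entire crash-recovery policy in one case
expression. A pure restatement of it, as a sketch (the helper name is
hypothetical; the real code threads the journal state through
add_to_journal/3 rather than returning a list):

    %% Journal entries init/1 adds for a segment entry that survived on
    %% disk, given whether the message is still in the msg_store, whether
    %% the last shutdown was clean, and the recorded Del flag.
    recovery_actions(InMsgStore, CleanShutdown, Del) ->
        case {InMsgStore, CleanShutdown} of
            {true, true}                  -> [];         %% trust the segment
            {true, false} when Del == del -> [];         %% delivery recorded
            {true, false}                 -> [del];      %% may have been sent
            {false, _} when Del == del    -> [ack];      %% msg gone: ack it
            {false, _}                    -> [del, ack]  %% msg gone entirely
        end.

Applying the returned actions in order via add_to_journal/3 reproduces
the fold in init/1 exactly.
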
-terminate(State = #qistate { segments = Segments, journal_handle = JournalHdl,
-                             dir = Dir }) ->
-    ok = case JournalHdl of
-             undefined -> ok;
-             _ -> file_handle_cache:close(JournalHdl)
-         end,
-    ok = dict:fold(
-           fun (_Seg, #segment { handle = undefined }, ok) ->
-                   ok;
-               (_Seg, #segment { handle = Hdl }, ok) ->
-                   file_handle_cache:close(Hdl)
-           end, ok, Segments),
-    store_clean_shutdown(Dir),
-    State #qistate { journal_handle = undefined, segments = dict:new() }.
+terminate(State) ->
+    terminate(true, State).
 
 terminate_and_erase(State) ->
     State1 = terminate(State),
     ok = delete_queue_directory(State1 #qistate.dir),
     State1.
 
+write_published(MsgId, SeqId, IsPersistent, State)
+  when is_binary(MsgId) ->
+    ?MSG_ID_BYTES = size(MsgId),
+    {JournalHdl, State1} = get_journal_handle(State),
+    ok = file_handle_cache:append(JournalHdl,
+                                  [<<(case IsPersistent of
+                                          true  -> ?PUB_PERSIST_JPREFIX;
+                                          false -> ?PUB_TRANS_JPREFIX
+                                      end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>,
+                                   MsgId]),
+    maybe_flush_journal(add_to_journal(SeqId, {MsgId, IsPersistent}, State1)).
+
+write_delivered(SeqId, State) ->
+    {JournalHdl, State1} = get_journal_handle(State),
+    ok = file_handle_cache:append(JournalHdl,
+                                  <<?DEL_JPREFIX:?JPREFIX_BITS,
+                                    SeqId:?SEQ_BITS>>),
+    maybe_flush_journal(add_to_journal(SeqId, del, State1)).
+
+write_acks(SeqIds, State) ->
+    {SeqIds1, State1} = remove_pubs_dels_from_journal(SeqIds, State),
+    case SeqIds1 of
+        [] ->
+            State1;
+        _ ->
+            {JournalHdl, State2} = get_journal_handle(State1),
+            ok = file_handle_cache:append(JournalHdl,
+                                          [<<?ACK_JPREFIX:?JPREFIX_BITS,
+                                             SeqId:?SEQ_BITS>>
+                                           || SeqId <- SeqIds1]),
+            State3 = lists:foldl(fun (SeqId, StateN) ->
+                                         add_to_journal(SeqId, ack, StateN)
+                                 end, State2, SeqIds1),
+            maybe_flush_journal(State3)
+    end.
+
+sync_seq_ids(_SeqIds, State = #qistate { journal_handle = undefined }) ->
+    State;
+sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) ->
+    ok = file_handle_cache:sync(JournalHdl),
+    State.
+
 flush_journal(State = #qistate { dirty_count = 0 }) ->
     State;
 flush_journal(State = #qistate { segments = Segments }) ->
-    dict:fold(
-      fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount,
-                            acks = AckCount } = Segment, StateN) ->
-              case dict:is_empty(JEntries) of
-                  true -> store_segment(Segment, StateN);
-                  false when AckCount == PubCount ->
-                      ok = delete_segment(Segment);
-                  false ->
-                      {Hdl, Segment1} = get_segment_handle(Segment),
-                      dict:fold(fun write_entry_to_segment/3,
-                                Hdl, JEntries),
-                      ok = file_handle_cache:sync(Hdl),
-                      store_segment(
-                        Segment1 #segment { journal_entries = dict:new() },
-                        StateN)
-              end
-      end, State, Segments).
+    State1 =
+        dict:fold(
+          fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount,
+                                acks = AckCount } = Segment, StateN) ->
+                  case dict:is_empty(JEntries) of
+                      true -> store_segment(Segment, StateN);
+                      false when AckCount == PubCount ->
+                          ok = delete_segment(Segment);
+                      false ->
+                          {Hdl, Segment1} = get_segment_handle(Segment),
+                          dict:fold(fun write_entry_to_segment/3,
+                                    Hdl, JEntries),
+                          ok = file_handle_cache:sync(Hdl),
+                          store_segment(
+                            Segment1 #segment { journal_entries = dict:new() },
+                            StateN)
+                  end
+          end, State, Segments),
+    {JournalHdl, State2} = get_journal_handle(State1),
+    {ok, 0} = file_handle_cache:position(JournalHdl, bof),
+    ok = file_handle_cache:truncate(JournalHdl),
+    ok = file_handle_cache:sync(JournalHdl),
+    State2 #qistate { dirty_count = 0 }.
 
 read_segment_entries(InitSeqId, State) ->
     {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId),
@@ -241,10 +312,81 @@ find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) ->
     end,
     {LowSeqIdSeg, NextSeqId, State}.
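
Every journal record thus opens with a two-bit prefix that tags the
operation (and, for publishes, the persistence flag), followed by a
62-bit sequence id; publish records then carry the 16-byte msg id. A
self-contained sketch of just the encoding, reusing the prefix values
and sizes from the defines in this series:

    -module(jif_codec_demo).
    -export([encode/2, decode/1]).

    -define(PUB_PERSIST_JPREFIX, 2#00).
    -define(PUB_TRANS_JPREFIX,   2#01).
    -define(DEL_JPREFIX,         2#10).
    -define(ACK_JPREFIX,         2#11).
    -define(JPREFIX_BITS, 2).
    -define(SEQ_BITS, 62). %% 8 bytes minus the 2 prefix bits

    encode(pub_persist, SeqId) ->
        <<?PUB_PERSIST_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>;
    encode(pub_trans, SeqId) ->
        <<?PUB_TRANS_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>;
    encode(del, SeqId) ->
        <<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>;
    encode(ack, SeqId) ->
        <<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>.

    decode(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>) -> {del, SeqId};
    decode(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>) -> {ack, SeqId};
    decode(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>)       -> {pub, Prefix, SeqId}.

A publish prefix is anything that is not del or ack, which is why
load_journal_entries/1 falls through to reading ?MSG_ID_BYTES more
bytes in its catch-all clause.
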
+start_msg_store(DurableQueues) -> + DurableDict = + dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), + Queue #amqqueue.name} || Queue <- DurableQueues ]), + QueuesDir = queues_dir(), + Directories = case file:list_dir(QueuesDir) of + {ok, Entries} -> + [ Entry || Entry <- Entries, + filelib:is_dir( + filename:join(QueuesDir, Entry)) ]; + {error, enoent} -> + [] + end, + DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), + {DurableQueueNames, TransientDirs} = + lists:foldl( + fun (QueueDir, {DurableAcc, TransientAcc}) -> + case sets:is_element(QueueDir, DurableDirectories) of + true -> + {[dict:fetch(QueueDir, DurableDict) | DurableAcc], + TransientAcc}; + false -> + {DurableAcc, [QueueDir | TransientAcc]} + end + end, {[], []}, Directories), + MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), + ok = rabbit:start_child(rabbit_msg_store, [MsgStoreDir, + fun queue_index_walker/1, + DurableQueueNames]), + lists:foreach(fun (DirName) -> + Dir = filename:join(queues_dir(), DirName), + ok = delete_queue_directory(Dir) + end, TransientDirs), + ok. + +%%---------------------------------------------------------------------------- +%% Msg Store Startup Delta Function +%%---------------------------------------------------------------------------- + +queue_index_walker([]) -> + finished; +queue_index_walker([QueueName|QueueNames]) -> + State = #qistate { dir = Dir } = blank_state(QueueName), + State1 = #qistate { journal_handle = JHdl } = load_journal(State), + ok = file_handle_cache:close(JHdl), + SegNums = all_segment_nums(Dir), + queue_index_walker({SegNums, State1, QueueNames}); + +queue_index_walker({[], State, QueueNames}) -> + _State = terminate(false, State), + queue_index_walker(QueueNames); +queue_index_walker({[Seg | SegNums], State, QueueNames}) -> + SeqId = reconstruct_seq_id(Seg, 0), + {Messages, State1} = read_segment_entries(SeqId, State), + queue_index_walker({Messages, State1, SegNums, QueueNames}); + +queue_index_walker({[], State, SegNums, QueueNames}) -> + queue_index_walker({SegNums, State, QueueNames}); +queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs], + State, SegNums, QueueNames}) -> + case IsPersistent of + true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}}; + false -> queue_index_walker({Msgs, State, SegNums, QueueNames}) + end. + %%---------------------------------------------------------------------------- %% Minors %%---------------------------------------------------------------------------- +maybe_flush_journal(State = #qistate { dirty_count = DCount }) + when DCount > ?MAX_JOURNAL_ENTRY_COUNT -> + flush_journal(State); +maybe_flush_journal(State) -> + State. + all_segment_nums(Dir) -> lists:sort( [list_to_integer( @@ -370,6 +512,46 @@ write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> end, Hdl. +terminate(StoreShutdown, State = + #qistate { segments = Segments, journal_handle = JournalHdl, + dir = Dir }) -> + ok = case JournalHdl of + undefined -> ok; + _ -> file_handle_cache:close(JournalHdl) + end, + ok = dict:fold( + fun (_Seg, #segment { handle = undefined }, ok) -> + ok; + (_Seg, #segment { handle = Hdl }, ok) -> + file_handle_cache:close(Hdl) + end, ok, Segments), + case StoreShutdown of + true -> store_clean_shutdown(Dir); + false -> ok + end, + State #qistate { journal_handle = undefined, segments = dict:new() }. 
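
queue_index_walker/1 is a hand-rolled iterator: each call returns
either finished or {MsgId, 1, Continuation}, where the continuation
tuple records how far the walk has got (remaining messages, segments
and queues). A consumer only has to thread the continuation back in,
as in this hypothetical drain that totals the emitted reference
counts:

    %% Hypothetical consumer of queue_index_walker/1.
    count_persistent_refs(QueueNames) ->
        count_persistent_refs(queue_index_walker(QueueNames), 0).

    count_persistent_refs(finished, Acc) ->
        Acc;
    count_persistent_refs({_MsgId, RefCount, Cont}, Acc) ->
        count_persistent_refs(queue_index_walker(Cont), Acc + RefCount).

Presumably the msg_store drives the fun it is handed in
start_msg_store/1 in just this way while rebuilding its reference
counts, without the queue index ever materialising the full message
list.
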
+ +remove_pubs_dels_from_journal(SeqIds, State) -> + lists:foldl( + fun (SeqId, {SeqIdsAcc, StateN}) -> + {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + Segment = #segment { journal_entries = JEntries, + acks = AckCount } = + find_segment(Seg, StateN), + case dict:find(RelSeq, JEntries) of + {ok, {{_MsgId, _IsPersistent}, del, no_ack}} -> + StateN1 = + store_segment( + Segment #segment { journal_entries = + dict:erase(RelSeq, JEntries), + acks = AckCount + 1 }, + StateN), + {SeqIdsAcc, StateN1}; + _ -> + {[SeqId | SeqIdsAcc], StateN} + end + end, {[], State}, SeqIds). + %%---------------------------------------------------------------------------- %% Majors %%---------------------------------------------------------------------------- @@ -451,7 +633,7 @@ load_journal(State) -> %% We want to keep acks in so that we can remove them if %% duplicates are in the journal. The counts here are %% purely from the segment itself. - {SegDict, PubCount, AckCount, StateN1} = + {SegDict, PubCountInSeg, AckCountInSeg, StateN1} = load_segment(Seg, true, StateN), %% Removed counts here are the number of pubs and acks %% that are duplicates - i.e. found in both the segment @@ -459,8 +641,8 @@ load_journal(State) -> {JEntries1, PubsRemoved, AcksRemoved} = journal_minus_segment(JEntries, SegDict), {Segment1, StateN2} = find_segment(Seg, StateN1), - PubCount1 = PubCount + PubCountInJournal - PubsRemoved, - AckCount1 = AckCount + AckCountInJournal - AcksRemoved, + PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, + AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, store_segment(Segment1 #segment { journal_entries = JEntries1, pubs = PubCount1, acks = AckCount1 }, StateN2) -- cgit v1.2.1 From 1547f4fc5424d38dfbff23302fd2192420978204 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Dec 2009 18:04:53 +0000 Subject: New qi in place, tested and debugged. It works. It's not quite as fast as before because of use of nested dicts. This can be solved by using sets for publishes, deliveries and acks within each segment - we don't actually need what the dict provides and sets will go much faster. That change should be fairly straight forward to do - the code will get a little longer, but not much more complex. --- src/rabbit_queue_index.erl | 1186 ++++++++++++++++++++--------------------- src/rabbit_queue_index3.erl | 850 ----------------------------- src/rabbit_tests.erl | 16 +- src/rabbit_variable_queue.erl | 2 +- 4 files changed, 594 insertions(+), 1460 deletions(-) delete mode 100644 src/rabbit_queue_index3.erl diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 91ecd669..2b4ec1a4 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,54 +32,28 @@ -module(rabbit_queue_index). -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, sync_seq_ids/3, flush_journal/1, + write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). -%%---------------------------------------------------------------------------- -%% The queue disk index -%% -%% The queue disk index operates over a journal, and a number of -%% segment files. Each segment is the same size, both in max number of -%% entries, and max file size, owing to fixed sized records. -%% -%% Publishes are written directly to the segment files. 
The segment is -%% found by dividing the sequence id by the the max number of entries -%% per segment. Only the relative sequence within the segment is -%% recorded as the sequence id within a segment file (i.e. sequence id -%% modulo max number of entries per segment). This is keeps entries -%% as small as possible. Publishes are only ever going to be received -%% in contiguous ascending order. -%% -%% Acks and deliveries are written to a bounded journal and are also -%% held in memory, each in a dict with the segment as the key. Again, -%% the records are fixed size: the entire sequence id is written and -%% is limited to a 63-bit unsigned integer. The remaining bit -%% indicates whether the journal entry is for a delivery or an -%% ack. When the journal gets too big, or flush_journal is called, the -%% journal is (possibly incrementally) flushed out to the segment -%% files. As acks and delivery notes can be received in any order -%% (this is not obvious for deliveries, but consider what happens when -%% eg msgs are *re*queued - you'll publish and then mark the msgs -%% delivered immediately, which may be out of order), this journal -%% reduces seeking, and batches writes to the segment files, keeping -%% performance high. -%% -%% On startup, the journal is read along with all the segment files, -%% and the journal is fully flushed out to the segment files. Care is -%% taken to ensure that no message can be delivered or ack'd twice. -%% -%%---------------------------------------------------------------------------- - -define(CLEAN_FILENAME, "clean.dot"). +%%---------------------------------------------------------------------------- +%% ---- Journal details ---- + -define(MAX_JOURNAL_ENTRY_COUNT, 32768). -define(JOURNAL_FILENAME, "journal.jif"). --define(DEL_BIT, 0). --define(ACK_BIT, 1). +-define(PUB_PERSIST_JPREFIX, 2#00). +-define(PUB_TRANS_JPREFIX, 2#01). +-define(DEL_JPREFIX, 2#10). +-define(ACK_JPREFIX, 2#11). +-define(JPREFIX_BITS, 2). -define(SEQ_BYTES, 8). --define(SEQ_BITS, ((?SEQ_BYTES * 8) - 1)). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). + +%% ---- Segment details ---- + -define(SEGMENT_EXTENSION, ".idx"). -define(REL_SEQ_BITS, 14). @@ -111,13 +85,18 @@ -record(qistate, { dir, - seg_num_handles, - journal_count, - journal_ack_dict, - journal_del_dict, - seg_ack_counts, - publish_handle, - partial_segments + segments, + journal_handle, + dirty_count + }). + +-record(segment, + { pubs, + acks, + handle, + journal_entries, + path, + num }). -include("rabbit.hrl"). @@ -129,16 +108,10 @@ -type(hdl() :: ('undefined' | any())). -type(msg_id() :: binary()). -type(seq_id() :: integer()). --type(hdl_and_count() :: ('undefined' | - {non_neg_integer(), hdl(), non_neg_integer()})). --type(qistate() :: #qistate { dir :: file_path(), - seg_num_handles :: dict(), - journal_count :: integer(), - journal_ack_dict :: dict(), - journal_del_dict :: dict(), - seg_ack_counts :: dict(), - publish_handle :: hdl_and_count(), - partial_segments :: dict() +-type(qistate() :: #qistate { dir :: file_path(), + segments :: dict(), + journal_handle :: hdl(), + dirty_count :: integer() }). -spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). @@ -148,7 +121,7 @@ -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). --spec(sync_seq_ids/3 :: ([seq_id()], boolean(), qistate()) -> qistate()). +-spec(sync_seq_ids/2 :: ([seq_id()], qistate()) -> qistate()). 
-spec(flush_journal/1 :: (qistate()) -> qistate()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). @@ -167,16 +140,61 @@ init(Name) -> State = blank_state(Name), - {TotalMsgCount, State1} = read_and_prune_segments(State), - scatter_journal(TotalMsgCount, State1). - -terminate(State = #qistate { seg_num_handles = SegHdls }) -> - case 0 == dict:size(SegHdls) of - true -> State; - false -> State1 = #qistate { dir = Dir } = close_all_handles(State), - store_clean_shutdown(Dir), - State1 #qistate { publish_handle = undefined } - end. + %% 1. Load the journal completely. This will also load segments + %% which have entries in the journal and remove duplicates. + %% The counts will correctly reflect the combination of the + %% segment and the journal. + State1 = load_journal(State), + %% 2. Flush the journal. This makes life easier for everyone, as + %% it means there won't be any publishes in the journal alone. + State2 = #qistate { dir = Dir } = flush_journal(State1), + %% 3. Load each segment in turn and filter out messages that are + %% not in the msg_store, by adding acks to the journal. These + %% acks only go to the RAM journal as it doesn't matter if we + %% lose them. Also mark delivered if not clean shutdown. + AllSegs = all_segment_nums(State2), + CleanShutdown = detect_clean_shutdown(Dir), + %% We know the journal is empty here, so we don't need to combine + %% with the journal, and we don't need to worry about messages + %% that have been acked. + State3 = + lists:foldl( + fun (Seg, StateN) -> + {SegDict, _PubCount, _AckCount, StateN1} = + load_segment(Seg, false, StateN), + dict:fold( + fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, + StateM) -> + SeqId = reconstruct_seq_id(Seg, RelSeq), + InMsgStore = rabbit_msg_store:contains(MsgId), + case {InMsgStore, CleanShutdown} of + {true, true} -> + StateM; + {true, false} when Del == del -> + StateM; + {true, false} -> + add_to_journal(SeqId, del, StateM); + {false, _} when Del == del -> + add_to_journal(SeqId, ack, StateM); + {false, _} -> + add_to_journal( + SeqId, ack, + add_to_journal(SeqId, del, StateM)) + end + end, StateN1, SegDict) + end, State2, AllSegs), + %% 4. Go through all segments and calculate the number of unacked + %% messages we have. + Count = lists:foldl( + fun (Seg, CountAcc) -> + #segment { pubs = PubCount, acks = AckCount } = + find_segment(Seg, State3), + CountAcc + PubCount - AckCount + end, 0, AllSegs), + {Count, State3}. + +terminate(State) -> + terminate(true, State). terminate_and_erase(State) -> State1 = terminate(State), @@ -186,123 +204,114 @@ terminate_and_erase(State) -> write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), - {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - {Hdl, State1} = get_pub_handle(SegNum, State), - ok = file_handle_cache:append(Hdl, - <>), - State1. + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append(JournalHdl, + [<<(case IsPersistent of + true -> ?PUB_PERSIST_JPREFIX; + false -> ?PUB_TRANS_JPREFIX + end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, + MsgId]), + maybe_flush_journal(add_to_journal(SeqId, {MsgId, IsPersistent}, State1)). + +write_delivered(SeqId, State) -> + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append(JournalHdl, + <>), + maybe_flush_journal(add_to_journal(SeqId, del, State1)). 
+
+write_acks(SeqIds, State) ->
+    {JournalHdl, State1} = get_journal_handle(State),
+    ok = file_handle_cache:append(JournalHdl,
+                                  [<<?ACK_JPREFIX:?JPREFIX_BITS,
+                                     SeqId:?SEQ_BITS>> || SeqId <- SeqIds]),
+    State2 = lists:foldl(fun (SeqId, StateN) ->
+                                 add_to_journal(SeqId, ack, StateN)
+                         end, State1, SeqIds),
+    maybe_flush_journal(State2).
+
+sync_seq_ids(_SeqIds, State = #qistate { journal_handle = undefined }) ->
+    State;
+sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) ->
+    ok = file_handle_cache:sync(JournalHdl),
+    State.
 
-write_delivered(SeqId, State = #qistate { journal_del_dict = JDelDict }) ->
-    {JDelDict1, State1} =
-        write_to_journal([<<?DEL_BIT:1, SeqId:?SEQ_BITS>>],
-                         [SeqId], JDelDict, State),
-    maybe_flush(State1 #qistate { journal_del_dict = JDelDict1 }).
-
-write_acks(SeqIds, State = #qistate { journal_ack_dict = JAckDict }) ->
-    {JAckDict1, State1} =
-        write_to_journal([<<?ACK_BIT:1, SeqId:?SEQ_BITS>> || SeqId <- SeqIds],
-                         SeqIds, JAckDict, State),
-    maybe_flush(State1 #qistate { journal_ack_dict = JAckDict1 }).
-
-sync_seq_ids(SeqIds, SyncAckJournal, State) ->
-    State1 = case SyncAckJournal of
-                 true -> {Hdl, State2} = get_journal_handle(State),
-                         ok = file_handle_cache:sync(Hdl),
-                         State2;
-                 false -> State
-             end,
-    SegNumsSet =
-        lists:foldl(
-          fun (SeqId, Set) ->
-                  {SegNum, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
-                  sets:add_element(SegNum, Set)
-          end, sets:new(), SeqIds),
-    sets:fold(
-      fun (SegNum, StateN) ->
-              {Hdl1, StateM} = get_seg_handle(SegNum, StateN),
-              ok = file_handle_cache:sync(Hdl1),
-              StateM
-      end, State1, SegNumsSet).
-
-flush_journal(State = #qistate { journal_count = 0 }) ->
+flush_journal(State = #qistate { dirty_count = 0 }) ->
     State;
-flush_journal(State = #qistate { journal_ack_dict = JAckDict,
-                                 journal_del_dict = JDelDict,
-                                 journal_count = JCount }) ->
-    SegNum = case dict:fetch_keys(JAckDict) of
-                 [] -> hd(dict:fetch_keys(JDelDict));
-                 [N|_] -> N
-             end,
-    Dels = seg_entries_from_dict(SegNum, JDelDict),
-    Acks = seg_entries_from_dict(SegNum, JAckDict),
-    State1 = append_dels_to_segment(SegNum, Dels, State),
-    State2 = append_acks_to_segment(SegNum, Acks, State1),
-    JCount1 = JCount - length(Dels) - length(Acks),
-    State3 = State2 #qistate { journal_del_dict = dict:erase(SegNum, JDelDict),
-                               journal_ack_dict = dict:erase(SegNum, JAckDict),
-                               journal_count = JCount1 },
-    case JCount1 of
-        0 -> {Hdl, State4} = get_journal_handle(State3),
-             {ok, 0} = file_handle_cache:position(Hdl, bof),
-             ok = file_handle_cache:truncate(Hdl),
-             ok = file_handle_cache:sync(Hdl),
-             State4;
-        _ -> flush_journal(State3)
-    end.
+flush_journal(State = #qistate { segments = Segments }) ->
+    State1 =
+        dict:fold(
+          fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount,
+                                acks = AckCount } = Segment, StateN) ->
+                  case PubCount > 0 andalso PubCount == AckCount of
+                      true ->
+                          ok = delete_segment(Segment),
+                          StateN;
+                      false ->
+                          case 0 == dict:size(JEntries) of
+                              true ->
+                                  store_segment(Segment, StateN);
+                              false ->
+                                  {Hdl, Segment1} = get_segment_handle(Segment),
+                                  dict:fold(fun write_entry_to_segment/3,
+                                            Hdl, JEntries),
+                                  ok = file_handle_cache:sync(Hdl),
+                                  store_segment(
+                                    Segment1 #segment { journal_entries =
+                                                        dict:new() }, StateN)
+                          end
+                  end
+          end, State #qistate { segments = dict:new() }, Segments),
+    {JournalHdl, State2} = get_journal_handle(State1),
+    {ok, 0} = file_handle_cache:position(JournalHdl, bof),
+    ok = file_handle_cache:truncate(JournalHdl),
+    ok = file_handle_cache:sync(JournalHdl),
+    State2 #qistate { dirty_count = 0 }.
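
Per segment, the flush boils down to a three-way decision. A compact
restatement (hypothetical helper; the real fold also has to open
handles and thread the state through):

    %% What flush_journal/1 does with one segment, given its counts
    %% and its pending in-memory journal entries.
    flush_decision(PubCount, AckCount, JEntries) ->
        if PubCount > 0 andalso PubCount == AckCount ->
                delete_file;       %% every publish acked: file is dead
           true ->
                case dict:size(JEntries) of
                    0 -> keep_as_is;       %% nothing pending for it
                    _ -> append_and_sync   %% write the entries out
                end
        end.

Deleting outright is safe because a fully acked segment can never be
read again; every entry in it is already accounted for.
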
read_segment_entries(InitSeqId, State) -> - {SegNum, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = - load_segment(SegNum, State), + {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), + {SegDict, _PubCount, _AckCount, State1} = + load_segment(Seg, false, State), + #segment { journal_entries = JEntries } = find_segment(Seg, State1), + SegDict1 = journal_plus_segment(JEntries, SegDict), %% deliberately sort the list desc, because foldl will reverse it - RelSeqs = rev_sort(dict:fetch_keys(SDict)), + RelSeqs = rev_sort(dict:fetch_keys(SegDict1)), {lists:foldl(fun (RelSeq, Acc) -> - {MsgId, IsDelivered, IsPersistent} = - dict:fetch(RelSeq, SDict), - [ {MsgId, reconstruct_seq_id(SegNum, RelSeq), - IsPersistent, IsDelivered} | Acc] + {{MsgId, IsPersistent}, IsDelivered, no_ack} = + dict:fetch(RelSeq, SegDict1), + [ {MsgId, reconstruct_seq_id(Seg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ] end, [], RelSeqs), State1}. next_segment_boundary(SeqId) -> - {SegNum, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - reconstruct_seq_id(SegNum + 1, 0). + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg + 1, 0). segment_size() -> ?SEGMENT_ENTRY_COUNT. -find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> - SegNums = all_segment_nums(Dir), +find_lowest_seq_id_seg_and_next_seq_id(State) -> + SegNums = all_segment_nums(State), %% We don't want the lowest seq_id, merely the seq_id of the start %% of the lowest segment. That seq_id may not actually exist, but %% that's fine. The important thing is that the segment exists and %% the seq_id reported is on a segment boundary. + %% We also don't really care about the max seq_id. Just start the + %% next segment: it makes life much easier. + %% SegNums is sorted, ascending. - LowSeqIdSeg = + {LowSeqIdSeg, NextSeqId} = case SegNums of - [] -> 0; - [MinSegNum|_] -> reconstruct_seq_id(MinSegNum, 0) + [] -> {0, 0}; + [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), + reconstruct_seq_id(1 + lists:last(SegNums), 0)} end, - {NextSeqId, State1} = - case SegNums of - [] -> {0, State}; - _ -> MaxSegNum = lists:last(SegNums), - {_SDict, PubCount, _AckCount, HighRelSeq, State2} = - load_segment(MaxSegNum, State), - NextSeqId1 = reconstruct_seq_id(MaxSegNum, HighRelSeq), - NextSeqId2 = case PubCount of - 0 -> NextSeqId1; - _ -> NextSeqId1 + 1 - end, - {NextSeqId2, State2} - end, - {LowSeqIdSeg, NextSeqId, State1}. + {LowSeqIdSeg, NextSeqId, State}. start_msg_store(DurableQueues) -> - DurableDict = + DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), Queue #amqqueue.name} || Queue <- DurableQueues ]), QueuesDir = queues_dir(), @@ -337,175 +346,84 @@ start_msg_store(DurableQueues) -> ok. %%---------------------------------------------------------------------------- -%% Minor Helpers +%% Msg Store Startup Delta Function %%---------------------------------------------------------------------------- -maybe_flush(State = #qistate { journal_count = JCount }) - when JCount > ?MAX_JOURNAL_ENTRY_COUNT -> - flush_journal(State); -maybe_flush(State) -> - State. - -write_to_journal(BinList, SeqIds, Dict, - State = #qistate { journal_count = JCount }) -> - {Hdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(Hdl, BinList), - {Dict1, JCount1} = - lists:foldl( - fun (SeqId, {Dict2, JCount2}) -> - {add_seqid_to_dict(SeqId, Dict2), JCount2 + 1} - end, {Dict, JCount}, SeqIds), - {Dict1, State1 #qistate { journal_count = JCount1 }}. 
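
next_segment_boundary/1 rounds a sequence id up to the first slot of
the following segment, which is why read_segment_entries/2 can insist
on a segment-aligned InitSeqId (the {Seg, 0} match). A worked case,
again assuming 16384 entries per segment:

    %% next_segment_boundary(0)     -> 16384  (seq id 0 is in segment 0)
    %% next_segment_boundary(16383) -> 16384  (last slot of segment 0)
    %% next_segment_boundary(16384) -> 32768  (first slot of segment 1)

so a caller can walk a queue segment by segment, starting at 0 and
jumping to each returned boundary in turn.
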
- -queue_name_to_dir_name(Name = #resource { kind = queue }) -> - Bin = term_to_binary(Name), - Size = 8*size(Bin), - <> = Bin, - lists:flatten(io_lib:format("~.36B", [Num])). - -queues_dir() -> - filename:join(rabbit_mnesia:dir(), "queues"). +queue_index_walker([]) -> + finished; +queue_index_walker([QueueName|QueueNames]) -> + State = blank_state(QueueName), + State1 = load_journal(State), + SegNums = all_segment_nums(State1), + queue_index_walker({SegNums, State1, QueueNames}); -rev_sort(List) -> - lists:sort(fun (A, B) -> B < A end, List). +queue_index_walker({[], State, QueueNames}) -> + _State = terminate(false, State), + queue_index_walker(QueueNames); +queue_index_walker({[Seg | SegNums], State, QueueNames}) -> + SeqId = reconstruct_seq_id(Seg, 0), + {Messages, State1} = read_segment_entries(SeqId, State), + queue_index_walker({Messages, State1, SegNums, QueueNames}); -get_journal_handle(State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> - case dict:find(journal, SegHdls) of - {ok, Hdl} -> - {Hdl, State}; - error -> - Path = filename:join(Dir, ?JOURNAL_FILENAME), - Mode = [raw, binary, write, read, read_ahead], - new_handle(journal, Path, Mode, State) +queue_index_walker({[], State, SegNums, QueueNames}) -> + queue_index_walker({SegNums, State, QueueNames}); +queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs], + State, SegNums, QueueNames}) -> + case IsPersistent of + true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}}; + false -> queue_index_walker({Msgs, State, SegNums, QueueNames}) end. -get_pub_handle(SegNum, State = #qistate { publish_handle = PubHandle }) -> - {State1, PubHandle1 = {_SegNum, Hdl, _Count}} = - get_counted_handle(SegNum, State, PubHandle), - {Hdl, State1 #qistate { publish_handle = PubHandle1 }}. - -get_counted_handle(SegNum, State, undefined) -> - get_counted_handle(SegNum, State, {SegNum, undefined, 0}); -get_counted_handle(SegNum, State = #qistate { partial_segments = Partials }, - {SegNum, undefined, Count}) -> - {Hdl, State1} = get_seg_handle(SegNum, State), - {CountExtra, Partials1} = - case dict:find(SegNum, Partials) of - {ok, CountExtra1} -> {CountExtra1, dict:erase(SegNum, Partials)}; - error -> {0, Partials} - end, - Count1 = Count + 1 + CountExtra, - {State1 #qistate { partial_segments = Partials1 }, {SegNum, Hdl, Count1}}; -get_counted_handle(SegNum, State, {SegNum, Hdl, Count}) - when Count < ?SEGMENT_ENTRY_COUNT -> - {State, {SegNum, Hdl, Count + 1}}; -get_counted_handle(SegNumA, State, {SegNumB, Hdl, ?SEGMENT_ENTRY_COUNT}) - when SegNumA == SegNumB + 1 -> - ok = file_handle_cache:append_write_buffer(Hdl), - get_counted_handle(SegNumA, State, undefined); -get_counted_handle(SegNumA, State = #qistate { partial_segments = Partials, - seg_ack_counts = AckCounts }, - {SegNumB, _Hdl, Count}) -> - %% don't flush here because it's possible SegNumB has been deleted - State1 = - case dict:find(SegNumB, AckCounts) of - {ok, Count} -> - %% #acks == #pubs, and we're moving to different - %% segment, so delete. - delete_segment(SegNumB, State); - _ -> - State #qistate { - partial_segments = dict:store(SegNumB, Count, Partials) } - end, - get_counted_handle(SegNumA, State1, undefined). 
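
The directory name for a queue is its #resource name serialised with
term_to_binary/1, read back as one large unsigned integer and printed
in base 36: stable, filesystem-safe, and collision-free, since
distinct terms serialise to distinct binaries. The same trick on an
arbitrary term, as a sketch (the record type is immaterial):

    %% Encode any term as a base-36 directory name, in the style of
    %% queue_name_to_dir_name/1.
    term_to_dir_name(Term) ->
        Bin = term_to_binary(Term),
        Size = 8 * size(Bin),
        <<Num:Size>> = Bin,
        lists:flatten(io_lib:format("~.36B", [Num])).
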
+%%---------------------------------------------------------------------------- +%% Minors +%%---------------------------------------------------------------------------- -get_seg_handle(SegNum, State = #qistate { dir = Dir, seg_num_handles = SegHdls }) -> - case dict:find(SegNum, SegHdls) of - {ok, Hdl} -> - {Hdl, State}; - error -> - new_handle(SegNum, seg_num_to_path(Dir, SegNum), - [binary, raw, read, write, - {read_ahead, ?SEGMENT_TOTAL_SIZE}], - State) - end. +maybe_flush_journal(State = #qistate { dirty_count = DCount }) + when DCount > ?MAX_JOURNAL_ENTRY_COUNT -> + flush_journal(State); +maybe_flush_journal(State) -> + State. -delete_segment(SegNum, State = #qistate { dir = Dir, - seg_ack_counts = AckCounts, - partial_segments = Partials }) -> - State1 = close_handle(SegNum, State), - ok = case file:delete(seg_num_to_path(Dir, SegNum)) of - ok -> ok; - {error, enoent} -> ok - end, - State1 #qistate {seg_ack_counts = dict:erase(SegNum, AckCounts), - partial_segments = dict:erase(SegNum, Partials) }. - -new_handle(Key, Path, Mode, State = #qistate { seg_num_handles = SegHdls }) -> - {ok, Hdl} = file_handle_cache:open(Path, Mode, [{write_buffer, infinity}]), - {Hdl, State #qistate { seg_num_handles = dict:store(Key, Hdl, SegHdls) }}. - -close_handle(Key, State = #qistate { seg_num_handles = SegHdls }) -> - case dict:find(Key, SegHdls) of - {ok, Hdl} -> - ok = file_handle_cache:close(Hdl), - State #qistate { seg_num_handles = dict:erase(Key, SegHdls) }; - error -> - State - end. +all_segment_nums(#qistate { segments = Segments, dir = Dir }) -> + sets:to_list( + lists:foldl( + fun (SegName, Set) -> + sets:add_element( + list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + SegName)), Set) + end, sets:from_list(dict:fetch_keys(Segments)), + filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir))). -close_all_handles(State = #qistate { seg_num_handles = SegHdls }) -> - ok = dict:fold(fun (_Key, Hdl, ok) -> - file_handle_cache:close(Hdl) - end, ok, SegHdls), - State #qistate { seg_num_handles = dict:new() }. +blank_state(QueueName) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + #qistate { dir = Dir, + segments = dict:new(), + journal_handle = undefined, + dirty_count = 0 + }. -bool_to_int(true ) -> 1; -bool_to_int(false) -> 0. +rev_sort(List) -> + lists:sort(fun (A, B) -> B < A end, List). seq_id_to_seg_and_rel_seq_id(SeqId) -> { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. -reconstruct_seq_id(SegNum, RelSeq) -> - (SegNum * ?SEGMENT_ENTRY_COUNT) + RelSeq. +reconstruct_seq_id(Seg, RelSeq) -> + (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. -seg_num_to_path(Dir, SegNum) -> - SegName = integer_to_list(SegNum), +seg_num_to_path(Dir, Seg) -> + SegName = integer_to_list(Seg), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). -delete_queue_directory(Dir) -> - {ok, Entries} = file:list_dir(Dir), - ok = lists:foldl(fun (Entry, ok) -> - file:delete(filename:join(Dir, Entry)) - end, ok, Entries), - ok = file:del_dir(Dir). - -add_seqid_to_dict(SeqId, Dict) -> - {SegNum, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - add_seqid_to_dict(SegNum, RelSeq, Dict). - -add_seqid_to_dict(SegNum, RelSeq, Dict) -> - dict:update(SegNum, fun(Lst) -> [RelSeq|Lst] end, [RelSeq], Dict). - -all_segment_nums(Dir) -> - lists:sort( - [list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) - || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]). 
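
Note that the replacement all_segment_nums/1 takes the whole #qistate
rather than just the directory: a segment can exist purely in memory,
with all of its entries still in the journal and no file written yet,
so the set of live segment numbers is the union of
dict:fetch_keys(Segments) and the on-disk wildcard matches. The file
name parsing itself is just the leading digit run:

    %% Hypothetical helper matching the parse inside all_segment_nums/1;
    %% seg_file_to_num("17.idx") -> 17.
    seg_file_to_num(SegName) ->
        list_to_integer(
          lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, SegName)).
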
-
-blank_state(QueueName) ->
-    StrName = queue_name_to_dir_name(QueueName),
-    Dir = filename:join(queues_dir(), StrName),
-    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
-    #qistate { dir = Dir,
-               seg_num_handles = dict:new(),
-               journal_count = 0,
-               journal_ack_dict = dict:new(),
-               journal_del_dict = dict:new(),
-               seg_ack_counts = dict:new(),
-               publish_handle = undefined,
-               partial_segments = dict:new()
-             }.
 
 detect_clean_shutdown(Dir) ->
     case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
@@ -519,258 +437,143 @@ store_clean_shutdown(Dir) ->
                                        [{write_buffer, unbuffered}]),
     ok = file_handle_cache:close(Hdl).
 
-seg_entries_from_dict(SegNum, Dict) ->
-    case dict:find(SegNum, Dict) of
-        {ok, Entries} -> Entries;
-        error -> []
-    end.
-
-
-%%----------------------------------------------------------------------------
-%% Msg Store Startup Delta Function
-%%----------------------------------------------------------------------------
-
-queue_index_walker([]) ->
-    finished;
-queue_index_walker([QueueName|QueueNames]) ->
-    State = blank_state(QueueName),
-    {Hdl, State1} = get_journal_handle(State),
-    {JAckDict, _JDelDict} = load_journal(Hdl, dict:new(), dict:new()),
-    State2 = #qistate { dir = Dir } =
-        close_handle(journal, State1 #qistate { journal_ack_dict = JAckDict }),
-    SegNums = all_segment_nums(Dir),
-    queue_index_walker({SegNums, State2, QueueNames});
-
-queue_index_walker({[], State, QueueNames}) ->
-    _State = terminate(State),
-    queue_index_walker(QueueNames);
-queue_index_walker({[SegNum | SegNums], State, QueueNames}) ->
-    {SDict, _PubCount, _AckCount, _HighRelSeq, State1} =
-        load_segment(SegNum, State),
-    queue_index_walker({dict:to_list(SDict), State1, SegNums, QueueNames});
-
-queue_index_walker({[], State, SegNums, QueueNames}) ->
-    queue_index_walker({SegNums, State, QueueNames});
-queue_index_walker({[{_RelSeq, {MsgId, _IsDelivered, IsPersistent}} | Msgs],
-                    State, SegNums, QueueNames}) ->
-    case IsPersistent of
-        true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}};
-        false -> queue_index_walker({Msgs, State, SegNums, QueueNames})
-    end.
+queue_name_to_dir_name(Name = #resource { kind = queue }) ->
+    Bin = term_to_binary(Name),
+    Size = 8*size(Bin),
+    <<Num:Size>> = Bin,
+    lists:flatten(io_lib:format("~.36B", [Num])).
 
+queues_dir() ->
+    filename:join(rabbit_mnesia:dir(), "queues").
 
+delete_queue_directory(Dir) ->
+    {ok, Entries} = file:list_dir(Dir),
+    ok = lists:foldl(fun (Entry, ok) ->
+                             file:delete(filename:join(Dir, Entry))
+                     end, ok, Entries),
+    ok = file:del_dir(Dir).
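
The clean-shutdown handshake is deliberately destructive on read:
detect_clean_shutdown/1 deletes the marker as it checks for it, so a
later crash cannot masquerade as a clean stop, and only an orderly
terminate recreates it via store_clean_shutdown/1. The protocol in
isolation, using plain file operations (hypothetical module, same
"clean.dot" convention):

    -module(clean_marker_demo).
    -export([detect/1, store/1]).

    -define(CLEAN_FILENAME, "clean.dot").

    %% True at most once per marker: checking consumes it.
    detect(Dir) ->
        case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
            ok              -> true;
            {error, enoent} -> false
        end.

    %% An empty file suffices; only its existence carries information.
    store(Dir) ->
        ok = file:write_file(filename:join(Dir, ?CLEAN_FILENAME), <<>>).
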
-read_and_prune_segments(State = #qistate { dir = Dir }) -> - SegNums = all_segment_nums(Dir), - CleanShutdown = detect_clean_shutdown(Dir), - {TotalMsgCount, State1} = - lists:foldl( - fun (SegNum, {TotalMsgCount1, StateN = - #qistate { publish_handle = PublishHandle, - partial_segments = Partials }}) -> - {SDict, PubCount, AckCount, _HighRelSeq, StateM} = - load_segment(SegNum, StateN), - StateL = #qistate { seg_ack_counts = AckCounts } = - drop_and_deliver(SegNum, SDict, CleanShutdown, StateM), - %% ignore the effect of drop_and_deliver on - %% TotalMsgCount and AckCounts, as drop_and_deliver - %% will add to the journal dicts, which will then - %% effect TotalMsgCount when we scatter the journal - TotalMsgCount2 = TotalMsgCount1 + dict:size(SDict), - AckCounts1 = case AckCount of - 0 -> AckCounts; - N -> dict:store(SegNum, N, AckCounts) - end, - %% In the following, whilst there may be several - %% partial segments, we only remember the last - %% one. All other partial segments get added into - %% the partial_segments dict - {PublishHandle1, Partials1} = - case PubCount of - ?SEGMENT_ENTRY_COUNT -> - {PublishHandle, Partials}; - 0 -> - {PublishHandle, Partials}; - _ -> - {{SegNum, undefined, PubCount}, - case PublishHandle of - undefined -> - Partials; - {SegNumOld, undefined, PubCountOld} -> - dict:store(SegNumOld, PubCountOld, - Partials) - end} - end, - {TotalMsgCount2, - StateL #qistate { seg_ack_counts = AckCounts1, - publish_handle = PublishHandle1, - partial_segments = Partials1 }} - end, {0, State}, SegNums), - {TotalMsgCount, State1}. - -scatter_journal(TotalMsgCount, State = #qistate { dir = Dir }) -> - {Hdl, State1 = #qistate { journal_del_dict = JDelDict, - journal_ack_dict = JAckDict }} = - get_journal_handle(State), - %% ADict and DDict may well contain duplicates. However, this is - %% ok, because we use sets to eliminate dups before writing to - %% segments - {ADict, DDict} = load_journal(Hdl, JAckDict, JDelDict), - State2 = close_handle(journal, State1), - {TotalMsgCount1, ADict1, State3} = - dict:fold(fun replay_journal_to_segment/3, - {TotalMsgCount, ADict, - %% supply empty dicts so that when - %% replay_journal_to_segment loads segments, it - %% gets all msgs, and ignores anything we've found - %% in the journal. - State2 #qistate { journal_del_dict = dict:new(), - journal_ack_dict = dict:new() }}, DDict), - %% replay for segments which only had acks, and no deliveries - {TotalMsgCount2, State4} = - dict:fold(fun replay_journal_acks_to_segment/3, - {TotalMsgCount1, State3}, ADict1), - JournalPath = filename:join(Dir, ?JOURNAL_FILENAME), - ok = file:delete(JournalPath), - {TotalMsgCount2, State4}. - -load_journal(Hdl, ADict, DDict) -> - case file_handle_cache:read(Hdl, ?SEQ_BYTES) of - {ok, <>} -> - load_journal(Hdl, ADict, add_seqid_to_dict(SeqId, DDict)); - {ok, <>} -> - load_journal(Hdl, add_seqid_to_dict(SeqId, ADict), DDict); - _ErrOrEoF -> - {ADict, DDict} +get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> + {ok, Hdl} = file_handle_cache:open(Path, + [binary, raw, read, write, + {read_ahead, ?SEGMENT_TOTAL_SIZE}], + [{write_buffer, infinity}]), + {Hdl, Segment #segment { handle = Hdl }}; +get_segment_handle(Segment = #segment { handle = Hdl }) -> + {Hdl, Segment}. 
+ +find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> + case dict:find(Seg, Segments) of + {ok, Segment = #segment{}} -> Segment; + error -> #segment { pubs = 0, + acks = 0, + handle = undefined, + journal_entries = dict:new(), + path = seg_num_to_path(Dir, Seg), + num = Seg + } end. -replay_journal_to_segment(_SegNum, [], {TotalMsgCount, ADict, State}) -> - {TotalMsgCount, ADict, State}; -replay_journal_to_segment(SegNum, Dels, {TotalMsgCount, ADict, State}) -> - {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = - load_segment(SegNum, State), - ValidDels = sets:to_list( - sets:filter( - fun (RelSeq) -> - case dict:find(RelSeq, SDict) of - {ok, {_MsgId, false, _IsPersistent}} -> true; - _ -> false - end - end, sets:from_list(Dels))), - State2 = append_dels_to_segment(SegNum, ValidDels, State1), - Acks = seg_entries_from_dict(SegNum, ADict), - case Acks of - [] -> {TotalMsgCount, ADict, State2}; - _ -> ADict1 = dict:erase(SegNum, ADict), - {Count, State3} = - filter_acks_and_append_to_segment(SegNum, SDict, - Acks, State2), - {TotalMsgCount - Count, ADict1, State3} - end. +store_segment(Segment = #segment { num = Seg }, + State = #qistate { segments = Segments }) -> + State #qistate { segments = dict:store(Seg, Segment, Segments) }. + +get_journal_handle(State = + #qistate { journal_handle = undefined, dir = Dir }) -> + Path = filename:join(Dir, ?JOURNAL_FILENAME), + {ok, Hdl} = file_handle_cache:open(Path, + [binary, raw, read, write, + {read_ahead, ?SEGMENT_TOTAL_SIZE}], + [{write_buffer, infinity}]), + {Hdl, State #qistate { journal_handle = Hdl }}; +get_journal_handle(State = #qistate { journal_handle = Hdl }) -> + {Hdl, State}. -replay_journal_acks_to_segment(_SegNum, [], {TotalMsgCount, State}) -> - {TotalMsgCount, State}; -replay_journal_acks_to_segment(SegNum, Acks, {TotalMsgCount, State}) -> - {SDict, _PubCount, _AckCount, _HighRelSeq, State1} = - load_segment(SegNum, State), - {Count, State2} = - filter_acks_and_append_to_segment(SegNum, SDict, Acks, State1), - {TotalMsgCount - Count, State2}. - -filter_acks_and_append_to_segment(SegNum, SDict, Acks, State) -> - ValidRelSeqIds = dict:fetch_keys(SDict), - ValidAcks = sets:to_list(sets:intersection(sets:from_list(ValidRelSeqIds), - sets:from_list(Acks))), - {length(ValidAcks), append_acks_to_segment(SegNum, ValidAcks, State)}. 
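
%% [Illustrative sketch, not part of the patch: find_segment/2 never
%% fails -- a miss returns a blank #segment{} for that segment number, so
%% "not seen yet" and "already cached" are handled uniformly by callers.
%% E.g. a hypothetical helper bumping the ack count of a possibly-new
%% segment:]
bump_acks(Seg, State) ->
    Segment = #segment { acks = AckCount } = find_segment(Seg, State),
    store_segment(Segment #segment { acks = AckCount + 1 }, State).
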
-
-drop_and_deliver(SegNum, SDict, CleanShutdown,
-                 State = #qistate { journal_del_dict = JDelDict,
-                                    journal_ack_dict = JAckDict }) ->
-    {JDelDict1, JAckDict1} =
-        dict:fold(
-          fun (RelSeq, {MsgId, IsDelivered, true}, {JDelDict2, JAckDict2}) ->
-                  %% msg is persistent, keep only if the msg_store has it
-                  case {IsDelivered, rabbit_msg_store:contains(MsgId)} of
-                      {false, true} when not CleanShutdown ->
-                          %% not delivered, but dirty shutdown => mark delivered
-                          {add_seqid_to_dict(SegNum, RelSeq, JDelDict2),
-                           JAckDict2};
-                      {_, true} ->
-                          {JDelDict2, JAckDict2};
-                      {true, false} ->
-                          {JDelDict2,
-                           add_seqid_to_dict(SegNum, RelSeq, JAckDict2)};
-                      {false, false} ->
-                          {add_seqid_to_dict(SegNum, RelSeq, JDelDict2),
-                           add_seqid_to_dict(SegNum, RelSeq, JAckDict2)}
-                  end;
-              (RelSeq, {_MsgId, false, false}, {JDelDict2, JAckDict2}) ->
-                  %% not persistent and not delivered => deliver and ack it
-                  {add_seqid_to_dict(SegNum, RelSeq, JDelDict2),
-                   add_seqid_to_dict(SegNum, RelSeq, JAckDict2)};
-              (RelSeq, {_MsgId, true, false}, {JDelDict2, JAckDict2}) ->
-                  %% not persistent but delivered => ack it
-                  {JDelDict2,
-                   add_seqid_to_dict(SegNum, RelSeq, JAckDict2)}
-          end, {JDelDict, JAckDict}, SDict),
-    State #qistate { journal_del_dict = JDelDict1,
-                     journal_ack_dict = JAckDict1 }.
+bool_to_int(true ) -> 1;
+bool_to_int(false) -> 0.
+
+write_entry_to_segment(_RelSeq, {{_MsgId, _IsPersistent}, del, ack}, Hdl) ->
+    Hdl;
+write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) ->
+    ok = case Publish of
+             no_pub ->
+                 ok;
+             {MsgId, IsPersistent} ->
+                 file_handle_cache:append(
+                   Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
+                           (bool_to_int(IsPersistent)):1,
+                           RelSeq:?REL_SEQ_BITS>>, MsgId])
+         end,
+    ok = case {Del, Ack} of
+             {no_del, no_ack} -> ok;
+             _ -> Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                             RelSeq:?REL_SEQ_BITS>>,
+                  Data = case {Del, Ack} of
+                             {del, ack} -> [Binary, Binary];
+                             _          -> Binary
+                         end,
+                  file_handle_cache:append(Hdl, Data)
+         end,
+    Hdl.
+
+terminate(StoreShutdown, State =
+          #qistate { segments = Segments, journal_handle = JournalHdl,
+                     dir = Dir }) ->
+    ok = case JournalHdl of
+             undefined -> ok;
+             _ -> file_handle_cache:close(JournalHdl)
+         end,
+    ok = dict:fold(
+           fun (_Seg, #segment { handle = undefined }, ok) ->
+                   ok;
+               (_Seg, #segment { handle = Hdl }, ok) ->
+                   file_handle_cache:close(Hdl)
+           end, ok, Segments),
+    case StoreShutdown of
+        true  -> store_clean_shutdown(Dir);
+        false -> ok
+    end,
+    State #qistate { journal_handle = undefined, segments = dict:new() }.
 
 %%----------------------------------------------------------------------------
-%% Loading Segments
+%% Majors
 %%----------------------------------------------------------------------------
 
-load_segment(SegNum, State = #qistate { seg_num_handles = SegHdls,
-                                        dir = Dir }) ->
-    SegmentExists = case dict:find(SegNum, SegHdls) of
-                        {ok, _} -> true;
-                        error -> filelib:is_file(seg_num_to_path(Dir, SegNum))
+%% Loading segments
+
+%% Does not do any combining with the journal at all. The PubCount
+%% that comes back is the number of publishes in the segment. The
+%% number of unacked msgs is PubCount - AckCount. If KeepAcks is
+%% false, then dict:size(SegDict) == PubCount - AckCount. If KeepAcks
+%% is true, then dict:size(SegDict) == PubCount.
+load_segment(Seg, KeepAcks, State) ->
+    Segment = #segment { path = Path, handle = SegHdl } =
+        find_segment(Seg, State),
+    SegmentExists = case SegHdl of
+                        undefined -> filelib:is_file(Path);
+                        _ -> true
                     end,
     case SegmentExists of
         false ->
-            {dict:new(), 0, 0, 0, State};
+            {dict:new(), 0, 0, State};
         true ->
-            {Hdl, State1 = #qistate { journal_del_dict = JDelDict,
-                                      journal_ack_dict = JAckDict }} =
-                get_seg_handle(SegNum, State),
+            {Hdl, Segment1} = get_segment_handle(Segment),
             {ok, 0} = file_handle_cache:position(Hdl, bof),
-            {SDict, PubCount, AckCount, HighRelSeq} =
-                load_segment_entries(Hdl, dict:new(), 0, 0, 0),
-            %% delete ack'd msgs first
-            {SDict1, AckCount1} =
-                lists:foldl(fun (RelSeq, {SDict2, AckCount2}) ->
-                                    {dict:erase(RelSeq, SDict2), AckCount2 + 1}
-                            end, {SDict, AckCount},
-                            seg_entries_from_dict(SegNum, JAckDict)),
-            %% ensure remaining msgs are delivered as necessary
-            SDict3 =
-                lists:foldl(
-                  fun (RelSeq, SDict4) ->
-                          case dict:find(RelSeq, SDict4) of
-                              {ok, {MsgId, false, IsPersistent}} ->
-                                  dict:store(RelSeq,
-                                             {MsgId, true, IsPersistent},
-                                             SDict4);
-                              _ ->
-                                  SDict4
-                          end
-                  end, SDict1, seg_entries_from_dict(SegNum, JDelDict)),
-            {SDict3, PubCount, AckCount1, HighRelSeq, State1}
+            {SegDict, PubCount, AckCount} =
+                load_segment_entries(KeepAcks, Hdl, dict:new(), 0, 0),
+            {SegDict, PubCount, AckCount, store_segment(Segment1, State)}
     end.
 
-load_segment_entries(Hdl, SDict, PubCount, AckCount, HighRelSeq) ->
+load_segment_entries(KeepAcks, Hdl, SegDict, PubCount, AckCount) ->
     case file_handle_cache:read(Hdl, 1) of
         {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                MSB:(8-?REL_SEQ_ONLY_PREFIX_BITS)>>} ->
             {ok, LSB} = file_handle_cache:read(
                           Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1),
             <<RelSeq:?REL_SEQ_BITS_BYTE_ALIGNED>> = <<MSB, LSB/binary>>,
-            {SDict1, AckCount1} = deliver_or_ack_msg(SDict, AckCount, RelSeq),
-            load_segment_entries(Hdl, SDict1, PubCount, AckCount1, HighRelSeq);
+            {AckCount1, SegDict1} =
+                deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict),
+            load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount, AckCount1);
         {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
                IsPersistentNum:1, MSB:(7-?PUBLISH_PREFIX_BITS)>>} ->
             %% because we specify /binary, and binaries are complete
@@ -779,71 +582,252 @@ load_segment_entries(Hdl, SDict, PubCount, AckCount, HighRelSeq) ->
                 file_handle_cache:read(
                   Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1),
             <<RelSeq:?REL_SEQ_BITS_BYTE_ALIGNED>> = <<MSB, LSB/binary>>,
-            HighRelSeq1 = lists:max([RelSeq, HighRelSeq]),
-            load_segment_entries(
-              Hdl, dict:store(RelSeq, {MsgId, false, 1 == IsPersistentNum},
-                              SDict), PubCount + 1, AckCount, HighRelSeq1);
+            SegDict1 =
+                dict:store(RelSeq,
+                           {{MsgId, 1 == IsPersistentNum}, no_del, no_ack},
+                           SegDict),
+            load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount+1, AckCount);
         _ErrOrEoF ->
-            {SDict, PubCount, AckCount, HighRelSeq}
+            {SegDict, PubCount, AckCount}
     end.
 
-deliver_or_ack_msg(SDict, AckCount, RelSeq) ->
-    case dict:find(RelSeq, SDict) of
-        {ok, {MsgId, false, IsPersistent}} ->
-            {dict:store(RelSeq, {MsgId, true, IsPersistent}, SDict), AckCount};
-        {ok, {_MsgId, true, _IsPersistent}} ->
-            {dict:erase(RelSeq, SDict), AckCount + 1}
+deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict) ->
+    case dict:find(RelSeq, SegDict) of
+        {ok, {PubRecord, no_del, no_ack}} ->
+            {AckCount, dict:store(RelSeq, {PubRecord, del, no_ack}, SegDict)};
+        {ok, {PubRecord, del, no_ack}} when KeepAcks ->
+            {AckCount + 1, dict:store(RelSeq, {PubRecord, del, ack}, SegDict)};
+        {ok, {_PubRecord, del, no_ack}} ->
+            {AckCount + 1, dict:erase(RelSeq, SegDict)}
     end.
 
+%% Loading Journal. This isn't idempotent and will mess up the counts
+%% if you call it more than once on the same state. Assumes the counts
+%% are 0 to start with.
+
+load_journal(State) ->
+    {JournalHdl, State1} = get_journal_handle(State),
+    {ok, 0} = file_handle_cache:position(JournalHdl, 0),
+    State2 = #qistate { segments = Segments } = load_journal_entries(State1),
+    dict:fold(
+      fun (Seg, #segment { journal_entries = JEntries,
+                           pubs = PubCountInJournal,
+                           acks = AckCountInJournal }, StateN) ->
+              %% We want to keep acks in so that we can remove them if
+              %% duplicates are in the journal. The counts here are
+              %% purely from the segment itself.
+              {SegDict, PubCountInSeg, AckCountInSeg, StateN1} =
+                  load_segment(Seg, true, StateN),
+              %% Removed counts here are the number of pubs and acks
+              %% that are duplicates - i.e. found in both the segment
+              %% and journal.
+              {JEntries1, PubsRemoved, AcksRemoved} =
+                  journal_minus_segment(JEntries, SegDict),
+              Segment1 = find_segment(Seg, StateN1),
+              PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved,
+              AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved,
+              store_segment(Segment1 #segment { journal_entries = JEntries1,
+                                                pubs = PubCount1,
+                                                acks = AckCount1 }, StateN1)
+      end, State2, Segments).
+
+load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
+    case file_handle_cache:read(Hdl, ?SEQ_BYTES) of
+        {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} ->
+            case Prefix of
+                ?DEL_JPREFIX ->
+                    load_journal_entries(add_to_journal(SeqId, del, State));
+                ?ACK_JPREFIX ->
+                    load_journal_entries(add_to_journal(SeqId, ack, State));
+                _ ->
+                    case file_handle_cache:read(Hdl, ?MSG_ID_BYTES) of
+                        {ok, <<MsgIdNum:?MSG_ID_BITS>>} ->
+                            %% work around for binary data
+                            %% fragmentation. See
+                            %% rabbit_msg_file:read_next/2
+                            <<MsgId:?MSG_ID_BYTES/binary>> =
+                                <<MsgIdNum:?MSG_ID_BITS>>,
+                            Publish = {MsgId,
+                                       case Prefix of
+                                           ?PUB_PERSIST_JPREFIX -> true;
+                                           ?PUB_TRANS_JPREFIX   -> false
+                                       end},
+                            load_journal_entries(
+                              add_to_journal(SeqId, Publish, State));
+                        _ErrOrEoF -> %% err, we've lost at least a publish
+                            State
+                    end
+            end;
+        _ErrOrEoF -> State
+    end.
 
-%%----------------------------------------------------------------------------
-%% Appending Acks or Dels to Segments
-%%----------------------------------------------------------------------------
-
-append_acks_to_segment(SegNum, Acks,
-                       State = #qistate { seg_ack_counts = AckCounts,
-                                          partial_segments = Partials }) ->
-    AckCount = case dict:find(SegNum, AckCounts) of
-                   {ok, AckCount1} -> AckCount1;
-                   error           -> 0
-               end,
-    AckTarget = case dict:find(SegNum, Partials) of
-                    {ok, PubCount} -> PubCount;
-                    error          -> ?SEGMENT_ENTRY_COUNT
-                end,
-    AckCount2 = AckCount + length(Acks),
-    append_acks_to_segment(SegNum, AckCount2, Acks, AckTarget, State).
-
-append_acks_to_segment(SegNum, AckCount, _Acks, AckCount, State =
-                       #qistate { publish_handle = PubHdl }) ->
-    PubHdl1 = case PubHdl of
-                  %% If we're adjusting the pubhdl here then there
-                  %% will be no entry in partials, thus the target ack
-                  %% count must be SEGMENT_ENTRY_COUNT
-                  {SegNum, Hdl, AckCount = ?SEGMENT_ENTRY_COUNT}
-                  when Hdl /= undefined ->
-                      {SegNum + 1, undefined, 0};
-                  _ ->
-                      PubHdl
-              end,
-    delete_segment(SegNum, State #qistate { publish_handle = PubHdl1 });
-append_acks_to_segment(_SegNum, _AckCount, [], _AckTarget, State) ->
-    State;
-append_acks_to_segment(SegNum, AckCount, Acks, AckTarget, State =
-                       #qistate { seg_ack_counts = AckCounts })
-  when AckCount < AckTarget ->
-    {Hdl, State1} = append_to_segment(SegNum, Acks, State),
-    ok = file_handle_cache:sync(Hdl),
-    State1 #qistate { seg_ack_counts =
-                          dict:store(SegNum, AckCount, AckCounts) }.
-
-append_dels_to_segment(SegNum, Dels, State) ->
-    {_Hdl, State1} = append_to_segment(SegNum, Dels, State),
-    State1.
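
%% [Illustrative sketch, not part of the patch: each journal entry read
%% above becomes a {Pub, Del, Ack} triple that only ever moves forward;
%% an entry absent from the dict is implicitly {no_pub, no_del, no_ack},
%% and dels or acks may arrive with Pub = no_pub when the publish is
%% already in the segment file. The legal transitions, written as a
%% hypothetical step function:]
journal_next({no_pub, no_del, no_ack}, {pub, Pub}) -> {Pub, no_del, no_ack};
journal_next({Pub, no_del, no_ack},    del)        -> {Pub, del, no_ack};
journal_next({Pub, Del, no_ack},       ack)        -> {Pub, Del, ack}.
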
+add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount }) -> + {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + Segment = #segment { journal_entries = SegJDict, + pubs = PubCount, acks = AckCount } = + find_segment(Seg, State), + SegJDict1 = add_to_journal(RelSeq, Action, SegJDict), + Segment1 = Segment #segment { journal_entries = SegJDict1 }, + Segment2 = + case Action of + del -> Segment1; + ack -> Segment1 #segment { acks = AckCount + 1 }; + {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } + end, + store_segment(Segment2, State #qistate { dirty_count = DCount + 1 }); + +%% This is a more relaxed version of deliver_or_ack_msg because we can +%% have dels or acks in the journal without the corresponding +%% pub. Also, always want to keep acks. Things must occur in the right +%% order though. +add_to_journal(RelSeq, Action, SegJDict) -> + case dict:find(RelSeq, SegJDict) of + {ok, {PubRecord, no_del, no_ack}} when Action == del -> + dict:store(RelSeq, {PubRecord, del, no_ack}, SegJDict); + {ok, {PubRecord, DelRecord, no_ack}} when Action == ack -> + dict:store(RelSeq, {PubRecord, DelRecord, ack}, SegJDict); + error when Action == del -> + dict:store(RelSeq, {no_pub, del, no_ack}, SegJDict); + error when Action == ack -> + dict:store(RelSeq, {no_pub, no_del, ack}, SegJDict); + error -> + {_MsgId, _IsPersistent} = Action, %% ASSERTION + dict:store(RelSeq, {Action, no_del, no_ack}, SegJDict) + end. -append_to_segment(SegNum, AcksOrDels, State) -> - {Hdl, State1} = get_seg_handle(SegNum, State), - ok = file_handle_cache:append( - Hdl, [<> || RelSeq <- AcksOrDels ]), - {Hdl, State1}. +%% Combine what we have just read from a segment file with what we're +%% holding for that segment in memory. There must be no +%% duplicates. Used when providing segment entries to the variable +%% queue. +journal_plus_segment(JEntries, SegDict) -> + dict:fold(fun (RelSeq, JObj, SegDictOut) -> + SegEntry = case dict:find(RelSeq, SegDictOut) of + error -> not_found; + {ok, SObj = {_, _, _}} -> SObj + end, + journal_plus_segment(JObj, SegEntry, RelSeq, SegDictOut) + end, SegDict, JEntries). + +%% Here, the OutDict is the SegDict which we may be adding to (for +%% items only in the journal), modifying (bits in both), or erasing +%% from (ack in journal, not segment). +journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, + not_found, + RelSeq, OutDict) -> + dict:store(RelSeq, Obj, OutDict); +journal_plus_segment({{_MsgId, _IsPersistent}, del, ack}, + not_found, + RelSeq, OutDict) -> + dict:erase(RelSeq, OutDict); + +journal_plus_segment({no_pub, del, no_ack}, + {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:store(RelSeq, {PubRecord, del, no_ack}, OutDict); + +journal_plus_segment({no_pub, del, ack}, + {{_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict) -> + dict:erase(RelSeq, OutDict); +journal_plus_segment({no_pub, no_del, ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict) -> + dict:erase(RelSeq, OutDict). + + +%% Remove from the journal entries for a segment, items that are +%% duplicates of entries found in the segment itself. Used on start up +%% to clean up the journal. 
+journal_minus_segment(JEntries, SegDict) -> + dict:fold(fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> + SegEntry = case dict:find(RelSeq, SegDict) of + error -> not_found; + {ok, SObj = {_, _, _}} -> SObj + end, + journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, + PubsRemoved, AcksRemoved) + end, {dict:new(), 0, 0}, JEntries). + +%% Here, the OutDict is a fresh journal that we're filling with valid +%% entries. PubsRemoved and AcksRemoved only get increased when the a +%% publish or ack is in both the journal and the segment. + +%% Both the same. Must be at least the publish +journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, no_ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved + 1, AcksRemoved}; +journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved + 1, AcksRemoved + 1}; + +%% Just publish in journal +journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, + not_found, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; + +%% Just deliver in journal +journal_minus_segment(Obj = {no_pub, del, no_ack}, + {{_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; +journal_minus_segment({no_pub, del, no_ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved}; + +%% Just ack in journal +journal_minus_segment(Obj = {no_pub, no_del, ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; +journal_minus_segment({no_pub, no_del, ack}, + {{_MsgId, _IsPersistent}, del, ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved}; + +%% Publish and deliver in journal +journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, + not_found, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; +journal_minus_segment({PubRecord, del, no_ack}, + {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, del, no_ack}, OutDict), + PubsRemoved + 1, AcksRemoved}; + +%% Deliver and ack in journal +journal_minus_segment(Obj = {no_pub, del, ack}, + {{_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; +journal_minus_segment({no_pub, del, ack}, + {{_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), + PubsRemoved, AcksRemoved}; +journal_minus_segment({no_pub, del, ack}, + {{_MsgId, _IsPersistent}, del, ack}, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved + 1}; + +%% Publish, deliver and ack in journal +journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, + not_found, + _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {OutDict, PubsRemoved, AcksRemoved}; +journal_minus_segment({PubRecord, del, ack}, + {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, del, ack}, OutDict), + PubsRemoved + 1, AcksRemoved}; 
+journal_minus_segment({PubRecord, del, ack}, + {PubRecord = {_MsgId, _IsPersistent}, del, no_ack}, + RelSeq, OutDict, PubsRemoved, AcksRemoved) -> + {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), + PubsRemoved + 1, AcksRemoved}. diff --git a/src/rabbit_queue_index3.erl b/src/rabbit_queue_index3.erl deleted file mode 100644 index 43a210d9..00000000 --- a/src/rabbit_queue_index3.erl +++ /dev/null @@ -1,850 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_queue_index3). - --export([init/1, terminate/1, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, - read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). - --define(CLEAN_FILENAME, "clean.dot"). - -%%---------------------------------------------------------------------------- -%% ---- Journal details ---- - --define(MAX_JOURNAL_ENTRY_COUNT, 32768). --define(JOURNAL_FILENAME, "journal.jif"). - --define(PUB_PERSIST_JPREFIX, 00). --define(PUB_TRANS_JPREFIX, 01). --define(DEL_JPREFIX, 10). --define(ACK_JPREFIX, 11). --define(JPREFIX_BITS, 2). --define(SEQ_BYTES, 8). --define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). - -%% ---- Segment details ---- - --define(SEGMENT_EXTENSION, ".idx"). - --define(REL_SEQ_BITS, 14). --define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + 8 - (?REL_SEQ_BITS rem 8))). --define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). - -%% seq only is binary 00 followed by 14 bits of rel seq id -%% (range: 0 - 16383) --define(REL_SEQ_ONLY_PREFIX, 00). --define(REL_SEQ_ONLY_PREFIX_BITS, 2). --define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). - -%% publish record is binary 1 followed by a bit for is_persistent, -%% then 14 bits of rel seq id, and 128 bits of md5sum msg id --define(PUBLISH_PREFIX, 1). --define(PUBLISH_PREFIX_BITS, 1). - --define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes --define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). -%% 16 bytes for md5sum + 2 for seq, bits and prefix --define(PUBLISH_RECORD_LENGTH_BYTES, ?MSG_ID_BYTES + 2). 
- -%% 1 publish, 1 deliver, 1 ack per msg --define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * - (?PUBLISH_RECORD_LENGTH_BYTES + - (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). - -%%---------------------------------------------------------------------------- - --record(qistate, - { dir, - segments, - journal_handle, - dirty_count - }). - --record(segment, - { pubs, - acks, - handle, - journal_entries, - path, - num - }). - --include("rabbit.hrl"). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(hdl() :: ('undefined' | any())). --type(msg_id() :: binary()). --type(seq_id() :: integer()). --type(qistate() :: #qistate { dir :: file_path(), - segments :: dict(), - journal_handle :: hdl(), - dirty_count :: integer() - }). - --spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). --spec(terminate/1 :: (qistate()) -> qistate()). --spec(terminate_and_erase/1 :: (qistate()) -> qistate()). --spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) - -> qistate()). --spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). --spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). --spec(sync_seq_ids/2 :: ([seq_id()], qistate()) -> qistate()). --spec(flush_journal/1 :: (qistate()) -> qistate()). --spec(read_segment_entries/2 :: (seq_id(), qistate()) -> - {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). --spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). --spec(segment_size/0 :: () -> non_neg_integer()). --spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> - {non_neg_integer(), non_neg_integer(), qistate()}). --spec(start_msg_store/1 :: ([amqqueue()]) -> 'ok'). - --endif. - - -%%---------------------------------------------------------------------------- -%% Public API -%%---------------------------------------------------------------------------- - -init(Name) -> - State = blank_state(Name), - %% 1. Load the journal completely. This will also load segments - %% which have entries in the journal and remove duplicates. - %% The counts will correctly reflect the combination of the - %% segment and the journal. - State1 = load_journal(State), - %% 2. Flush the journal. This makes life easier for everyone, as - %% it means there won't be any publishes in the journal alone. - State2 = #qistate { dir = Dir } = flush_journal(State1), - %% 3. Load each segment in turn and filter out messages that are - %% not in the msg_store, by adding acks to the journal. These - %% acks only go to the RAM journal as it doesn't matter if we - %% lose them. Also mark delivered if not clean shutdown. - AllSegs = all_segment_nums(Dir), - CleanShutdown = detect_clean_shutdown(Dir), - %% We know the journal is empty here, so we don't need to combine - %% with the journal, and we don't need to worry about messages - %% that have been acked. 
- State3 = - lists:foldl( - fun (Seg, StateN) -> - {SegDict, _PubCount, _AckCount, StateN1} = - load_segment(Seg, false, StateN), - dict:fold( - fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, - StateM) -> - SeqId = reconstruct_seq_id(Seg, RelSeq), - InMsgStore = rabbit_msg_store:contains(MsgId), - case {InMsgStore, CleanShutdown} of - {true, true} -> - StateM; - {true, false} when Del == del -> - StateM; - {true, false} -> - add_to_journal(SeqId, del, StateM); - {false, _} when Del == del -> - add_to_journal(SeqId, ack, StateM); - {false, _} -> - add_to_journal( - SeqId, ack, - add_to_journal(SeqId, del, StateM)) - end - end, StateN1, SegDict) - end, State2, AllSegs), - %% 4. Go through all segments and calculate the number of unacked - %% messages we have. - Count = lists:foldl( - fun (Seg, CountAcc) -> - #segment { pubs = PubCount, acks = AckCount } = - find_segment(Seg, State3), - CountAcc + PubCount - AckCount - end, 0, AllSegs), - {Count, State3}. - -terminate(State) -> - terminate(true, State). - -terminate_and_erase(State) -> - State1 = terminate(State), - ok = delete_queue_directory(State1 #qistate.dir), - State1. - -write_published(MsgId, SeqId, IsPersistent, State) - when is_binary(MsgId) -> - ?MSG_ID_BYTES = size(MsgId), - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, - [<<(case IsPersistent of - true -> ?PUB_PERSIST_JPREFIX; - false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, - MsgId]), - maybe_flush_journal(add_to_journal(SeqId, {MsgId, IsPersistent}, State1)). - -write_delivered(SeqId, State) -> - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, - <>), - maybe_flush_journal(add_to_journal(SeqId, del, State1)). - -write_acks(SeqIds, State) -> - {SeqIds1, State1} = remove_pubs_dels_from_journal(SeqIds, State), - case SeqIds1 of - [] -> - State; - _ -> - {JournalHdl, State2} = get_journal_handle(State1), - ok = file_handle_cache:append(JournalHdl, - [<> - || SeqId <- SeqIds1]), - State3 = lists:foldl(fun (SeqId, StateN) -> - add_to_journal(SeqId, ack, StateN) - end, State2, SeqIds1), - maybe_flush_journal(State3) - end. - -sync_seq_ids(_SeqIds, State = #qistate { journal_handle = undefined }) -> - State; -sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> - ok = file_handle_cache:sync(JournalHdl), - State. - -flush_journal(State = #qistate { dirty_count = 0 }) -> - State; -flush_journal(State = #qistate { segments = Segments }) -> - State1 = - dict:fold( - fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, - acks = AckCount } = Segment, StateN) -> - case dict:is_empty(JEntries) of - true -> store_segment(Segment, StateN); - false when AckCount == PubCount -> - ok = delete_segment(Segment); - false -> - {Hdl, Segment1} = get_segment_handle(Segment), - dict:fold(fun write_entry_to_segment/3, - Hdl, JEntries), - ok = file_handle_cache:sync(Hdl), - store_segment( - Segment1 #segment { journal_entries = dict:new() }, - StateN) - end - end, State, Segments), - {JournalHdl, State2} = get_journal_handle(State1), - {ok, 0} = file_handle_cache:position(JournalHdl, bof), - ok = file_handle_cache:truncate(JournalHdl), - ok = file_handle_cache:sync(JournalHdl), - State2 #qistate { dirty_count = 0 }. 
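
%% [Illustrative sketch, not part of the original file: the expected
%% call sequence for one message through this API, with made-up ids --
%% each call appends to journal.jif, and entries only reach the segment
%% files via flush_journal/1 or the ?MAX_JOURNAL_ENTRY_COUNT threshold:]
publish_deliver_ack(MsgId, SeqId, State0) ->
    State1 = write_published(MsgId, SeqId, true, State0),
    State2 = write_delivered(SeqId, State1),
    write_acks([SeqId], State2).
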
- -read_segment_entries(InitSeqId, State) -> - {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - {SegDict, _PubCount, _AckCount, State1} = - load_segment(Seg, false, State), - #segment { journal_entries = JEntries } = find_segment(Seg, State1), - SegDict1 = journal_plus_segment(JEntries, SegDict), - %% deliberately sort the list desc, because foldl will reverse it - RelSeqs = rev_sort(dict:fetch_keys(SegDict1)), - {lists:foldl(fun (RelSeq, Acc) -> - {{MsgId, IsPersistent}, IsDelivered, no_ack} = - dict:fetch(RelSeq, SegDict1), - [ {MsgId, reconstruct_seq_id(Seg, RelSeq), - IsPersistent, IsDelivered} | Acc ] - end, [], RelSeqs), - State1}. - -next_segment_boundary(SeqId) -> - {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - reconstruct_seq_id(Seg + 1, 0). - -segment_size() -> - ?SEGMENT_ENTRY_COUNT. - -find_lowest_seq_id_seg_and_next_seq_id(State = #qistate { dir = Dir }) -> - SegNums = all_segment_nums(Dir), - %% We don't want the lowest seq_id, merely the seq_id of the start - %% of the lowest segment. That seq_id may not actually exist, but - %% that's fine. The important thing is that the segment exists and - %% the seq_id reported is on a segment boundary. - - %% We also don't really care about the max seq_id. Just start the - %% next segment: it makes life much easier. - - %% SegNums is sorted, ascending. - {LowSeqIdSeg, NextSeqId} = - case SegNums of - [] -> {0, 0}; - [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), - reconstruct_seq_id(lists:last(SegNums), 0)} - end, - {LowSeqIdSeg, NextSeqId, State}. - -start_msg_store(DurableQueues) -> - DurableDict = - dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), - Queue #amqqueue.name} || Queue <- DurableQueues ]), - QueuesDir = queues_dir(), - Directories = case file:list_dir(QueuesDir) of - {ok, Entries} -> - [ Entry || Entry <- Entries, - filelib:is_dir( - filename:join(QueuesDir, Entry)) ]; - {error, enoent} -> - [] - end, - DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), - {DurableQueueNames, TransientDirs} = - lists:foldl( - fun (QueueDir, {DurableAcc, TransientAcc}) -> - case sets:is_element(QueueDir, DurableDirectories) of - true -> - {[dict:fetch(QueueDir, DurableDict) | DurableAcc], - TransientAcc}; - false -> - {DurableAcc, [QueueDir | TransientAcc]} - end - end, {[], []}, Directories), - MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), - ok = rabbit:start_child(rabbit_msg_store, [MsgStoreDir, - fun queue_index_walker/1, - DurableQueueNames]), - lists:foreach(fun (DirName) -> - Dir = filename:join(queues_dir(), DirName), - ok = delete_queue_directory(Dir) - end, TransientDirs), - ok. 
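
%% [Illustrative sketch, not part of the original file: the fold in
%% start_msg_store/1 above is a plain set partition -- directories named
%% after durable queues are kept and fed to msg_store recovery, the rest
%% are deleted. An equivalent formulation using lists:partition/2:]
partition_queue_dirs(Directories, DurableDirectories) ->
    lists:partition(fun (Dir) -> sets:is_element(Dir, DurableDirectories) end,
                    Directories).
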
- -%%---------------------------------------------------------------------------- -%% Msg Store Startup Delta Function -%%---------------------------------------------------------------------------- - -queue_index_walker([]) -> - finished; -queue_index_walker([QueueName|QueueNames]) -> - State = #qistate { dir = Dir } = blank_state(QueueName), - State1 = #qistate { journal_handle = JHdl } = load_journal(State), - ok = file_handle_cache:close(JHdl), - SegNums = all_segment_nums(Dir), - queue_index_walker({SegNums, State1, QueueNames}); - -queue_index_walker({[], State, QueueNames}) -> - _State = terminate(false, State), - queue_index_walker(QueueNames); -queue_index_walker({[Seg | SegNums], State, QueueNames}) -> - SeqId = reconstruct_seq_id(Seg, 0), - {Messages, State1} = read_segment_entries(SeqId, State), - queue_index_walker({Messages, State1, SegNums, QueueNames}); - -queue_index_walker({[], State, SegNums, QueueNames}) -> - queue_index_walker({SegNums, State, QueueNames}); -queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs], - State, SegNums, QueueNames}) -> - case IsPersistent of - true -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}}; - false -> queue_index_walker({Msgs, State, SegNums, QueueNames}) - end. - -%%---------------------------------------------------------------------------- -%% Minors -%%---------------------------------------------------------------------------- - -maybe_flush_journal(State = #qistate { dirty_count = DCount }) - when DCount > ?MAX_JOURNAL_ENTRY_COUNT -> - flush_journal(State); -maybe_flush_journal(State) -> - State. - -all_segment_nums(Dir) -> - lists:sort( - [list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)) - || SegName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)]). - -blank_state(QueueName) -> - StrName = queue_name_to_dir_name(QueueName), - Dir = filename:join(queues_dir(), StrName), - ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - #qistate { dir = Dir, - segments = dict:new(), - journal_handle = undefined, - dirty_count = 0 - }. - -rev_sort(List) -> - lists:sort(fun (A, B) -> B < A end, List). - -seq_id_to_seg_and_rel_seq_id(SeqId) -> - { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. - -reconstruct_seq_id(Seg, RelSeq) -> - (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. - -seg_num_to_path(Dir, Seg) -> - SegName = integer_to_list(Seg), - filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). - -delete_segment(#segment { handle = undefined }) -> - ok; -delete_segment(#segment { handle = Hdl, path = Path }) -> - ok = file_handle_cache:close(Hdl), - ok = file:delete(Path), - ok. - -detect_clean_shutdown(Dir) -> - case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of - ok -> true; - {error, enoent} -> false - end. - -store_clean_shutdown(Dir) -> - {ok, Hdl} = file_handle_cache:open(filename:join(Dir, ?CLEAN_FILENAME), - [write, raw, binary], - [{write_buffer, unbuffered}]), - ok = file_handle_cache:close(Hdl). - -queue_name_to_dir_name(Name = #resource { kind = queue }) -> - Bin = term_to_binary(Name), - Size = 8*size(Bin), - <> = Bin, - lists:flatten(io_lib:format("~.36B", [Num])). - -queues_dir() -> - filename:join(rabbit_mnesia:dir(), "queues"). - -delete_queue_directory(Dir) -> - {ok, Entries} = file:list_dir(Dir), - ok = lists:foldl(fun (Entry, ok) -> - file:delete(filename:join(Dir, Entry)) - end, ok, Entries), - ok = file:del_dir(Dir). 
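
%% [Illustrative note, not part of the original file: with
%% ?SEGMENT_ENTRY_COUNT = 16384 the seq id <-> (segment, rel seq)
%% mapping above is plain div/rem arithmetic, e.g.
%%   seq_id_to_seg_and_rel_seq_id(16385) =:= {1, 1},
%%   reconstruct_seq_id(1, 1)            =:= 16385,
%%   next_segment_boundary(16385)        =:= 32768.]
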
- -get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> - {ok, Hdl} = file_handle_cache:open(Path, - [binary, raw, read, write, - {read_ahead, ?SEGMENT_TOTAL_SIZE}], - [{write_buffer, infinity}]), - {Hdl, Segment #segment { handle = Hdl }}; -get_segment_handle(Segment = #segment { handle = Hdl }) -> - {Hdl, Segment}. - -find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> - case dict:find(Seg, Segments) of - {ok, Segment = #segment{}} -> Segment; - error -> #segment { pubs = 0, - acks = 0, - handle = undefined, - journal_entries = dict:new(), - path = seg_num_to_path(Dir, Seg), - num = Seg - } - end. - -store_segment(Segment = #segment { num = Seg }, - State = #qistate { segments = Segments }) -> - State #qistate { segments = dict:store(Seg, Segment, Segments) }. - -get_journal_handle(State = - #qistate { journal_handle = undefined, dir = Dir }) -> - Path = filename:join(Dir, ?JOURNAL_FILENAME), - {ok, Hdl} = file_handle_cache:open(Path, - [binary, raw, read, write, - {read_ahead, ?SEGMENT_TOTAL_SIZE}], - [{write_buffer, infinity}]), - {Hdl, State #qistate { journal_handle = Hdl }}; -get_journal_handle(State = #qistate { journal_handle = Hdl }) -> - {Hdl, State}. - -bool_to_int(true ) -> 1; -bool_to_int(false) -> 0. - -write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> - ok = case Publish of - no_pub -> - ok; - {MsgId, IsPersistent} -> - file_handle_cache:append( - Hdl, [<>, MsgId]) - end, - ok = case {Del, Ack} of - {no_del, no_ack} -> ok; - _ -> Binary = <>, - Data = case {Del, Ack} of - {del, ack} -> [Binary, Binary]; - _ -> Binary - end, - file_handle_cache:append(Hdl, Data) - end, - Hdl. - -terminate(StoreShutdown, State = - #qistate { segments = Segments, journal_handle = JournalHdl, - dir = Dir }) -> - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - ok = dict:fold( - fun (_Seg, #segment { handle = undefined }, ok) -> - ok; - (_Seg, #segment { handle = Hdl }, ok) -> - file_handle_cache:close(Hdl) - end, ok, Segments), - case StoreShutdown of - true -> store_clean_shutdown(Dir); - false -> ok - end, - State #qistate { journal_handle = undefined, segments = dict:new() }. - -remove_pubs_dels_from_journal(SeqIds, State) -> - lists:foldl( - fun (SeqId, {SeqIdsAcc, StateN}) -> - {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - Segment = #segment { journal_entries = JEntries, - acks = AckCount } = - find_segment(Seg, StateN), - case dict:find(RelSeq, JEntries) of - {ok, {{_MsgId, _IsPersistent}, del, no_ack}} -> - StateN1 = - store_segment( - Segment #segment { journal_entries = - dict:erase(RelSeq, JEntries), - acks = AckCount + 1 }, - StateN), - {SeqIdsAcc, StateN1}; - _ -> - {[SeqId | SeqIdsAcc], StateN} - end - end, {[], State}, SeqIds). - -%%---------------------------------------------------------------------------- -%% Majors -%%---------------------------------------------------------------------------- - -%% Loading segments - -%% Does not do any combining with the journal at all. The PubCount -%% that comes back is the number of publishes in the segment. The -%% number of unacked msgs is PubCount - AckCount. If KeepAcks is -%% false, then dict:size(SegDict) == PubCount - AckCount. If KeepAcks -%% is true, then dict:size(SegDict) == PubCount. 
-load_segment(Seg, KeepAcks, State) -> - Segment = #segment { path = Path, handle = SegHdl } = - find_segment(Seg, State), - SegmentExists = case SegHdl of - undefined -> filelib:is_file(Path); - _ -> true - end, - case SegmentExists of - false -> - {dict:new(), 0, 0, State}; - true -> - {Hdl, Segment1} = get_segment_handle(Segment), - {ok, 0} = file_handle_cache:position(Hdl, bof), - {SegDict, PubCount, AckCount} = - load_segment_entries(KeepAcks, Hdl, dict:new(), 0, 0), - {SegDict, PubCount, AckCount, store_segment(Segment1, State)} - end. - -load_segment_entries(KeepAcks, Hdl, SegDict, PubCount, AckCount) -> - case file_handle_cache:read(Hdl, 1) of - {ok, <>} -> - {ok, LSB} = file_handle_cache:read( - Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), - <> = <>, - {AckCount1, SegDict1} = - deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict), - load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount, AckCount1); - {ok, <>} -> - %% because we specify /binary, and binaries are complete - %% bytes, the size spec is in bytes, not bits. - {ok, <>} = - file_handle_cache:read( - Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), - <> = <>, - SegDict1 = - dict:store(RelSeq, - {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, - SegDict), - load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount+1, AckCount); - _ErrOrEoF -> - {SegDict, PubCount, AckCount} - end. - -deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict) -> - case dict:find(RelSeq, SegDict) of - {ok, {PubRecord, no_del, no_ack}} -> - {AckCount, dict:store(RelSeq, {PubRecord, del, no_ack}, SegDict)}; - {ok, {PubRecord, del, no_ack}} when KeepAcks -> - {AckCount + 1, dict:store(RelSeq, {PubRecord, del, ack}, SegDict)}; - {ok, {_PubRecord, del, no_ack}} when KeepAcks -> - {AckCount + 1, dict:erase(RelSeq, SegDict)} - end. - -%% Loading Journal. This isn't idempotent and will mess up the counts -%% if you call it more than once on the same state. Assumes the counts -%% are 0 to start with. - -load_journal(State) -> - {JournalHdl, State1} = get_journal_handle(State), - {ok, 0} = file_handle_cache:position(JournalHdl, 0), - State1 = #qistate { segments = Segments } = load_journal_entries(State), - dict:fold( - fun (Seg, #segment { journal_entries = JEntries, - pubs = PubCountInJournal, - acks = AckCountInJournal }, StateN) -> - %% We want to keep acks in so that we can remove them if - %% duplicates are in the journal. The counts here are - %% purely from the segment itself. - {SegDict, PubCountInSeg, AckCountInSeg, StateN1} = - load_segment(Seg, true, StateN), - %% Removed counts here are the number of pubs and acks - %% that are duplicates - i.e. found in both the segment - %% and journal. - {JEntries1, PubsRemoved, AcksRemoved} = - journal_minus_segment(JEntries, SegDict), - {Segment1, StateN2} = find_segment(Seg, StateN1), - PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, - AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, - store_segment(Segment1 #segment { journal_entries = JEntries1, - pubs = PubCount1, - acks = AckCount1 }, StateN2) - end, State1, Segments). - -load_journal_entries(State = #qistate { journal_handle = Hdl }) -> - case file_handle_cache:read(Hdl, ?SEQ_BYTES) of - {ok, <>} -> - case Prefix of - ?DEL_JPREFIX -> - load_journal_entries(add_to_journal(SeqId, del, State)); - ?ACK_JPREFIX -> - load_journal_entries(add_to_journal(SeqId, ack, State)); - _ -> - case file_handle_cache:read(Hdl, ?MSG_ID_BYTES) of - {ok, <>} -> - %% work around for binary data - %% fragmentation. 
See - %% rabbit_msg_file:read_next/2 - <> = - <>, - Publish = {MsgId, - case Prefix of - ?PUB_PERSIST_JPREFIX -> true; - ?PUB_TRANS_JPREFIX -> false - end}, - load_journal_entries( - add_to_journal(SeqId, Publish, State)); - _ErrOrEoF -> %% err, we've lost at least a publish - State - end - end; - _ErrOrEoF -> State - end. - -add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount }) -> - {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - Segment = #segment { journal_entries = SegJDict, - pubs = PubCount, acks = AckCount } = - find_segment(Seg, State), - SegJDict1 = add_to_journal(RelSeq, Action, SegJDict), - Segment1 = Segment #segment { journal_entries = SegJDict1 }, - Segment2 = - case Action of - del -> Segment1; - ack -> Segment1 #segment { acks = AckCount + 1 }; - {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } - end, - store_segment(Segment2, State #qistate { dirty_count = DCount + 1 }); - -%% This is a more relaxed version of deliver_or_ack_msg because we can -%% have dels or acks in the journal without the corresponding -%% pub. Also, always want to keep acks. Things must occur in the right -%% order though. -add_to_journal(RelSeq, Action, SegJDict) -> - case dict:find(RelSeq, SegJDict) of - {ok, {PubRecord, no_del, no_ack}} when Action == del -> - dict:store(RelSeq, {PubRecord, del, no_ack}, SegJDict); - {ok, {PubRecord, DelRecord, no_ack}} when Action == ack -> - dict:store(RelSeq, {PubRecord, DelRecord, ack}, SegJDict); - error when Action == del -> - dict:store(RelSeq, {no_pub, del, no_ack}, SegJDict); - error when Action == ack -> - dict:store(RelSeq, {no_pub, no_del, ack}, SegJDict); - error -> - {_MsgId, _IsPersistent} = Action, %% ASSERTION - dict:store(RelSeq, {Action, no_del, no_ack}, SegJDict) - end. - -%% Combine what we have just read from a segment file with what we're -%% holding for that segment in memory. There must be no -%% duplicates. Used when providing segment entries to the variable -%% queue. -journal_plus_segment(JEntries, SegDict) -> - dict:fold(fun (RelSeq, JObj, SegDictOut) -> - SegEntry = case dict:find(RelSeq, SegDictOut) of - error -> not_found; - {ok, SObj = {_, _, _}} -> SObj - end, - journal_plus_segment(JObj, SegEntry, RelSeq, SegDictOut) - end, SegDict, JEntries). - -%% Here, the OutDict is the SegDict which we may be adding to (for -%% items only in the journal), modifying (bits in both), or erasing -%% from (ack in journal, not segment). -journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, - not_found, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); -journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, - not_found, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); -journal_plus_segment({{_MsgId, _IsPersistent}, del, ack}, - not_found, - RelSeq, OutDict) -> - dict:erase(RelSeq, OutDict); - -journal_plus_segment({no_pub, del, no_ack}, - {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, {PubRecord, del, no_ack}, OutDict); - -journal_plus_segment({no_pub, del, ack}, - {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:erase(RelSeq, OutDict); -journal_plus_segment({no_pub, no_del, ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict) -> - dict:erase(RelSeq, OutDict). - - -%% Remove from the journal entries for a segment, items that are -%% duplicates of entries found in the segment itself. Used on start up -%% to clean up the journal. 
-journal_minus_segment(JEntries, SegDict) -> - dict:fold(fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> - SegEntry = case dict:find(RelSeq, SegDict) of - error -> not_found; - {ok, SObj = {_, _, _}} -> SObj - end, - journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, - PubsRemoved, AcksRemoved) - end, {dict:new(), 0, 0}, JEntries). - -%% Here, the OutDict is a fresh journal that we're filling with valid -%% entries. PubsRemoved and AcksRemoved only get increased when the a -%% publish or ack is in both the journal and the segment. - -%% Both the same. Must be at least the publish -journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, no_ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved + 1, AcksRemoved}; -journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved + 1, AcksRemoved + 1}; - -%% Just publish in journal -journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, - not_found, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; - -%% Just deliver in journal -journal_minus_segment(Obj = {no_pub, del, no_ack}, - {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, del, no_ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved}; - -%% Just ack in journal -journal_minus_segment(Obj = {no_pub, no_del, ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, no_del, ack}, - {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved}; - -%% Publish and deliver in journal -journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, - not_found, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; -journal_minus_segment({PubRecord, del, no_ack}, - {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, del, no_ack}, OutDict), - PubsRemoved + 1, AcksRemoved}; - -%% Deliver and ack in journal -journal_minus_segment(Obj = {no_pub, del, ack}, - {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, del, ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), - PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, del, ack}, - {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved + 1}; - -%% Publish, deliver and ack in journal -journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, - not_found, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved}; -journal_minus_segment({PubRecord, del, ack}, - {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, del, ack}, OutDict), - PubsRemoved + 1, AcksRemoved}; 
-journal_minus_segment({PubRecord, del, ack},
-                      {PubRecord = {_MsgId, _IsPersistent}, del, no_ack},
-                      RelSeq, OutDict, PubsRemoved, AcksRemoved) ->
-    {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict),
-     PubsRemoved + 1, AcksRemoved}.
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index f84ba70a..dc81ea18 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1078,6 +1078,8 @@ verify_read_with_published(_Delivered, _Persistent, _Read, _Published) ->
     ko.
 
 test_queue_index() ->
+    SegmentSize = rabbit_queue_index:segment_size(),
+    TwoSegs = SegmentSize + SegmentSize,
     stop_msg_store(),
     ok = empty_test_queue(),
     SeqIdsA = lists:seq(0,9999),
@@ -1086,7 +1088,7 @@ test_queue_index() ->
     {0, 0, Qi1} =
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0),
     {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1),
-    {0, 10000, Qi3} =
+    {0, SegmentSize, Qi3} =
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2),
     {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3),
     ok = verify_read_with_published(false, false, ReadA,
@@ -1097,10 +1099,10 @@ test_queue_index() ->
     ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]),
     %% should get length back as 0, as all the msgs were transient
     {0, Qi6} = rabbit_queue_index:init(test_queue()),
-    {0, 10000, Qi7} =
+    {0, SegmentSize, Qi7} =
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6),
     {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7),
-    {0, 20000, Qi9} =
+    {0, TwoSegs, Qi9} =
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8),
     {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9),
     ok = verify_read_with_published(false, true, ReadB,
@@ -1111,7 +1113,7 @@ test_queue_index() ->
     %% should get length back as 10000
     LenB = length(SeqIdsB),
     {LenB, Qi12} = rabbit_queue_index:init(test_queue()),
-    {0, 20000, Qi13} =
+    {0, TwoSegs, Qi13} =
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12),
     Qi14 = queue_index_deliver(SeqIdsB, Qi13),
     {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14),
@@ -1119,10 +1121,8 @@ test_queue_index() ->
                                     lists:reverse(SeqIdsMsgIdsB)),
     Qi16 = rabbit_queue_index:write_acks(SeqIdsB, Qi15),
     Qi17 = queue_index_flush_journal(Qi16),
-    %% the entire first segment will have gone as they were firstly
-    %% transient, and secondly ack'd
-    SegmentSize = rabbit_queue_index:segment_size(),
-    {SegmentSize, 20000, Qi18} =
+    %% Everything will have gone now because #pubs == #acks
+    {0, 0, Qi18} =
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17),
     _Qi19 = rabbit_queue_index:terminate(Qi18),
     ok = stop_msg_store(),
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 0a5909a0..f2d45700 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -460,7 +460,7 @@ tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) ->
                       {SeqIdsAcc1, StateN1}
               end, {[], State1}, lists:flatten(lists:reverse(SPubs))),
     IndexState1 =
-        rabbit_queue_index:sync_seq_ids(PubSeqIds, [] /= SAcks, IndexState),
+        rabbit_queue_index:sync_seq_ids(PubSeqIds, IndexState),
     [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ],
     State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }.
 
-- 
cgit v1.2.1


From c6ccaca4bfa42d63e6e265949796105f9462b516 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 2 Dec 2009 18:34:17 +0000
Subject: Allowed the journal to grow substantially bigger. This helps
 performance considerably.
--- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 2b4ec1a4..acebc32d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -41,7 +41,7 @@ %%---------------------------------------------------------------------------- %% ---- Journal details ---- --define(MAX_JOURNAL_ENTRY_COUNT, 32768). +-define(MAX_JOURNAL_ENTRY_COUNT, 262144). -define(JOURNAL_FILENAME, "journal.jif"). -define(PUB_PERSIST_JPREFIX, 2#00). -- cgit v1.2.1 From dcb765c04aac925dbb0577d37ae980c539ebb648 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Dec 2009 22:29:05 +0000 Subject: Up to 11kHz persistent on my home machine. Still a little way off the 13kHz that I was getting before, but there were bugs in the previous QI (eg missing syncs) which could well have led to the old version being too fast. Added two functions to fhc: delete/1 which deletes without flushing any data at all. It will refuse to delete if the file isn't open; discard_write_buffer/1 which does what it says. We use the latter after scattering the journal as after we hit each segment, we sync the segment, so at that point there's no need at all to force out the data in the fhc for the journal prior to truncation. --- src/file_handle_cache.erl | 26 ++++++++++- src/rabbit_queue_index.erl | 110 +++++++++++++++++++++++++++++++++------------ 2 files changed, 106 insertions(+), 30 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 2a3f1ded..8e084902 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -119,7 +119,8 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, - append_write_buffer/1, copy/3, set_maximum_since_use/1]). + append_write_buffer/1, copy/3, set_maximum_since_use/1, delete/1, + discard_write_buffer/1]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -193,6 +194,8 @@ -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> ({'ok', integer()} | error())). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). +-spec(delete/1 :: (ref()) -> ok_or_error()). +-spec(discard_write_buffer/1 :: (ref()) -> ok_or_error()). -endif. @@ -361,6 +364,27 @@ copy(Src, Dest, Count) -> {error, incorrect_handle_modes} end). +delete(Ref) -> + case erase({Ref, fhc_handle}) of + undefined -> ok; + Handle = #handle { path = Path } -> + Handle1 = Handle #handle { is_dirty = false, write_buffer = [] }, + case close1(Ref, Handle1, hard) of + ok -> file:delete(Path); + Error -> Error + end + end. + +discard_write_buffer(Ref) -> + with_handles( + [Ref], + fun ([#handle { write_buffer = [] }]) -> + ok; + ([Handle = #handle { write_buffer_size = Size, offset = Offset }]) -> + {ok, [Handle #handle { write_buffer = [], write_buffer_size = 0, + offset = Offset - Size }]} + end). + set_maximum_since_use(MaximumAge) -> Now = now(), case lists:foldl( diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index acebc32d..bbd95086 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -87,7 +87,9 @@ { dir, segments, journal_handle, - dirty_count + dirty_count, + last_seg_a, + last_seg_b }). 
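
%% [Illustrative sketch, not part of the patch: last_seg_a and last_seg_b
%% act as a tiny two-slot MRU cache in front of the segments dict, so the
%% common case of repeatedly touching the same one or two segments skips
%% dict operations entirely. Conceptually (hypothetical helper):]
cache_find(Seg, {A = #segment { num = Seg }, _B}, _Segments) -> {hit, A};
cache_find(Seg, {_A, B = #segment { num = Seg }}, _Segments) -> {hit, B};
cache_find(Seg, _Cache, Segments) -> dict:find(Seg, Segments).
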
-record(segment, @@ -238,8 +240,9 @@ sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> flush_journal(State = #qistate { dirty_count = 0 }) -> State; -flush_journal(State = #qistate { segments = Segments }) -> - State1 = +flush_journal(State) -> + State1 = #qistate { segments = Segments } = get_all_segments(State), + State2 = dict:fold( fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, acks = AckCount } = Segment, StateN) -> @@ -261,12 +264,13 @@ flush_journal(State = #qistate { segments = Segments }) -> dict:new() }, StateN) end end - end, State #qistate { segments = dict:new() }, Segments), - {JournalHdl, State2} = get_journal_handle(State1), + end, State1 #qistate { segments = dict:new() }, Segments), + {JournalHdl, State3} = get_journal_handle(State2), + ok = file_handle_cache:discard_write_buffer(JournalHdl), {ok, 0} = file_handle_cache:position(JournalHdl, bof), ok = file_handle_cache:truncate(JournalHdl), ok = file_handle_cache:sync(JournalHdl), - State2 #qistate { dirty_count = 0 }. + State3 #qistate { dirty_count = 0 }. read_segment_entries(InitSeqId, State) -> {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), @@ -384,7 +388,8 @@ maybe_flush_journal(State = #qistate { dirty_count = DCount }) maybe_flush_journal(State) -> State. -all_segment_nums(#qistate { segments = Segments, dir = Dir }) -> +all_segment_nums(State = #qistate { dir = Dir }) -> + #qistate { segments = Segments } = get_all_segments(State), sets:to_list( lists:foldl( fun (SegName, Set) -> @@ -402,7 +407,9 @@ blank_state(QueueName) -> #qistate { dir = Dir, segments = dict:new(), journal_handle = undefined, - dirty_count = 0 + dirty_count = 0, + last_seg_a = undefined, + last_seg_b = undefined }. rev_sort(List) -> @@ -420,9 +427,8 @@ seg_num_to_path(Dir, Seg) -> delete_segment(#segment { handle = undefined }) -> ok; -delete_segment(#segment { handle = Hdl, path = Path }) -> - ok = file_handle_cache:close(Hdl), - ok = file:delete(Path), +delete_segment(#segment { handle = Hdl }) -> + ok = file_handle_cache:delete(Hdl), ok. detect_clean_shutdown(Dir) -> @@ -462,6 +468,10 @@ get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> get_segment_handle(Segment = #segment { handle = Hdl }) -> {Hdl, Segment}. +find_segment(Seg, #qistate { last_seg_a = #segment { num = Seg } = Segment }) -> + Segment; +find_segment(Seg, #qistate { last_seg_b = #segment { num = Seg } = Segment }) -> + Segment; find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> case dict:find(Seg, Segments) of {ok, Segment = #segment{}} -> Segment; @@ -474,9 +484,46 @@ find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> } end. -store_segment(Segment = #segment { num = Seg }, - State = #qistate { segments = Segments }) -> - State #qistate { segments = dict:store(Seg, Segment, Segments) }. 
+store_segment(Segment = #segment { num = Seg }, State = + #qistate { last_seg_a = #segment { num = Seg }}) -> + State #qistate { last_seg_a = Segment }; +store_segment(Segment = #segment { num = Seg }, State = + #qistate { last_seg_b = #segment { num = Seg }}) -> + State #qistate { last_seg_b = Segment }; +store_segment(Segment, State = + #qistate { last_seg_a = LastSegA, last_seg_b = LastSegB }) -> + case LastSegA of + undefined -> + State #qistate { last_seg_a = Segment }; + _ -> + case LastSegB of + undefined -> + State #qistate { last_seg_b = Segment }; + _ -> + State1 = #qistate { segments = Segments } = + State #qistate { last_seg_a = LastSegB, + last_seg_b = Segment }, + State1 #qistate { + segments = return_segment_to_dict(LastSegA, Segments) } + end + end. + +get_all_segments(State = #qistate { last_seg_a = undefined, + last_seg_b = undefined }) -> + State; +get_all_segments(State = #qistate { segments = Segments, + last_seg_a = LastSegA, + last_seg_b = LastSegB }) -> + State #qistate { last_seg_a = undefined, + last_seg_b = undefined, + segments = return_segment_to_dict( + LastSegB, + return_segment_to_dict(LastSegA, Segments)) }. + +return_segment_to_dict(undefined, Segments) -> + Segments; +return_segment_to_dict(Segment = #segment { num = Seg }, Segments) -> + dict:store(Seg, Segment, Segments). get_journal_handle(State = #qistate { journal_handle = undefined, dir = Dir }) -> @@ -517,8 +564,9 @@ write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> Hdl. terminate(StoreShutdown, State = - #qistate { segments = Segments, journal_handle = JournalHdl, + #qistate { journal_handle = JournalHdl, dir = Dir }) -> + State1 = #qistate { segments = Segments } = get_all_segments(State), ok = case JournalHdl of undefined -> ok; _ -> file_handle_cache:close(JournalHdl) @@ -533,7 +581,7 @@ terminate(StoreShutdown, State = true -> store_clean_shutdown(Dir); false -> ok end, - State #qistate { journal_handle = undefined, segments = dict:new() }. + State1 #qistate { journal_handle = undefined, segments = dict:new() }. %%---------------------------------------------------------------------------- %% Majors @@ -608,7 +656,8 @@ deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict) -> load_journal(State) -> {JournalHdl, State1} = get_journal_handle(State), {ok, 0} = file_handle_cache:position(JournalHdl, 0), - State2 = #qistate { segments = Segments } = load_journal_entries(State1), + State2 = #qistate { segments = Segments } = + get_all_segments(load_journal_entries(State1)), dict:fold( fun (Seg, #segment { journal_entries = JEntries, pubs = PubCountInJournal, @@ -681,18 +730,21 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount }) -> %% pub. Also, always want to keep acks. Things must occur in the right %% order though. 
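%% For illustration, the journal triples these clauses maintain look
%% like (states taken from the code below, values hypothetical):
%%   {{MsgId, IsPersistent}, no_del, no_ack}   publish only
%%   {{MsgId, IsPersistent},    del, no_ack}   publish then deliver
%%   {no_pub,                   del, no_ack}   deliver of a segment pub
%%   {no_pub,                no_del,    ack}   ack of a segment pub/del
%% A del is only ever recorded against no_del, and an ack against
%% no_ack, which is exactly what the update clauses below enforce.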
add_to_journal(RelSeq, Action, SegJDict) -> - case dict:find(RelSeq, SegJDict) of - {ok, {PubRecord, no_del, no_ack}} when Action == del -> - dict:store(RelSeq, {PubRecord, del, no_ack}, SegJDict); - {ok, {PubRecord, DelRecord, no_ack}} when Action == ack -> - dict:store(RelSeq, {PubRecord, DelRecord, ack}, SegJDict); - error when Action == del -> - dict:store(RelSeq, {no_pub, del, no_ack}, SegJDict); - error when Action == ack -> - dict:store(RelSeq, {no_pub, no_del, ack}, SegJDict); - error -> - {_MsgId, _IsPersistent} = Action, %% ASSERTION - dict:store(RelSeq, {Action, no_del, no_ack}, SegJDict) + case dict:is_key(RelSeq, SegJDict) of + true -> + dict:update(RelSeq, + fun ({PubRecord, no_del, no_ack}) when Action == del -> + {PubRecord, del, no_ack}; + ({PubRecord, Del, no_ack}) when Action == ack -> + {PubRecord, Del, ack} + end, SegJDict); + false -> + dict:store(RelSeq, + case Action of + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack}; + {_Msg, _IsPersistent} -> {Action, no_del, no_ack} + end, SegJDict) end. %% Combine what we have just read from a segment file with what we're -- cgit v1.2.1 From 2bf4f8a3d520402b9f526d4ee5c10717903ac28d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 2 Dec 2009 23:35:26 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 8e084902..ef201aee 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -194,8 +194,8 @@ -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> ({'ok', integer()} | error())). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). --spec(delete/1 :: (ref()) -> ok_or_error()). --spec(discard_write_buffer/1 :: (ref()) -> ok_or_error()). +-spec(delete/1 :: (ref()) -> ok_or_error()). +-spec(discard_write_buffer/1 :: (ref()) -> ok_or_error()). -endif. @@ -366,11 +366,12 @@ copy(Src, Dest, Count) -> delete(Ref) -> case erase({Ref, fhc_handle}) of - undefined -> ok; + undefined -> + ok; Handle = #handle { path = Path } -> - Handle1 = Handle #handle { is_dirty = false, write_buffer = [] }, - case close1(Ref, Handle1, hard) of - ok -> file:delete(Path); + case close1(Ref, Handle #handle { is_dirty = false, + write_buffer = [] }, hard) of + ok -> file:delete(Path); Error -> Error end end. -- cgit v1.2.1 From 5b9aebb037715d8e087e2a22e7197790c0803d51 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 12:00:06 +0000 Subject: source code file compression --- src/rabbit_msg_store.erl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 3d38f721..cdeee98c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -242,7 +242,7 @@ sync() -> gen_server2:pcast(?SERVER, 9, sync). 
%% internal init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> process_flag(trap_exit, true), - + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), MsgLocations = ets:new(?MSG_LOC_NAME, @@ -266,7 +266,7 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), - FileNames = + FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), @@ -515,7 +515,7 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> ets:lookup(FileSummary, File), ContiguousTop1 = lists:min([ContiguousTop, Offset]), ValidTotalSize1 = ValidTotalSize - TotalSize, - true = ets:insert(FileSummary, FSEntry #file_summary { + true = ets:insert(FileSummary, FSEntry #file_summary { valid_total_size = ValidTotalSize1, contiguous_top = ContiguousTop1 }), {compact, File}; @@ -542,7 +542,7 @@ close_all_handles(State = #msstate { file_handle_cache = FHC }) -> get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC }) -> case dict:find(FileNum, FHC) of - {ok, Hdl} -> {Hdl, State}; + {ok, Hdl} -> {Hdl, State}; error -> new_handle(FileNum, filenum_to_name(FileNum), [read | ?BINARY_MODE], State) end. @@ -619,7 +619,6 @@ index_search_by_file(File, #msstate { msg_locations = MsgLocations }) -> end, ets:match_object(MsgLocations, #msg_location { file = File, _ = '_' })). - index_delete_by_file(File, #msstate { msg_locations = MsgLocations }) -> MatchHead = #msg_location { file = File, _ = '_' }, ets:select_delete(MsgLocations, [{MatchHead, [], [true]}]), @@ -918,7 +917,7 @@ combine_file(File, State = #msstate { file_summary = FileSummary, adjust_meta_and_combine( LeftObj = #file_summary { file = LeftFile, valid_total_size = LeftValidData, right = RightFile }, - RightObj = #file_summary { + RightObj = #file_summary { file = RightFile, valid_total_size = RightValidData, left = LeftFile, right = RightRight }, State = #msstate { file_size_limit = FileSizeLimit, -- cgit v1.2.1 From d58f2cda54d8b7ded218007ad19772dbff6540ab Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 13:10:59 +0000 Subject: Some renaming in the fhc and associated changes in qi and ms. --- src/file_handle_cache.erl | 29 ++++++++++++++++++++--------- src/rabbit_msg_store.erl | 10 +++------- src/rabbit_queue_index.erl | 5 +---- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index ef201aee..6007e554 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -119,8 +119,8 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, - append_write_buffer/1, copy/3, set_maximum_since_use/1, delete/1, - discard_write_buffer/1]). + flush/1, copy/3, set_maximum_since_use/1, delete/1, + clear/1]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -190,12 +190,12 @@ -spec(last_sync_offset/1 :: (ref()) -> ({'ok', integer()} | error())). -spec(current_virtual_offset/1 :: (ref()) -> ({'ok', integer()} | error())). -spec(current_raw_offset/1 :: (ref()) -> ({'ok', integer()} | error())). --spec(append_write_buffer/1 :: (ref()) -> ok_or_error()). +-spec(flush/1 :: (ref()) -> ok_or_error()). -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> ({'ok', integer()} | error())). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). 
-spec(delete/1 :: (ref()) -> ok_or_error()). --spec(discard_write_buffer/1 :: (ref()) -> ok_or_error()). +-spec(clear/1 :: (ref()) -> ok_or_error()). -endif. @@ -343,7 +343,7 @@ current_virtual_offset(Ref) -> current_raw_offset(Ref) -> with_handles([Ref], fun ([Handle]) -> {ok, Handle #handle.offset} end). -append_write_buffer(Ref) -> +flush(Ref) -> with_flushed_handles([Ref], fun ([Handle]) -> {ok, [Handle]} end). copy(Src, Dest, Count) -> @@ -376,14 +376,25 @@ delete(Ref) -> end end. -discard_write_buffer(Ref) -> +clear(Ref) -> with_handles( [Ref], - fun ([#handle { write_buffer = [] }]) -> + fun ([#handle { at_eof = true, write_buffer_size = 0, offset = 0 }]) -> ok; ([Handle = #handle { write_buffer_size = Size, offset = Offset }]) -> - {ok, [Handle #handle { write_buffer = [], write_buffer_size = 0, - offset = Offset - Size }]} + Handle1 = + Handle #handle { write_buffer = [], write_buffer_size = 0, + offset = Offset - Size }, + case maybe_seek(bof, Handle1) of + {{ok, 0}, Handle2 = #handle { hdl = Hdl }} -> + case file:truncate(Hdl) of + ok -> {ok, [Handle2 #handle { at_eof = true, + trusted_offset = 0 }]}; + Error -> {Error, [Handle2]} + end; + Error -> + {Error, [Handle1]} + end end). set_maximum_since_use(MaximumAge) -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index cdeee98c..c21e4bd9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -299,11 +299,8 @@ handle_call({read, MsgId}, _From, State = not_found -> ok = case CurFile =:= File andalso {ok, Offset} >= file_handle_cache:current_raw_offset(CurHdl) of - true -> - file_handle_cache:append_write_buffer( - CurHdl); - false -> - ok + true -> file_handle_cache:flush(CurHdl); + false -> ok end, {Hdl, State2} = get_read_handle(File, State), {ok, Offset} = file_handle_cache:position(Hdl, Offset), @@ -736,8 +733,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE), {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize), ok = file_handle_cache:close(MainHdl), - ok = file_handle_cache:close(TmpHdl), - ok = file:delete(TmpPath), + ok = file_handle_cache:delete(TmpHdl), {ok, _MainMessages, MsgIdsMain} = scan_file_for_valid_messages_msg_ids( diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index bbd95086..adc3f742 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -266,10 +266,7 @@ flush_journal(State) -> end end, State1 #qistate { segments = dict:new() }, Segments), {JournalHdl, State3} = get_journal_handle(State2), - ok = file_handle_cache:discard_write_buffer(JournalHdl), - {ok, 0} = file_handle_cache:position(JournalHdl, bof), - ok = file_handle_cache:truncate(JournalHdl), - ok = file_handle_cache:sync(JournalHdl), + ok = file_handle_cache:clear(JournalHdl), State3 #qistate { dirty_count = 0 }. 
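%% For reference (restating the hunk above): clear/1 subsumes the old
%% four-call sequence on the journal handle -- discard_write_buffer,
%% position to bof, truncate, sync -- leaving just:
%%
%%   ok = file_handle_cache:clear(JournalHdl)
%%
%% No separate sync is needed here: every segment was synced during
%% the fold above, so anything still in the journal is redundant.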
read_segment_entries(InitSeqId, State) -> -- cgit v1.2.1 From 86012205063ccf01e32916f3be09ca8f1864bec5 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Dec 2009 13:38:48 +0000 Subject: cosmetic --- src/file_handle_cache.erl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 6007e554..e8d7cf6e 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -119,8 +119,7 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, - flush/1, copy/3, set_maximum_since_use/1, delete/1, - clear/1]). + flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -382,14 +381,15 @@ clear(Ref) -> fun ([#handle { at_eof = true, write_buffer_size = 0, offset = 0 }]) -> ok; ([Handle = #handle { write_buffer_size = Size, offset = Offset }]) -> - Handle1 = - Handle #handle { write_buffer = [], write_buffer_size = 0, - offset = Offset - Size }, + Handle1 = Handle #handle { write_buffer = [], + write_buffer_size = 0, + offset = Offset - Size }, case maybe_seek(bof, Handle1) of {{ok, 0}, Handle2 = #handle { hdl = Hdl }} -> case file:truncate(Hdl) of - ok -> {ok, [Handle2 #handle { at_eof = true, - trusted_offset = 0 }]}; + ok -> {ok, [Handle2 #handle { + at_eof = true, + trusted_offset = 0 }]}; Error -> {Error, [Handle2]} end; Error -> -- cgit v1.2.1 From 73fc505fc4ffcffbbe3e977160d3d3a5a842b8e8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 15:41:16 +0000 Subject: Abstracted the segment caching in the qi, and associated improvements as not all the state needs to be passed around all the time. --- src/rabbit_queue_index.erl | 250 ++++++++++++++++++++++++--------------------- 1 file changed, 131 insertions(+), 119 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index adc3f742..829b03aa 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -87,9 +87,7 @@ { dir, segments, journal_handle, - dirty_count, - last_seg_a, - last_seg_b + dirty_count }). -record(segment, @@ -108,10 +106,19 @@ -ifdef(use_specs). -type(hdl() :: ('undefined' | any())). +-type(segment() :: ('undefined' | + #segment { pubs :: non_neg_integer(), + acks :: non_neg_integer(), + handle :: hdl(), + journal_entries :: dict(), + path :: file_path(), + num :: non_neg_integer() + })). -type(msg_id() :: binary()). -type(seq_id() :: integer()). +-type(seg_dict() :: {dict(), [segment()], file_path()}). -type(qistate() :: #qistate { dir :: file_path(), - segments :: dict(), + segments :: seg_dict(), journal_handle :: hdl(), dirty_count :: integer() }). @@ -159,11 +166,13 @@ init(Name) -> %% We know the journal is empty here, so we don't need to combine %% with the journal, and we don't need to worry about messages %% that have been acked. 
- State3 = + State3 = #qistate { segments = Segments } = lists:foldl( - fun (Seg, StateN) -> - {SegDict, _PubCount, _AckCount, StateN1} = - load_segment(Seg, false, StateN), + fun (Seg, StateN = #qistate { segments = SegmentsN }) -> + Segment = segment_find(Seg, SegmentsN), + {SegDict, _PubCount, _AckCount, Segment1} = + load_segment(false, Segment), + SegmentsN1 = segment_store(Segment1, SegmentsN), dict:fold( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, StateM) -> @@ -183,14 +192,14 @@ init(Name) -> SeqId, ack, add_to_journal(SeqId, del, StateM)) end - end, StateN1, SegDict) + end, StateN #qistate { segments = SegmentsN1 }, SegDict) end, State2, AllSegs), %% 4. Go through all segments and calculate the number of unacked %% messages we have. Count = lists:foldl( fun (Seg, CountAcc) -> #segment { pubs = PubCount, acks = AckCount } = - find_segment(Seg, State3), + segment_find(Seg, Segments), CountAcc + PubCount - AckCount end, 0, AllSegs), {Count, State3}. @@ -240,40 +249,40 @@ sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> flush_journal(State = #qistate { dirty_count = 0 }) -> State; -flush_journal(State) -> - State1 = #qistate { segments = Segments } = get_all_segments(State), - State2 = - dict:fold( +flush_journal(State = #qistate { segments = Segments }) -> + Segments1 = + segment_fold( fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, - acks = AckCount } = Segment, StateN) -> + acks = AckCount } = Segment, SegmentsN) -> case PubCount > 0 andalso PubCount == AckCount of true -> - ok = delete_segment(Segment), - StateN; + segment_erase(delete_segment(Segment), SegmentsN); false -> case 0 == dict:size(JEntries) of true -> - store_segment(Segment, StateN); + SegmentsN; false -> {Hdl, Segment1} = get_segment_handle(Segment), dict:fold(fun write_entry_to_segment/3, Hdl, JEntries), ok = file_handle_cache:sync(Hdl), - store_segment( + segment_store( Segment1 #segment { journal_entries = - dict:new() }, StateN) + dict:new() }, SegmentsN) end end - end, State1 #qistate { segments = dict:new() }, Segments), - {JournalHdl, State3} = get_journal_handle(State2), + end, Segments, Segments), + {JournalHdl, State1} = + get_journal_handle(State #qistate { segments = Segments1 }), ok = file_handle_cache:clear(JournalHdl), - State3 #qistate { dirty_count = 0 }. + State1 #qistate { dirty_count = 0 }. -read_segment_entries(InitSeqId, State) -> +read_segment_entries(InitSeqId, State = #qistate { segments = Segments }) -> {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - {SegDict, _PubCount, _AckCount, State1} = - load_segment(Seg, false, State), - #segment { journal_entries = JEntries } = find_segment(Seg, State1), + Segment = segment_find(Seg, Segments), + {SegDict, _PubCount, _AckCount, + Segment1 = #segment { journal_entries = JEntries }} = + load_segment(false, Segment), SegDict1 = journal_plus_segment(JEntries, SegDict), %% deliberately sort the list desc, because foldl will reverse it RelSeqs = rev_sort(dict:fetch_keys(SegDict1)), @@ -283,7 +292,7 @@ read_segment_entries(InitSeqId, State) -> [ {MsgId, reconstruct_seq_id(Seg, RelSeq), IsPersistent, IsDelivered == del} | Acc ] end, [], RelSeqs), - State1}. + State #qistate { segments = segment_store(Segment1, Segments) }}. next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), @@ -385,8 +394,7 @@ maybe_flush_journal(State = #qistate { dirty_count = DCount }) maybe_flush_journal(State) -> State. 
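%% Context note (the guard itself lies outside this hunk, so its exact
%% shape here is an assumption): dirty_count counts journal entries
%% not yet applied to the segment files, and maybe_flush_journal/1
%% above presumably reads roughly as:
%%
%%   maybe_flush_journal(State = #qistate { dirty_count = DCount })
%%     when DCount > ?MAX_JOURNAL_ENTRY_COUNT ->
%%       flush_journal(State);
%%   maybe_flush_journal(State) ->
%%       State.
%%
%% Raising ?MAX_JOURNAL_ENTRY_COUNT to 262144 (earlier in this series)
%% trades a longer journal replay on recovery for fewer segment
%% rewrites.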
-all_segment_nums(State = #qistate { dir = Dir }) -> - #qistate { segments = Segments } = get_all_segments(State), +all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> sets:to_list( lists:foldl( fun (SegName, Set) -> @@ -394,7 +402,7 @@ all_segment_nums(State = #qistate { dir = Dir }) -> list_to_integer( lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, SegName)), Set) - end, sets:from_list(dict:fetch_keys(Segments)), + end, sets:from_list(segment_fetch_keys(Segments)), filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir))). blank_state(QueueName) -> @@ -402,11 +410,9 @@ blank_state(QueueName) -> Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), #qistate { dir = Dir, - segments = dict:new(), + segments = segment_new(Dir), journal_handle = undefined, - dirty_count = 0, - last_seg_a = undefined, - last_seg_b = undefined + dirty_count = 0 }. rev_sort(List) -> @@ -422,11 +428,11 @@ seg_num_to_path(Dir, Seg) -> SegName = integer_to_list(Seg), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). -delete_segment(#segment { handle = undefined }) -> - ok; -delete_segment(#segment { handle = Hdl }) -> +delete_segment(Segment = #segment { handle = undefined }) -> + Segment; +delete_segment(Segment = #segment { handle = Hdl }) -> ok = file_handle_cache:delete(Hdl), - ok. + Segment #segment { handle = undefined }. detect_clean_shutdown(Dir) -> case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of @@ -465,11 +471,11 @@ get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> get_segment_handle(Segment = #segment { handle = Hdl }) -> {Hdl, Segment}. -find_segment(Seg, #qistate { last_seg_a = #segment { num = Seg } = Segment }) -> - Segment; -find_segment(Seg, #qistate { last_seg_b = #segment { num = Seg } = Segment }) -> - Segment; -find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> +segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_], _Dir}) -> + Segment; %% 1 or (2, matches head) +segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }], _Dir}) -> + Segment; %% 2, matches tail +segment_find(Seg, {Segments, _, Dir}) -> %% no match case dict:find(Seg, Segments) of {ok, Segment = #segment{}} -> Segment; error -> #segment { pubs = 0, @@ -481,46 +487,52 @@ find_segment(Seg, #qistate { segments = Segments, dir = Dir }) -> } end. -store_segment(Segment = #segment { num = Seg }, State = - #qistate { last_seg_a = #segment { num = Seg }}) -> - State #qistate { last_seg_a = Segment }; -store_segment(Segment = #segment { num = Seg }, State = - #qistate { last_seg_b = #segment { num = Seg }}) -> - State #qistate { last_seg_b = Segment }; -store_segment(Segment, State = - #qistate { last_seg_a = LastSegA, last_seg_b = LastSegB }) -> - case LastSegA of - undefined -> - State #qistate { last_seg_a = Segment }; - _ -> - case LastSegB of - undefined -> - State #qistate { last_seg_b = Segment }; - _ -> - State1 = #qistate { segments = Segments } = - State #qistate { last_seg_a = LastSegB, - last_seg_b = Segment }, - State1 #qistate { - segments = return_segment_to_dict(LastSegA, Segments) } - end - end. - -get_all_segments(State = #qistate { last_seg_a = undefined, - last_seg_b = undefined }) -> - State; -get_all_segments(State = #qistate { segments = Segments, - last_seg_a = LastSegA, - last_seg_b = LastSegB }) -> - State #qistate { last_seg_a = undefined, - last_seg_b = undefined, - segments = return_segment_to_dict( - LastSegB, - return_segment_to_dict(LastSegA, Segments)) }. 
- -return_segment_to_dict(undefined, Segments) -> - Segments; -return_segment_to_dict(Segment = #segment { num = Seg }, Segments) -> - dict:store(Seg, Segment, Segments). +segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) + {Segments, [#segment { num = Seg } | Tail], Dir}) -> + {Segments, [Segment | Tail], Dir}; +segment_store(Segment = #segment { num = Seg }, %% 2, matches tail + {Segments, [SegmentA, #segment { num = Seg }], Dir}) -> + {Segments, [SegmentA, Segment], Dir}; +segment_store(Segment = #segment { num = Seg }, + {Segments, [], Dir}) -> + {dict:erase(Seg, Segments), [Segment], Dir}; +segment_store(Segment = #segment { num = Seg }, + {Segments, [SegmentA], Dir}) -> + {dict:erase(Seg, Segments), [Segment, SegmentA], Dir}; +segment_store(Segment = #segment { num = Seg }, + {Segments, [SegmentA, SegmentB], Dir}) -> + {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)), + [Segment, SegmentA], Dir}. + +segment_fold(Fun, Acc, {Segments, [], _Dir}) -> + dict:fold(Fun, Acc, Segments); +segment_fold(Fun, Acc, {Segments, CachedSegments, _Dir}) -> + Acc1 = lists:foldl(fun (Segment = #segment { num = Num }, AccN) -> + Fun(Num, Segment, AccN) + end, Acc, CachedSegments), + dict:fold(Fun, Acc1, Segments). + +segment_map(Fun, {Segments, CachedSegments, Dir}) -> + {dict:map(Fun, Segments), + lists:map(fun (Segment = #segment { num = Num }) -> Fun(Num, Segment) end, + CachedSegments), Dir}. + +segment_fetch_keys({Segments, CachedSegments, _Dir}) -> + lists:map(fun (Segment) -> Segment#segment.num end, CachedSegments) ++ + dict:fetch_keys(Segments). + +segment_erase(#segment { handle = undefined, num = Num }, + {Segments, [#segment { num = Num } | Rest], Dir}) -> + {Segments, Rest, Dir}; %% 1 or (2, matches head) +segment_erase(#segment { handle = undefined, num = Num }, + {Segments, [Head, #segment { num = Num }], Dir}) -> + {Segments, [Head], Dir}; %% 2, matches tail +segment_erase(#segment { handle = undefined, num = Num }, + {Segments, CachedSegments, Dir}) -> + {dict:erase(Num, Segments), CachedSegments, Dir}. + +segment_new(Dir) -> + {dict:new(), [], Dir}. get_journal_handle(State = #qistate { journal_handle = undefined, dir = Dir }) -> @@ -562,13 +574,12 @@ write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> terminate(StoreShutdown, State = #qistate { journal_handle = JournalHdl, - dir = Dir }) -> - State1 = #qistate { segments = Segments } = get_all_segments(State), + dir = Dir, segments = Segments }) -> ok = case JournalHdl of undefined -> ok; _ -> file_handle_cache:close(JournalHdl) end, - ok = dict:fold( + ok = segment_fold( fun (_Seg, #segment { handle = undefined }, ok) -> ok; (_Seg, #segment { handle = Hdl }, ok) -> @@ -578,7 +589,7 @@ terminate(StoreShutdown, State = true -> store_clean_shutdown(Dir); false -> ok end, - State1 #qistate { journal_handle = undefined, segments = dict:new() }. + State #qistate { journal_handle = undefined, segments = segment_new(Dir) }. %%---------------------------------------------------------------------------- %% Majors @@ -591,22 +602,21 @@ terminate(StoreShutdown, State = %% number of unacked msgs is PubCount - AckCount. If KeepAcks is %% false, then dict:size(SegDict) == PubCount - AckCount. If KeepAcks %% is true, then dict:size(SegDict) == PubCount. 
-load_segment(Seg, KeepAcks, State) -> - Segment = #segment { path = Path, handle = SegHdl } = - find_segment(Seg, State), +load_segment(KeepAcks, + Segment = #segment { path = Path, handle = SegHdl }) -> SegmentExists = case SegHdl of undefined -> filelib:is_file(Path); _ -> true end, case SegmentExists of false -> - {dict:new(), 0, 0, State}; + {dict:new(), 0, 0, Segment}; true -> {Hdl, Segment1} = get_segment_handle(Segment), {ok, 0} = file_handle_cache:position(Hdl, bof), {SegDict, PubCount, AckCount} = load_segment_entries(KeepAcks, Hdl, dict:new(), 0, 0), - {SegDict, PubCount, AckCount, store_segment(Segment1, State)} + {SegDict, PubCount, AckCount, Segment1} end. load_segment_entries(KeepAcks, Hdl, SegDict, PubCount, AckCount) -> @@ -653,29 +663,29 @@ deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict) -> load_journal(State) -> {JournalHdl, State1} = get_journal_handle(State), {ok, 0} = file_handle_cache:position(JournalHdl, 0), - State2 = #qistate { segments = Segments } = - get_all_segments(load_journal_entries(State1)), - dict:fold( - fun (Seg, #segment { journal_entries = JEntries, - pubs = PubCountInJournal, - acks = AckCountInJournal }, StateN) -> - %% We want to keep acks in so that we can remove them if - %% duplicates are in the journal. The counts here are - %% purely from the segment itself. - {SegDict, PubCountInSeg, AckCountInSeg, StateN1} = - load_segment(Seg, true, StateN), - %% Removed counts here are the number of pubs and acks - %% that are duplicates - i.e. found in both the segment - %% and journal. - {JEntries1, PubsRemoved, AcksRemoved} = - journal_minus_segment(JEntries, SegDict), - Segment1 = find_segment(Seg, StateN1), - PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, - AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, - store_segment(Segment1 #segment { journal_entries = JEntries1, - pubs = PubCount1, - acks = AckCount1 }, StateN1) - end, State2, Segments). + State2 = #qistate { segments = Segments } = load_journal_entries(State1), + Segments1 = + segment_map( + fun (_Seg, Segment = #segment { journal_entries = JEntries, + pubs = PubCountInJournal, + acks = AckCountInJournal }) -> + %% We want to keep acks in so that we can remove + %% them if duplicates are in the journal. The counts + %% here are purely from the segment itself. + {SegDict, PubCountInSeg, AckCountInSeg, Segment1} = + load_segment(true, Segment), + %% Removed counts here are the number of pubs and + %% acks that are duplicates - i.e. found in both the + %% segment and journal. + {JEntries1, PubsRemoved, AcksRemoved} = + journal_minus_segment(JEntries, SegDict), + PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, + AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, + Segment1 #segment { journal_entries = JEntries1, + pubs = PubCount1, + acks = AckCount1 } + end, Segments), + State2 #qistate { segments = Segments1 }. load_journal_entries(State = #qistate { journal_handle = Hdl }) -> case file_handle_cache:read(Hdl, ?SEQ_BYTES) of @@ -707,11 +717,12 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> _ErrOrEoF -> State end. 
-add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount }) -> +add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, + segments = Segments }) -> {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), Segment = #segment { journal_entries = SegJDict, pubs = PubCount, acks = AckCount } = - find_segment(Seg, State), + segment_find(Seg, Segments), SegJDict1 = add_to_journal(RelSeq, Action, SegJDict), Segment1 = Segment #segment { journal_entries = SegJDict1 }, Segment2 = @@ -720,7 +731,8 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount }) -> ack -> Segment1 #segment { acks = AckCount + 1 }; {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } end, - store_segment(Segment2, State #qistate { dirty_count = DCount + 1 }); + State #qistate { dirty_count = DCount + 1, + segments = segment_store(Segment2, Segments) }; %% This is a more relaxed version of deliver_or_ack_msg because we can %% have dels or acks in the journal without the corresponding -- cgit v1.2.1 From 96f7b35fce5c3bbd464c2952693872fd6d39b710 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 15:59:00 +0000 Subject: If I write it this way then it saves code and is maybe slightly faster. maybe. --- src/rabbit_queue_index.erl | 52 ++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 829b03aa..5ae5c773 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -249,29 +249,32 @@ sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> flush_journal(State = #qistate { dirty_count = 0 }) -> State; -flush_journal(State = #qistate { segments = Segments }) -> +flush_journal(State = #qistate { segments = Segments, dir = Dir }) -> Segments1 = segment_fold( fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, acks = AckCount } = Segment, SegmentsN) -> case PubCount > 0 andalso PubCount == AckCount of true -> - segment_erase(delete_segment(Segment), SegmentsN); + ok = delete_segment(Segment), + SegmentsN; false -> - case 0 == dict:size(JEntries) of - true -> - SegmentsN; - false -> - {Hdl, Segment1} = get_segment_handle(Segment), - dict:fold(fun write_entry_to_segment/3, - Hdl, JEntries), - ok = file_handle_cache:sync(Hdl), - segment_store( - Segment1 #segment { journal_entries = - dict:new() }, SegmentsN) - end + Segment1 = + case 0 == dict:size(JEntries) of + true -> + SegmentsN; + false -> + {Hdl, Segment2} = + get_segment_handle(Segment), + dict:fold(fun write_entry_to_segment/3, + Hdl, JEntries), + ok = file_handle_cache:sync(Hdl), + Segment2 #segment { journal_entries = + dict:new() } + end, + segment_store(Segment1, SegmentsN) end - end, Segments, Segments), + end, segment_new(Dir), Segments), {JournalHdl, State1} = get_journal_handle(State #qistate { segments = Segments1 }), ok = file_handle_cache:clear(JournalHdl), @@ -428,11 +431,10 @@ seg_num_to_path(Dir, Seg) -> SegName = integer_to_list(Seg), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). -delete_segment(Segment = #segment { handle = undefined }) -> - Segment; -delete_segment(Segment = #segment { handle = Hdl }) -> - ok = file_handle_cache:delete(Hdl), - Segment #segment { handle = undefined }. +delete_segment(#segment { handle = undefined }) -> + ok; +delete_segment(#segment { handle = Hdl }) -> + ok = file_handle_cache:delete(Hdl). 
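%% Note, grounded in the fhc delete/1 added earlier in this series:
%% delete/1 marks the handle clean and drops its write buffer before
%% closing, then removes the file at the path recorded inside the
%% handle. Deleting a segment therefore never flushes doomed data to
%% disk first, and the caller no longer needs to carry the path:
%%
%%   ok = file_handle_cache:delete(Hdl)  %% close without flush, then
%%                                       %% delete the underlying file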
detect_clean_shutdown(Dir) -> case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of @@ -521,16 +523,6 @@ segment_fetch_keys({Segments, CachedSegments, _Dir}) -> lists:map(fun (Segment) -> Segment#segment.num end, CachedSegments) ++ dict:fetch_keys(Segments). -segment_erase(#segment { handle = undefined, num = Num }, - {Segments, [#segment { num = Num } | Rest], Dir}) -> - {Segments, Rest, Dir}; %% 1 or (2, matches head) -segment_erase(#segment { handle = undefined, num = Num }, - {Segments, [Head, #segment { num = Num }], Dir}) -> - {Segments, [Head], Dir}; %% 2, matches tail -segment_erase(#segment { handle = undefined, num = Num }, - {Segments, CachedSegments, Dir}) -> - {dict:erase(Num, Segments), CachedSegments, Dir}. - segment_new(Dir) -> {dict:new(), [], Dir}. -- cgit v1.2.1 From b5a5502974b48e0ded93b24631c4cdd3ee6b8bc3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 16:39:12 +0000 Subject: Switched to using array instead of dict for the inner journal. It's about 10% faster. --- src/rabbit_queue_index.erl | 142 +++++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5ae5c773..e28ceeeb 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -110,7 +110,7 @@ #segment { pubs :: non_neg_integer(), acks :: non_neg_integer(), handle :: hdl(), - journal_entries :: dict(), + journal_entries :: array(), path :: file_path(), num :: non_neg_integer() })). @@ -260,17 +260,18 @@ flush_journal(State = #qistate { segments = Segments, dir = Dir }) -> SegmentsN; false -> Segment1 = - case 0 == dict:size(JEntries) of + case 0 == array:sparse_size(JEntries) of true -> SegmentsN; false -> {Hdl, Segment2} = get_segment_handle(Segment), - dict:fold(fun write_entry_to_segment/3, - Hdl, JEntries), + array:sparse_foldl( + fun write_entry_to_segment/3, Hdl, + JEntries), ok = file_handle_cache:sync(Hdl), Segment2 #segment { journal_entries = - dict:new() } + journal_new() } end, segment_store(Segment1, SegmentsN) end @@ -418,6 +419,9 @@ blank_state(QueueName) -> dirty_count = 0 }. +journal_new() -> + array:new([{default, undefined}]). + rev_sort(List) -> lists:sort(fun (A, B) -> B < A end, List). @@ -483,7 +487,7 @@ segment_find(Seg, {Segments, _, Dir}) -> %% no match error -> #segment { pubs = 0, acks = 0, handle = undefined, - journal_entries = dict:new(), + journal_entries = journal_new(), path = seg_num_to_path(Dir, Seg), num = Seg } @@ -730,22 +734,19 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, %% have dels or acks in the journal without the corresponding %% pub. Also, always want to keep acks. Things must occur in the right %% order though. 
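%% Why an array suits the journal (illustration; the behaviour of the
%% stdlib array module stated here is an assumption for reference):
%% within a segment, entries are keyed by RelSeq -- small integers in
%% a bounded range -- a natural fit for a sparse array:
%%
%%   J0 = array:new([{default, undefined}]),
%%   J1 = array:set(5, {Pub, no_del, no_ack}, J0),
%%   undefined = array:get(4, J1),  %% unset slots read as the default
%%   6 = array:sparse_size(J1),     %% last set index + 1; 0 == empty
%%
%% and array:sparse_foldl/3, used below, visits only the set slots.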
-add_to_journal(RelSeq, Action, SegJDict) -> - case dict:is_key(RelSeq, SegJDict) of - true -> - dict:update(RelSeq, - fun ({PubRecord, no_del, no_ack}) when Action == del -> - {PubRecord, del, no_ack}; - ({PubRecord, Del, no_ack}) when Action == ack -> - {PubRecord, Del, ack} - end, SegJDict); - false -> - dict:store(RelSeq, - case Action of - del -> {no_pub, del, no_ack}; - ack -> {no_pub, no_del, ack}; - {_Msg, _IsPersistent} -> {Action, no_del, no_ack} - end, SegJDict) +add_to_journal(RelSeq, Action, SegJArray) -> + case array:get(RelSeq, SegJArray) of + undefined -> + array:set(RelSeq, + case Action of + {_Msg, _IsPersistent} -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end, SegJArray); + ({PubRecord, no_del, no_ack}) when Action == del -> + array:set(RelSeq, {PubRecord, del, no_ack}, SegJArray); + ({PubRecord, Del, no_ack}) when Action == ack -> + array:set(RelSeq, {PubRecord, Del, ack}, SegJArray) end. %% Combine what we have just read from a segment file with what we're %% duplicates. Used when providing segment entries to the variable %% queue. journal_plus_segment(JEntries, SegDict) -> - dict:fold(fun (RelSeq, JObj, SegDictOut) -> - SegEntry = case dict:find(RelSeq, SegDictOut) of - error -> not_found; - {ok, SObj = {_, _, _}} -> SObj - end, - journal_plus_segment(JObj, SegEntry, RelSeq, SegDictOut) - end, SegDict, JEntries). + array:sparse_foldl( + fun (RelSeq, JObj, SegDictOut) -> + SegEntry = case dict:find(RelSeq, SegDictOut) of + error -> not_found; + {ok, SObj = {_, _, _}} -> SObj + end, + journal_plus_segment(JObj, SegEntry, RelSeq, SegDictOut) + end, SegDict, JEntries). %% Here, the OutDict is the SegDict which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment) @@ -791,96 +793,96 @@ journal_plus_segment({no_pub, no_del, ack}, RelSeq, OutDict) -> dict:erase(RelSeq, OutDict). - %% Remove from the journal entries for a segment, items that are %% duplicates of entries found in the segment itself. Used on start up %% to clean up the journal. journal_minus_segment(JEntries, SegDict) -> - dict:fold(fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> - SegEntry = case dict:find(RelSeq, SegDict) of - error -> not_found; - {ok, SObj = {_, _, _}} -> SObj - end, - journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, - PubsRemoved, AcksRemoved) - end, {dict:new(), 0, 0}, JEntries). - -%% Here, the OutDict is a fresh journal that we're filling with valid + array:sparse_foldl( + fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> + SegEntry = case dict:find(RelSeq, SegDict) of + error -> not_found; + {ok, SObj = {_, _, _}} -> SObj + end, + journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, + PubsRemoved, AcksRemoved) + end, {journal_new(), 0, 0}, JEntries). + +%% Here, the OutArray is a fresh journal that we're filling with valid %% entries. PubsRemoved and AcksRemoved only get increased when a %% publish or ack is in both the journal and the segment. %% Both the same.
Must be at least the publish journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, no_ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved + 1, AcksRemoved}; + _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {OutArray, PubsRemoved + 1, AcksRemoved}; journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved + 1, AcksRemoved + 1}; + _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {OutArray, PubsRemoved + 1, AcksRemoved + 1}; %% Just publish in journal journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, not_found, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; %% Just deliver in journal journal_minus_segment(Obj = {no_pub, del, no_ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, no_ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved}; + _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {OutArray, PubsRemoved, AcksRemoved}; %% Just ack in journal journal_minus_segment(Obj = {no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved}; + _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {OutArray, PubsRemoved, AcksRemoved}; %% Publish and deliver in journal journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, not_found, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; journal_minus_segment({PubRecord, del, no_ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, del, no_ack}, OutDict), + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, del, no_ack}, OutArray), PubsRemoved + 1, AcksRemoved}; %% Deliver and ack in journal journal_minus_segment(Obj = {no_pub, del, ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, Obj, OutDict), PubsRemoved, AcksRemoved}; + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, no_del, ack}, OutArray), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutDict, PubsRemoved, 
AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved + 1}; + _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {OutArray, PubsRemoved, AcksRemoved + 1}; %% Publish, deliver and ack in journal journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, not_found, - _RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {OutDict, PubsRemoved, AcksRemoved}; + _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {OutArray, PubsRemoved, AcksRemoved}; journal_minus_segment({PubRecord, del, ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, del, ack}, OutDict), + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, del, ack}, OutArray), PubsRemoved + 1, AcksRemoved}; journal_minus_segment({PubRecord, del, ack}, {PubRecord = {_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict, PubsRemoved, AcksRemoved) -> - {dict:store(RelSeq, {no_pub, no_del, ack}, OutDict), + RelSeq, OutArray, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, no_del, ack}, OutArray), PubsRemoved + 1, AcksRemoved}. -- cgit v1.2.1 From 3e1268ad5908a48c66baafd27ba07b8d7009e0f7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 17:00:20 +0000 Subject: Make the arrays fixed size and tell them the max size. They don't actually take this much space initially, but it makes them a bit faster as they can make more sensible balancing decisions, we think. --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e28ceeeb..970e36f6 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -420,7 +420,7 @@ blank_state(QueueName) -> }. journal_new() -> - array:new([{default, undefined}]). + array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). rev_sort(List) -> lists:sort(fun (A, B) -> B < A end, List). -- cgit v1.2.1 From bcc03d39008e72d031825d23a6d97e137afdec0b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 17:52:34 +0000 Subject: What a difference, two characters make... --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 970e36f6..908a711a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -262,7 +262,7 @@ flush_journal(State = #qistate { segments = Segments, dir = Dir }) -> Segment1 = case 0 == array:sparse_size(JEntries) of true -> - SegmentsN; + Segment; false -> {Hdl, Segment2} = get_segment_handle(Segment), -- cgit v1.2.1 From 8e292f20bb9182d8817584ed4ced590ad7261b7d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 3 Dec 2009 17:55:34 +0000 Subject: With R13B03, it would appear we need to be slightly more hesitant: currently I'm seeing the memory limits being hit and the queue duration is still fairly large. Whilst it does eventually fall, setting this scalar a little lower should help. However, this will probably need tweaking again, once we've moved away from ets in the msg_store. --- src/rabbit_memory_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 5c0b2daa..d6693d95 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -65,7 +65,7 @@ %% of the system. In short, we aim to stay some distance away from %% when the memory alarms will go off, which cause channel.flow.
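%% (Worked example, assuming a hypothetical 4GB memory high watermark:
%% scaling by 0.4 points the monitor at 1.6GB, and with the 0.5 limit
%% threshold defined below, queues are left unlimited until roughly
%% 0.8GB of reported memory use.)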
%% Note that all other Thresholds are relative to this scaling. --define(MEMORY_LIMIT_SCALING, 0.6). +-define(MEMORY_LIMIT_SCALING, 0.4). -define(LIMIT_THRESHOLD, 0.5). %% don't limit queues when mem use is < this -- cgit v1.2.1 From 55b2e9820b71d3e4b651559486a11c1f90e58cc0 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Dec 2009 17:55:38 +0000 Subject: cosmetic --- src/rabbit_queue_index.erl | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 908a711a..136ff829 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -260,18 +260,16 @@ flush_journal(State = #qistate { segments = Segments, dir = Dir }) -> SegmentsN; false -> Segment1 = - case 0 == array:sparse_size(JEntries) of - true -> - Segment; - false -> - {Hdl, Segment2} = - get_segment_handle(Segment), - array:sparse_foldl( - fun write_entry_to_segment/3, Hdl, - JEntries), - ok = file_handle_cache:sync(Hdl), - Segment2 #segment { journal_entries = - journal_new() } + case array:sparse_size(JEntries) of + 0 -> Segment; + _ -> {Hdl, Segment2} = + get_segment_handle(Segment), + array:sparse_foldl( + fun write_entry_to_segment/3, Hdl, + JEntries), + ok = file_handle_cache:sync(Hdl), + Segment2 #segment { journal_entries = + journal_new() } end, segment_store(Segment1, SegmentsN) end @@ -318,7 +316,7 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> %% SegNums is sorted, ascending. {LowSeqIdSeg, NextSeqId} = case SegNums of - [] -> {0, 0}; + [] -> {0, 0}; [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), reconstruct_seq_id(1 + lists:last(SegNums), 0)} end, -- cgit v1.2.1 From eeb8e7a35ca93bc3f364475052760d939ba7088a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 4 Dec 2009 00:06:44 +0000 Subject: combined steps 3 and 4 of init, and made segment_find have the same type as dict:find. Dropped the Dir from the Segments, and added segment_find_or_new/3. --- src/rabbit_queue_index.erl | 153 +++++++++++++++++++++++---------------------- 1 file changed, 78 insertions(+), 75 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 136ff829..6345428e 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -116,7 +116,7 @@ })). -type(msg_id() :: binary()). -type(seq_id() :: integer()). --type(seg_dict() :: {dict(), [segment()], file_path()}). +-type(seg_dict() :: {dict(), [segment()]}). -type(qistate() :: #qistate { dir :: file_path(), segments :: seg_dict(), journal_handle :: hdl(), dirty_count :: integer() }). @@ -160,48 +160,45 @@ init(Name) -> %% 3. Load each segment in turn and filter out messages that are %% not in the msg_store, by adding acks to the journal. These %% acks only go to the RAM journal as it doesn't matter if we - %% lose them. Also mark delivered if not clean shutdown. + %% lose them. Also mark delivered if not clean shutdown. Also + %% find the number of unacked messages. AllSegs = all_segment_nums(State2), CleanShutdown = detect_clean_shutdown(Dir), %% We know the journal is empty here, so we don't need to combine %% with the journal, and we don't need to worry about messages %% that have been acked.
- State3 = #qistate { segments = Segments } = + {State3 = #qistate { segments = Segments }, Count} = lists:foldl( - fun (Seg, StateN = #qistate { segments = SegmentsN }) -> - Segment = segment_find(Seg, SegmentsN), + fun (Seg, {StateN = #qistate { segments = SegmentsN }, CountAcc}) -> + Segment = segment_find_or_new(Seg, Dir, SegmentsN), {SegDict, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), SegmentsN1 = segment_store(Segment1, SegmentsN), - dict:fold( - fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, - StateM) -> - SeqId = reconstruct_seq_id(Seg, RelSeq), - InMsgStore = rabbit_msg_store:contains(MsgId), - case {InMsgStore, CleanShutdown} of - {true, true} -> - StateM; - {true, false} when Del == del -> - StateM; - {true, false} -> - add_to_journal(SeqId, del, StateM); - {false, _} when Del == del -> - add_to_journal(SeqId, ack, StateM); - {false, _} -> - add_to_journal( - SeqId, ack, - add_to_journal(SeqId, del, StateM)) - end - end, StateN #qistate { segments = SegmentsN1 }, SegDict) - end, State2, AllSegs), - %% 4. Go through all segments and calculate the number of unacked - %% messages we have. - Count = lists:foldl( - fun (Seg, CountAcc) -> - #segment { pubs = PubCount, acks = AckCount } = - segment_find(Seg, Segments), - CountAcc + PubCount - AckCount - end, 0, AllSegs), + StateN1 = #qistate { segments = SegmentsN2 } = + dict:fold( + fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, + StateM) -> + SeqId = reconstruct_seq_id(Seg, RelSeq), + InMsgStore = rabbit_msg_store:contains(MsgId), + case {InMsgStore, CleanShutdown} of + {true, true} -> + StateM; + {true, false} when Del == del -> + StateM; + {true, false} -> + add_to_journal(SeqId, del, StateM); + {false, _} when Del == del -> + add_to_journal(SeqId, ack, StateM); + {false, _} -> + add_to_journal( + SeqId, ack, + add_to_journal(SeqId, del, StateM)) + end + end, StateN #qistate { segments=SegmentsN1 }, SegDict), + {ok, #segment { pubs = PubCount, acks = AckCount }} = + segment_find(Seg, SegmentsN2), + {StateN1, CountAcc + PubCount - AckCount} + end, {State2, 0}, AllSegs), {Count, State3}. terminate(State) -> @@ -249,7 +246,7 @@ sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> flush_journal(State = #qistate { dirty_count = 0 }) -> State; -flush_journal(State = #qistate { segments = Segments, dir = Dir }) -> +flush_journal(State = #qistate { segments = Segments }) -> Segments1 = segment_fold( fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, @@ -273,15 +270,16 @@ flush_journal(State = #qistate { segments = Segments, dir = Dir }) -> end, segment_store(Segment1, SegmentsN) end - end, segment_new(Dir), Segments), + end, segments_new(), Segments), {JournalHdl, State1} = get_journal_handle(State #qistate { segments = Segments1 }), ok = file_handle_cache:clear(JournalHdl), State1 #qistate { dirty_count = 0 }. 
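%% For illustration of the ordering above: flush_journal/1 first makes
%% every segment durable (append plus sync, or deletion once pubs ==
%% acks > 0) and only then clears the shared journal, so a crash
%% between the two steps at worst leaves duplicate journal entries,
%% which journal_minus_segment/2 discards on the next startup.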
-read_segment_entries(InitSeqId, State = #qistate { segments = Segments }) -> +read_segment_entries(InitSeqId, State = #qistate { segments = Segments, + dir = Dir }) -> {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - Segment = segment_find(Seg, Segments), + Segment = segment_find_or_new(Seg, Dir, Segments), {SegDict, _PubCount, _AckCount, Segment1 = #segment { journal_entries = JEntries }} = load_segment(false, Segment), @@ -412,7 +410,7 @@ blank_state(QueueName) -> Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), #qistate { dir = Dir, - segments = segment_new(Dir), + segments = segments_new(), journal_handle = undefined, dirty_count = 0 }. @@ -475,58 +473,62 @@ get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> get_segment_handle(Segment = #segment { handle = Hdl }) -> {Hdl, Segment}. -segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_], _Dir}) -> - Segment; %% 1 or (2, matches head) -segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }], _Dir}) -> - Segment; %% 2, matches tail -segment_find(Seg, {Segments, _, Dir}) -> %% no match - case dict:find(Seg, Segments) of - {ok, Segment = #segment{}} -> Segment; - error -> #segment { pubs = 0, - acks = 0, - handle = undefined, - journal_entries = journal_new(), - path = seg_num_to_path(Dir, Seg), - num = Seg - } +segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> + {ok, Segment}; %% 1 or (2, matches head) +segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) -> + {ok, Segment}; %% 2, matches tail +segment_find(Seg, {Segments, _}) -> %% no match + dict:find(Seg, Segments). + +segment_new(Seg, Dir) -> + #segment { pubs = 0, + acks = 0, + handle = undefined, + journal_entries = journal_new(), + path = seg_num_to_path(Dir, Seg), + num = Seg + }. + +segment_find_or_new(Seg, Dir, Segments) -> + case segment_find(Seg, Segments) of + error -> segment_new(Seg, Dir); + {ok, Segment} -> Segment end. segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) - {Segments, [#segment { num = Seg } | Tail], Dir}) -> - {Segments, [Segment | Tail], Dir}; + {Segments, [#segment { num = Seg } | Tail]}) -> + {Segments, [Segment | Tail]}; segment_store(Segment = #segment { num = Seg }, %% 2, matches tail - {Segments, [SegmentA, #segment { num = Seg }], Dir}) -> - {Segments, [SegmentA, Segment], Dir}; -segment_store(Segment = #segment { num = Seg }, - {Segments, [], Dir}) -> - {dict:erase(Seg, Segments), [Segment], Dir}; -segment_store(Segment = #segment { num = Seg }, - {Segments, [SegmentA], Dir}) -> - {dict:erase(Seg, Segments), [Segment, SegmentA], Dir}; + {Segments, [SegmentA, #segment { num = Seg }]}) -> + {Segments, [SegmentA, Segment]}; +segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> + {dict:erase(Seg, Segments), [Segment]}; +segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) -> + {dict:erase(Seg, Segments), [Segment, SegmentA]}; segment_store(Segment = #segment { num = Seg }, - {Segments, [SegmentA, SegmentB], Dir}) -> + {Segments, [SegmentA, SegmentB]}) -> {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)), - [Segment, SegmentA], Dir}. + [Segment, SegmentA]}. 
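%% Usage illustration of the two-slot cache (SegA..SegC hypothetical):
%% the pair is {SegmentsDict, CachedList} with at most two cached
%% segments, most recent first; storing into a full cache demotes the
%% least recent segment into the dict:
%%
%%   S0 = segments_new(),           %% {dict:new(), []}
%%   S1 = segment_store(SegA, S0),  %% {Dict,  [SegA]}
%%   S2 = segment_store(SegB, S1),  %% {Dict,  [SegB, SegA]}
%%   S3 = segment_store(SegC, S2),  %% {Dict', [SegC, SegB]} -- SegA
%%                                  %% is moved into the dict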
-segment_fold(Fun, Acc, {Segments, [], _Dir}) -> +segment_fold(Fun, Acc, {Segments, []}) -> dict:fold(Fun, Acc, Segments); -segment_fold(Fun, Acc, {Segments, CachedSegments, _Dir}) -> +segment_fold(Fun, Acc, {Segments, CachedSegments}) -> Acc1 = lists:foldl(fun (Segment = #segment { num = Num }, AccN) -> Fun(Num, Segment, AccN) end, Acc, CachedSegments), dict:fold(Fun, Acc1, Segments). -segment_map(Fun, {Segments, CachedSegments, Dir}) -> +segment_map(Fun, {Segments, CachedSegments}) -> {dict:map(Fun, Segments), lists:map(fun (Segment = #segment { num = Num }) -> Fun(Num, Segment) end, - CachedSegments), Dir}. + CachedSegments)}. -segment_fetch_keys({Segments, CachedSegments, _Dir}) -> +segment_fetch_keys({Segments, CachedSegments}) -> lists:map(fun (Segment) -> Segment#segment.num end, CachedSegments) ++ dict:fetch_keys(Segments). -segment_new(Dir) -> - {dict:new(), [], Dir}. +segments_new() -> + {dict:new(), []}. get_journal_handle(State = #qistate { journal_handle = undefined, dir = Dir }) -> @@ -583,7 +585,7 @@ terminate(StoreShutdown, State = true -> store_clean_shutdown(Dir); false -> ok end, - State #qistate { journal_handle = undefined, segments = segment_new(Dir) }. + State #qistate { journal_handle = undefined, segments = segments_new() }. %%---------------------------------------------------------------------------- %% Majors @@ -712,11 +714,12 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> end. add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, - segments = Segments }) -> + segments = Segments, + dir = Dir }) -> {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), Segment = #segment { journal_entries = SegJDict, pubs = PubCount, acks = AckCount } = - segment_find(Seg, Segments), + segment_find_or_new(Seg, Dir, Segments), SegJDict1 = add_to_journal(RelSeq, Action, SegJDict), Segment1 = Segment #segment { journal_entries = SegJDict1 }, Segment2 = -- cgit v1.2.1 From 2987800846c3baeca14f73a70266ba089a32bfb1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 4 Dec 2009 12:10:10 +0000 Subject: refactoring --- src/rabbit_queue_index.erl | 73 +++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6345428e..ed469849 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -156,7 +156,8 @@ init(Name) -> State1 = load_journal(State), %% 2. Flush the journal. This makes life easier for everyone, as %% it means there won't be any publishes in the journal alone. - State2 = #qistate { dir = Dir } = flush_journal(State1), + State2 = #qistate { dir = Dir, segments = Segments, + dirty_count = DCount } = flush_journal(State1), %% 3. Load each segment in turn and filter out messages that are %% not in the msg_store, by adding acks to the journal. These %% acks only go to the RAM journal as it doesn't matter if we @@ -167,39 +168,41 @@ init(Name) -> %% We know the journal is empty here, so we don't need to combine %% with the journal, and we don't need to worry about messages %% that have been acked. 
- {State3 = #qistate { segments = Segments }, Count} = + {Segments1, Count, DCount1} = lists:foldl( - fun (Seg, {StateN = #qistate { segments = SegmentsN }, CountAcc}) -> - Segment = segment_find_or_new(Seg, Dir, SegmentsN), + fun (Seg, {Segments2, CountAcc, DCountAcc}) -> + Segment = segment_find_or_new(Seg, Dir, Segments2), {SegDict, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), - SegmentsN1 = segment_store(Segment1, SegmentsN), - StateN1 = #qistate { segments = SegmentsN2 } = + {Segment2 = #segment { pubs = PubCount, acks = AckCount }, + DCountAcc1} = dict:fold( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, - StateM) -> - SeqId = reconstruct_seq_id(Seg, RelSeq), + {Segment3, DCountAcc2}) -> InMsgStore = rabbit_msg_store:contains(MsgId), case {InMsgStore, CleanShutdown} of {true, true} -> - StateM; + {Segment3, DCountAcc}; {true, false} when Del == del -> - StateM; + {Segment3, DCountAcc}; {true, false} -> - add_to_journal(SeqId, del, StateM); + {add_to_journal(RelSeq, del, Segment3), + DCountAcc2 + 1}; {false, _} when Del == del -> - add_to_journal(SeqId, ack, StateM); + {add_to_journal(RelSeq, ack, Segment3), + DCountAcc2 + 1}; {false, _} -> - add_to_journal( - SeqId, ack, - add_to_journal(SeqId, del, StateM)) + {add_to_journal( + RelSeq, ack, + add_to_journal( + RelSeq, del, Segment3)), + DCountAcc2 + 2} end - end, StateN #qistate { segments=SegmentsN1 }, SegDict), - {ok, #segment { pubs = PubCount, acks = AckCount }} = - segment_find(Seg, SegmentsN2), - {StateN1, CountAcc + PubCount - AckCount} - end, {State2, 0}, AllSegs), - {Count, State3}. + end, {Segment1, DCountAcc}, SegDict), + {segment_store(Segment2, Segments2), + CountAcc + PubCount - AckCount, DCountAcc1} + end, {Segments, 0, DCount}, AllSegs), + {Count, State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. terminate(State) -> terminate(true, State). @@ -491,7 +494,7 @@ segment_new(Seg, Dir) -> segment_find_or_new(Seg, Dir, Segments) -> case segment_find(Seg, Segments) of - error -> segment_new(Seg, Dir); + error -> segment_new(Seg, Dir); {ok, Segment} -> Segment end. 
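For orientation, the RelSeq values these folds now work in come from the module's seq id split: a sequence id factors into a segment number and an index within that segment. The split is quoted from elsewhere in the module, and reconstruct_seq_id is necessarily its inverse (its exact text is not shown in these hunks):

-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))

seq_id_to_seg_and_rel_seq_id(SeqId) ->
    { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }.

reconstruct_seq_id(Seg, RelSeq) ->
    (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq.

So seq id 16385 is segment 1, relative position 1. The refactored fold above can drop reconstruct_seq_id and stay in RelSeq terms because every entry it visits belongs to the single segment Seg.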
@@ -717,19 +720,21 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, segments = Segments, dir = Dir }) -> {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - Segment = #segment { journal_entries = SegJDict, - pubs = PubCount, acks = AckCount } = - segment_find_or_new(Seg, Dir, Segments), - SegJDict1 = add_to_journal(RelSeq, Action, SegJDict), - Segment1 = Segment #segment { journal_entries = SegJDict1 }, - Segment2 = - case Action of - del -> Segment1; - ack -> Segment1 #segment { acks = AckCount + 1 }; - {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } - end, + Segment = segment_find_or_new(Seg, Dir, Segments), + Segment1 = add_to_journal(RelSeq, Action, Segment), State #qistate { dirty_count = DCount + 1, - segments = segment_store(Segment2, Segments) }; + segments = segment_store(Segment1, Segments) }; + +add_to_journal(RelSeq, Action, Segment = + #segment { journal_entries = SegJournal, + pubs = PubCount, acks = AckCount }) -> + SegJournal1 = add_to_journal(RelSeq, Action, SegJournal), + Segment1 = Segment #segment { journal_entries = SegJournal1 }, + case Action of + del -> Segment1; + ack -> Segment1 #segment { acks = AckCount + 1 }; + {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } + end; %% This is a more relaxed version of deliver_or_ack_msg because we can %% have dels or acks in the journal without the corresponding -- cgit v1.2.1 From e5266ebce6c0c047d0b3cd1aca281487dc1bb7b4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 4 Dec 2009 12:38:51 +0000 Subject: cosmetic --- src/rabbit_queue_index.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index ed469849..c95c8033 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -726,10 +726,10 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, segments = segment_store(Segment1, Segments) }; add_to_journal(RelSeq, Action, Segment = - #segment { journal_entries = SegJournal, + #segment { journal_entries = JEntries, pubs = PubCount, acks = AckCount }) -> - SegJournal1 = add_to_journal(RelSeq, Action, SegJournal), - Segment1 = Segment #segment { journal_entries = SegJournal1 }, + JEntries1 = add_to_journal(RelSeq, Action, JEntries), + Segment1 = Segment #segment { journal_entries = JEntries1 }, case Action of del -> Segment1; ack -> Segment1 #segment { acks = AckCount + 1 }; -- cgit v1.2.1 From 763fb6f992f32d767900024e8a2ea5a070ad52a3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 4 Dec 2009 13:02:00 +0000 Subject: segments now stored in array, not dict --- src/rabbit_queue_index.erl | 188 ++++++++++++++++++++++----------------------- 1 file changed, 93 insertions(+), 95 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index c95c8033..56cbee1e 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -172,11 +172,11 @@ init(Name) -> lists:foldl( fun (Seg, {Segments2, CountAcc, DCountAcc}) -> Segment = segment_find_or_new(Seg, Dir, Segments2), - {SegDict, _PubCount, _AckCount, Segment1} = + {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), {Segment2 = #segment { pubs = PubCount, acks = AckCount }, DCountAcc1} = - dict:fold( + array:sparse_foldl( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, {Segment3, DCountAcc2}) -> InMsgStore = rabbit_msg_store:contains(MsgId), @@ -198,7 +198,7 @@ init(Name) -> RelSeq, del, Segment3)), DCountAcc2 + 2} end - 
end, {Segment1, DCountAcc}, SegDict), + end, {Segment1, DCountAcc}, SegEntries), {segment_store(Segment2, Segments2), CountAcc + PubCount - AckCount, DCountAcc1} end, {Segments, 0, DCount}, AllSegs), @@ -283,18 +283,16 @@ read_segment_entries(InitSeqId, State = #qistate { segments = Segments, dir = Dir }) -> {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), Segment = segment_find_or_new(Seg, Dir, Segments), - {SegDict, _PubCount, _AckCount, + {SegEntries, _PubCount, _AckCount, Segment1 = #segment { journal_entries = JEntries }} = load_segment(false, Segment), - SegDict1 = journal_plus_segment(JEntries, SegDict), + SegEntries1 = journal_plus_segment(JEntries, SegEntries), %% deliberately sort the list desc, because foldl will reverse it - RelSeqs = rev_sort(dict:fetch_keys(SegDict1)), - {lists:foldl(fun (RelSeq, Acc) -> - {{MsgId, IsPersistent}, IsDelivered, no_ack} = - dict:fetch(RelSeq, SegDict1), - [ {MsgId, reconstruct_seq_id(Seg, RelSeq), - IsPersistent, IsDelivered == del} | Acc ] - end, [], RelSeqs), + {array:sparse_foldr( + fun (RelSeq, {{MsgId, IsPersistent}, IsDelivered, no_ack}, Acc) -> + [ {MsgId, reconstruct_seq_id(Seg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ] + end, [], SegEntries1), State #qistate { segments = segment_store(Segment1, Segments) }}. next_segment_boundary(SeqId) -> @@ -421,9 +419,6 @@ blank_state(QueueName) -> journal_new() -> array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). -rev_sort(List) -> - lists:sort(fun (A, B) -> B < A end, List). - seq_id_to_seg_and_rel_seq_id(SeqId) -> { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. @@ -599,8 +594,9 @@ terminate(StoreShutdown, State = %% Does not do any combining with the journal at all. The PubCount %% that comes back is the number of publishes in the segment. The %% number of unacked msgs is PubCount - AckCount. If KeepAcks is -%% false, then dict:size(SegDict) == PubCount - AckCount. If KeepAcks -%% is true, then dict:size(SegDict) == PubCount. +%% false, then array:sparse_size(SegEntries) == PubCount - +%% AckCount. If KeepAcks is true, then array:sparse_size(SegEntries) +%% == PubCount. load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) -> SegmentExists = case SegHdl of @@ -613,21 +609,22 @@ load_segment(KeepAcks, true -> {Hdl, Segment1} = get_segment_handle(Segment), {ok, 0} = file_handle_cache:position(Hdl, bof), - {SegDict, PubCount, AckCount} = - load_segment_entries(KeepAcks, Hdl, dict:new(), 0, 0), - {SegDict, PubCount, AckCount, Segment1} + {SegEntries, PubCount, AckCount} = + load_segment_entries(KeepAcks, Hdl, journal_new(), 0, 0), + {SegEntries, PubCount, AckCount, Segment1} end. 
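Why a fixed-size sparse array is a good fit here can be seen in isolation (a sketch, not module code): unset slots cost nothing, array:sparse_foldl visits only slots that were actually set, and array:reset/2 puts a slot back to the default, which is what erasing an acked entry amounts to:

demo() ->
    A0 = array:new([{default, undefined}, fixed, {size, 16384}]),
    A1 = array:set(3, {{msg_a, true}, no_del, no_ack}, A0),
    A2 = array:set(7, {{msg_b, true}, del, no_ack}, A1),
    %% only the two set slots are visited; the other 16382 are skipped
    2 = array:sparse_foldl(fun (_RelSeq, _Entry, N) -> N + 1 end, 0, A2),
    A3 = array:reset(3, A2), %% slot 3 back to undefined, i.e. erased
    1 = array:sparse_foldl(fun (_RelSeq, _Entry, N) -> N + 1 end, 0, A3),
    ok.

A further benefit, visible in read_segment_entries above: array indices are traversed in order, so the old rev_sort/1 over dict:fetch_keys could be deleted in favour of array:sparse_foldr.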
-load_segment_entries(KeepAcks, Hdl, SegDict, PubCount, AckCount) -> +load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> case file_handle_cache:read(Hdl, 1) of {ok, <>} -> {ok, LSB} = file_handle_cache:read( Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), <> = <>, - {AckCount1, SegDict1} = - deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict), - load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount, AckCount1); + {AckCount1, SegEntries1} = + deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries), + load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount, + AckCount1); {ok, <>} -> %% because we specify /binary, and binaries are complete @@ -636,23 +633,24 @@ load_segment_entries(KeepAcks, Hdl, SegDict, PubCount, AckCount) -> file_handle_cache:read( Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), <> = <>, - SegDict1 = - dict:store(RelSeq, - {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, - SegDict), - load_segment_entries(KeepAcks, Hdl, SegDict1, PubCount+1, AckCount); + SegEntries1 = + array:set(RelSeq, + {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, + SegEntries), + load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount + 1, + AckCount); _ErrOrEoF -> - {SegDict, PubCount, AckCount} + {SegEntries, PubCount, AckCount} end. -deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegDict) -> - case dict:find(RelSeq, SegDict) of - {ok, {PubRecord, no_del, no_ack}} -> - {AckCount, dict:store(RelSeq, {PubRecord, del, no_ack}, SegDict)}; - {ok, {PubRecord, del, no_ack}} when KeepAcks -> - {AckCount + 1, dict:store(RelSeq, {PubRecord, del, ack}, SegDict)}; - {ok, {_PubRecord, del, no_ack}} -> - {AckCount + 1, dict:erase(RelSeq, SegDict)} +deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries) -> + case array:get(RelSeq, SegEntries) of + {PubRecord, no_del, no_ack} -> + {AckCount, array:set(RelSeq, {PubRecord, del, no_ack}, SegEntries)}; + {PubRecord, del, no_ack} when KeepAcks -> + {AckCount + 1, array:set(RelSeq, {PubRecord, del, ack}, SegEntries)}; + {_PubRecord, del, no_ack} -> + {AckCount + 1, array:reset(RelSeq, SegEntries)} end. %% Loading Journal. This isn't idempotent and will mess up the counts @@ -671,13 +669,13 @@ load_journal(State) -> %% We want to keep acks in so that we can remove %% them if duplicates are in the journal. The counts %% here are purely from the segment itself. - {SegDict, PubCountInSeg, AckCountInSeg, Segment1} = + {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} = load_segment(true, Segment), %% Removed counts here are the number of pubs and %% acks that are duplicates - i.e. found in both the %% segment and journal. {JEntries1, PubsRemoved, AcksRemoved} = - journal_minus_segment(JEntries, SegDict), + journal_minus_segment(JEntries, SegEntries), PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, Segment1 #segment { journal_entries = JEntries1, @@ -759,136 +757,136 @@ add_to_journal(RelSeq, Action, SegJArray) -> %% holding for that segment in memory. There must be no %% duplicates. Used when providing segment entries to the variable %% queue. 
-journal_plus_segment(JEntries, SegDict) -> +journal_plus_segment(JEntries, SegEntries) -> array:sparse_foldl( - fun (RelSeq, JObj, SegDictOut) -> - SegEntry = case dict:find(RelSeq, SegDictOut) of - error -> not_found; - {ok, SObj = {_, _, _}} -> SObj + fun (RelSeq, JObj, SegEntriesOut) -> + SegEntry = case array:get(RelSeq, SegEntriesOut) of + undefined -> not_found; + SObj = {_, _, _} -> SObj end, - journal_plus_segment(JObj, SegEntry, RelSeq, SegDictOut) - end, SegDict, JEntries). + journal_plus_segment(JObj, SegEntry, RelSeq, SegEntriesOut) + end, SegEntries, JEntries). -%% Here, the OutDict is the SegDict which we may be adding to (for +%% Here, the Out is the Seg Array which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment). journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, not_found, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, Out) -> + array:set(RelSeq, Obj, Out); journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, not_found, - RelSeq, OutDict) -> - dict:store(RelSeq, Obj, OutDict); + RelSeq, Out) -> + array:set(RelSeq, Obj, Out); journal_plus_segment({{_MsgId, _IsPersistent}, del, ack}, not_found, - RelSeq, OutDict) -> - dict:erase(RelSeq, OutDict); + RelSeq, Out) -> + array:reset(RelSeq, Out); journal_plus_segment({no_pub, del, no_ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:store(RelSeq, {PubRecord, del, no_ack}, OutDict); + RelSeq, Out) -> + array:set(RelSeq, {PubRecord, del, no_ack}, Out); journal_plus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutDict) -> - dict:erase(RelSeq, OutDict); + RelSeq, Out) -> + array:reset(RelSeq, Out); journal_plus_segment({no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutDict) -> - dict:erase(RelSeq, OutDict). + RelSeq, Out) -> + array:reset(RelSeq, Out). %% Remove from the journal entries for a segment, items that are %% duplicates of entries found in the segment itself. Used on start up %% to clean up the journal. -journal_minus_segment(JEntries, SegDict) -> +journal_minus_segment(JEntries, SegEntries) -> array:sparse_foldl( fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> - SegEntry = case dict:find(RelSeq, SegDict) of - error -> not_found; - {ok, SObj = {_, _, _}} -> SObj + SegEntry = case array:get(RelSeq, SegEntries) of + undefined -> not_found; + SObj = {_, _, _} -> SObj end, journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, PubsRemoved, AcksRemoved) end, {journal_new(), 0, 0}, JEntries). -%% Here, the OutArray is a fresh journal that we're filling with valid +%% Here, the Out is a fresh journal that we're filling with valid %% entries. PubsRemoved and AcksRemoved only get increased when the a %% publish or ack is in both the journal and the segment. %% Both the same. 
Must be at least the publish journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, no_ack}, - _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {OutArray, PubsRemoved + 1, AcksRemoved}; + _RelSeq, Out, PubsRemoved, AcksRemoved) -> + {Out, PubsRemoved + 1, AcksRemoved}; journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, ack}, - _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {OutArray, PubsRemoved + 1, AcksRemoved + 1}; + _RelSeq, Out, PubsRemoved, AcksRemoved) -> + {Out, PubsRemoved + 1, AcksRemoved + 1}; %% Just publish in journal journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, not_found, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; %% Just deliver in journal journal_minus_segment(Obj = {no_pub, del, no_ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, no_ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {OutArray, PubsRemoved, AcksRemoved}; + _RelSeq, Out, PubsRemoved, AcksRemoved) -> + {Out, PubsRemoved, AcksRemoved}; %% Just ack in journal journal_minus_segment(Obj = {no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, no_del, ack}, {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {OutArray, PubsRemoved, AcksRemoved}; + _RelSeq, Out, PubsRemoved, AcksRemoved) -> + {Out, PubsRemoved, AcksRemoved}; %% Publish and deliver in journal journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, not_found, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({PubRecord, del, no_ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, del, no_ack}, OutArray), + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, del, no_ack}, Out), PubsRemoved + 1, AcksRemoved}; %% Deliver and ack in journal journal_minus_segment(Obj = {no_pub, del, ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, OutArray), PubsRemoved, AcksRemoved}; + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, no_del, ack}, OutArray), + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, no_del, ack}, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, del, ack}, - _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {OutArray, PubsRemoved, AcksRemoved + 1}; + _RelSeq, Out, PubsRemoved, 
AcksRemoved) -> + {Out, PubsRemoved, AcksRemoved + 1}; %% Publish, deliver and ack in journal journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, not_found, - _RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {OutArray, PubsRemoved, AcksRemoved}; + _RelSeq, Out, PubsRemoved, AcksRemoved) -> + {Out, PubsRemoved, AcksRemoved}; journal_minus_segment({PubRecord, del, ack}, {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, del, ack}, OutArray), + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, del, ack}, Out), PubsRemoved + 1, AcksRemoved}; journal_minus_segment({PubRecord, del, ack}, {PubRecord = {_MsgId, _IsPersistent}, del, no_ack}, - RelSeq, OutArray, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, no_del, ack}, OutArray), + RelSeq, Out, PubsRemoved, AcksRemoved) -> + {array:set(RelSeq, {no_pub, no_del, ack}, Out), PubsRemoved + 1, AcksRemoved}. -- cgit v1.2.1 From ae85a1c4506145af60da51a45bfdd8432d0dc86f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 4 Dec 2009 13:10:21 +0000 Subject: cough, splutter, sneeze etc --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 56cbee1e..46d606d3 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -605,7 +605,7 @@ load_segment(KeepAcks, end, case SegmentExists of false -> - {dict:new(), 0, 0, Segment}; + {journal_new(), 0, 0, Segment}; true -> {Hdl, Segment1} = get_segment_handle(Segment), {ok, 0} = file_handle_cache:position(Hdl, bof), -- cgit v1.2.1 From bf8ce668b53e940fcfb2b3cf629901e7698dcfc6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 4 Dec 2009 13:16:36 +0000 Subject: appropriately renamed --- src/rabbit_queue_index.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 46d606d3..347742d6 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -269,7 +269,7 @@ flush_journal(State = #qistate { segments = Segments }) -> JEntries), ok = file_handle_cache:sync(Hdl), Segment2 #segment { journal_entries = - journal_new() } + array_new() } end, segment_store(Segment1, SegmentsN) end @@ -416,7 +416,7 @@ blank_state(QueueName) -> dirty_count = 0 }. -journal_new() -> +array_new() -> array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). seq_id_to_seg_and_rel_seq_id(SeqId) -> @@ -482,7 +482,7 @@ segment_new(Seg, Dir) -> #segment { pubs = 0, acks = 0, handle = undefined, - journal_entries = journal_new(), + journal_entries = array_new(), path = seg_num_to_path(Dir, Seg), num = Seg }. @@ -605,12 +605,12 @@ load_segment(KeepAcks, end, case SegmentExists of false -> - {journal_new(), 0, 0, Segment}; + {array_new(), 0, 0, Segment}; true -> {Hdl, Segment1} = get_segment_handle(Segment), {ok, 0} = file_handle_cache:position(Hdl, bof), {SegEntries, PubCount, AckCount} = - load_segment_entries(KeepAcks, Hdl, journal_new(), 0, 0), + load_segment_entries(KeepAcks, Hdl, array_new(), 0, 0), {SegEntries, PubCount, AckCount, Segment1} end. @@ -809,7 +809,7 @@ journal_minus_segment(JEntries, SegEntries) -> end, journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, PubsRemoved, AcksRemoved) - end, {journal_new(), 0, 0}, JEntries). + end, {array_new(), 0, 0}, JEntries). %% Here, the Out is a fresh journal that we're filling with valid %% entries. 
PubsRemoved and AcksRemoved only get increased when the a -- cgit v1.2.1 From cf21ce5caad7a3b71f5430ced7e7e32c9c9179c4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 4 Dec 2009 22:12:30 +0000 Subject: cosmetic and some refactoring --- src/rabbit_queue_index.erl | 163 +++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 86 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 347742d6..eda0a43a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -179,31 +179,28 @@ init(Name) -> array:sparse_foldl( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, {Segment3, DCountAcc2}) -> - InMsgStore = rabbit_msg_store:contains(MsgId), - case {InMsgStore, CleanShutdown} of - {true, true} -> - {Segment3, DCountAcc}; - {true, false} when Del == del -> - {Segment3, DCountAcc}; - {true, false} -> - {add_to_journal(RelSeq, del, Segment3), - DCountAcc2 + 1}; - {false, _} when Del == del -> - {add_to_journal(RelSeq, ack, Segment3), - DCountAcc2 + 1}; - {false, _} -> - {add_to_journal( - RelSeq, ack, - add_to_journal( - RelSeq, del, Segment3)), - DCountAcc2 + 2} - end + {Segment4, DCountDelta} = + maybe_add_to_journal( + rabbit_msg_store:contains(MsgId), + CleanShutdown, Del, RelSeq, Segment3), + {Segment4, DCountAcc2 + DCountDelta} end, {Segment1, DCountAcc}, SegEntries), {segment_store(Segment2, Segments2), CountAcc + PubCount - AckCount, DCountAcc1} end, {Segments, 0, DCount}, AllSegs), {Count, State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. +maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> + {Segment, 0}; +maybe_add_to_journal( true, false, del, _RelSeq, Segment) -> + {Segment, 0}; +maybe_add_to_journal( true, false, _Del, RelSeq, Segment) -> + {add_to_journal(RelSeq, del, Segment), 1}; +maybe_add_to_journal(false, _, del, RelSeq, Segment) -> + {add_to_journal(RelSeq, ack, Segment), 1}; +maybe_add_to_journal(false, _, _Del, RelSeq, Segment) -> + {add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)), 2}. + terminate(State) -> terminate(true, State). @@ -212,34 +209,30 @@ terminate_and_erase(State) -> ok = delete_queue_directory(State1 #qistate.dir), State1. -write_published(MsgId, SeqId, IsPersistent, State) - when is_binary(MsgId) -> +write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> ?MSG_ID_BYTES = size(MsgId), {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, - [<<(case IsPersistent of - true -> ?PUB_PERSIST_JPREFIX; - false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, - MsgId]), + ok = file_handle_cache:append( + JournalHdl, [<<(case IsPersistent of + true -> ?PUB_PERSIST_JPREFIX; + false -> ?PUB_TRANS_JPREFIX + end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, MsgId]), maybe_flush_journal(add_to_journal(SeqId, {MsgId, IsPersistent}, State1)). write_delivered(SeqId, State) -> {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, - <>), + ok = file_handle_cache:append( + JournalHdl, <>), maybe_flush_journal(add_to_journal(SeqId, del, State1)). write_acks(SeqIds, State) -> {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, - [<> || SeqId <- SeqIds]), - State2 = lists:foldl(fun (SeqId, StateN) -> - add_to_journal(SeqId, ack, StateN) - end, State1, SeqIds), - maybe_flush_journal(State2). 
+ ok = file_handle_cache:append( + JournalHdl, [<> || + SeqId <- SeqIds]), + maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> + add_to_journal(SeqId, ack, StateN) + end, State1, SeqIds)). sync_seq_ids(_SeqIds, State = #qistate { journal_handle = undefined }) -> State; @@ -255,23 +248,11 @@ flush_journal(State = #qistate { segments = Segments }) -> fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, acks = AckCount } = Segment, SegmentsN) -> case PubCount > 0 andalso PubCount == AckCount of - true -> - ok = delete_segment(Segment), - SegmentsN; - false -> - Segment1 = - case array:sparse_size(JEntries) of - 0 -> Segment; - _ -> {Hdl, Segment2} = - get_segment_handle(Segment), - array:sparse_foldl( - fun write_entry_to_segment/3, Hdl, - JEntries), - ok = file_handle_cache:sync(Hdl), - Segment2 #segment { journal_entries = - array_new() } - end, - segment_store(Segment1, SegmentsN) + true -> ok = delete_segment(Segment), + SegmentsN; + false -> segment_store( + append_journal_to_segment(Segment, JEntries), + SegmentsN) end end, segments_new(), Segments), {JournalHdl, State1} = @@ -279,6 +260,15 @@ flush_journal(State = #qistate { segments = Segments }) -> ok = file_handle_cache:clear(JournalHdl), State1 #qistate { dirty_count = 0 }. +append_journal_to_segment(Segment, JEntries) -> + case array:sparse_size(JEntries) of + 0 -> Segment; + _ -> {Hdl, Segment1} = get_segment_handle(Segment), + array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), + ok = file_handle_cache:sync(Hdl), + Segment1 #segment { journal_entries = array_new() } + end. + read_segment_entries(InitSeqId, State = #qistate { segments = Segments, dir = Dir }) -> {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), @@ -544,8 +534,8 @@ bool_to_int(false) -> 0. write_entry_to_segment(_RelSeq, {{_MsgId, _IsPersistent}, del, ack}, Hdl) -> Hdl; -write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> - ok = case Publish of +write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> + ok = case Pub of no_pub -> ok; {MsgId, IsPersistent} -> @@ -555,14 +545,16 @@ write_entry_to_segment(RelSeq, {Publish, Del, Ack}, Hdl) -> RelSeq:?REL_SEQ_BITS>>, MsgId]) end, ok = case {Del, Ack} of - {no_del, no_ack} -> ok; - _ -> Binary = <>, - Data = case {Del, Ack} of - {del, ack} -> [Binary, Binary]; - _ -> Binary - end, - file_handle_cache:append(Hdl, Data) + {no_del, no_ack} -> + ok; + _ -> + Binary = <>, + file_handle_cache:append( + Hdl, case {Del, Ack} of + {del, ack} -> [Binary, Binary]; + _ -> Binary + end) end, Hdl. @@ -645,11 +637,11 @@ load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries) -> case array:get(RelSeq, SegEntries) of - {PubRecord, no_del, no_ack} -> - {AckCount, array:set(RelSeq, {PubRecord, del, no_ack}, SegEntries)}; - {PubRecord, del, no_ack} when KeepAcks -> - {AckCount + 1, array:set(RelSeq, {PubRecord, del, ack}, SegEntries)}; - {_PubRecord, del, no_ack} -> + {Pub, no_del, no_ack} -> + {AckCount, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; + {Pub, del, no_ack} when KeepAcks -> + {AckCount + 1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; + {_Pub, del, no_ack} -> {AckCount + 1, array:reset(RelSeq, SegEntries)} end. 
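The {Pub, Del, Ack} triples reflect how a segment file encodes an entry's history: the publish record is written once, and delivery and ack are each a further occurrence of the same rel-seq-only record, which is why write_entry_to_segment above emits [Binary, Binary] for {del, ack}. On the read side each repeat advances the entry one step, which is all deliver_or_ack_msg/4 really does; distilled (function name illustrative):

advance({Pub, no_del, no_ack}) -> {Pub, del, no_ack}; %% 1st repeat: delivered
advance({Pub, del,    no_ack}) -> {Pub, del, ack}.    %% 2nd repeat: acked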
@@ -700,11 +692,10 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> %% rabbit_msg_file:read_next/2 <> = <>, - Publish = {MsgId, - case Prefix of - ?PUB_PERSIST_JPREFIX -> true; - ?PUB_TRANS_JPREFIX -> false - end}, + Publish = {MsgId, case Prefix of + ?PUB_PERSIST_JPREFIX -> true; + ?PUB_TRANS_JPREFIX -> false + end}, load_journal_entries( add_to_journal(SeqId, Publish, State)); _ErrOrEoF -> %% err, we've lost at least a publish @@ -747,10 +738,10 @@ add_to_journal(RelSeq, Action, SegJArray) -> del -> {no_pub, del, no_ack}; ack -> {no_pub, no_del, ack} end, SegJArray); - ({PubRecord, no_del, no_ack}) when Action == del -> - array:set(RelSeq, {PubRecord, del, no_ack}, SegJArray); - ({PubRecord, Del, no_ack}) when Action == ack -> - array:set(RelSeq, {PubRecord, Del, ack}, SegJArray) + ({Pub, no_del, no_ack}) when Action == del -> + array:set(RelSeq, {Pub, del, no_ack}, SegJArray); + ({Pub, Del, no_ack}) when Action == ack -> + array:set(RelSeq, {Pub, Del, ack}, SegJArray) end. %% Combine what we have just read from a segment file with what we're @@ -784,9 +775,9 @@ journal_plus_segment({{_MsgId, _IsPersistent}, del, ack}, array:reset(RelSeq, Out); journal_plus_segment({no_pub, del, no_ack}, - {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, + {Pub = {_MsgId, _IsPersistent}, no_del, no_ack}, RelSeq, Out) -> - array:set(RelSeq, {PubRecord, del, no_ack}, Out); + array:set(RelSeq, {Pub, del, no_ack}, Out); journal_plus_segment({no_pub, del, ack}, {{_MsgId, _IsPersistent}, no_del, no_ack}, @@ -854,8 +845,8 @@ journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, not_found, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; -journal_minus_segment({PubRecord, del, no_ack}, - {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, +journal_minus_segment({Pub, del, no_ack}, + {Pub = {_MsgId, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, del, no_ack}, Out), PubsRemoved + 1, AcksRemoved}; @@ -880,13 +871,13 @@ journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, not_found, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved, AcksRemoved}; -journal_minus_segment({PubRecord, del, ack}, - {PubRecord = {_MsgId, _IsPersistent}, no_del, no_ack}, +journal_minus_segment({Pub, del, ack}, + {Pub = {_MsgId, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, del, ack}, Out), PubsRemoved + 1, AcksRemoved}; -journal_minus_segment({PubRecord, del, ack}, - {PubRecord = {_MsgId, _IsPersistent}, del, no_ack}, +journal_minus_segment({Pub, del, ack}, + {Pub = {_MsgId, _IsPersistent}, del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, no_del, ack}, Out), PubsRemoved + 1, AcksRemoved}. 
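One worked case may help fix the clause table above (Pub stands for some {MsgId, IsPersistent} pair; array_new/0 is this module's constructor): suppose the journal holds pub, del and ack for RelSeq 5 while the segment file already contains the pub and the del. Then

journal_minus_segment({Pub, del, ack}, {Pub, del, no_ack}, 5, array_new(), 0, 0)

returns {array:set(5, {no_pub, no_del, ack}, array_new()), 1, 0}: the duplicated publish and del are stripped, PubsRemoved goes up by one, and only the ack survives into the fresh journal, to be applied later.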
-- cgit v1.2.1 From a66d76441171dad034017e5fe48db04790683266 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 6 Dec 2009 13:51:11 +0000 Subject: cosmetic --- src/rabbit_queue_index.erl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index eda0a43a..7a880bd4 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -273,16 +273,13 @@ read_segment_entries(InitSeqId, State = #qistate { segments = Segments, dir = Dir }) -> {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), Segment = segment_find_or_new(Seg, Dir, Segments), - {SegEntries, _PubCount, _AckCount, - Segment1 = #segment { journal_entries = JEntries }} = - load_segment(false, Segment), - SegEntries1 = journal_plus_segment(JEntries, SegEntries), - %% deliberately sort the list desc, because foldl will reverse it + {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), + #segment { journal_entries = JEntries } = Segment1, {array:sparse_foldr( fun (RelSeq, {{MsgId, IsPersistent}, IsDelivered, no_ack}, Acc) -> [ {MsgId, reconstruct_seq_id(Seg, RelSeq), IsPersistent, IsDelivered == del} | Acc ] - end, [], SegEntries1), + end, [], journal_plus_segment(JEntries, SegEntries)), State #qistate { segments = segment_store(Segment1, Segments) }}. next_segment_boundary(SeqId) -> @@ -714,11 +711,11 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, State #qistate { dirty_count = DCount + 1, segments = segment_store(Segment1, Segments) }; -add_to_journal(RelSeq, Action, Segment = - #segment { journal_entries = JEntries, - pubs = PubCount, acks = AckCount }) -> - JEntries1 = add_to_journal(RelSeq, Action, JEntries), - Segment1 = Segment #segment { journal_entries = JEntries1 }, +add_to_journal(RelSeq, Action, + Segment = #segment { journal_entries = JEntries, + pubs = PubCount, acks = AckCount }) -> + Segment1 = Segment #segment { + journal_entries = add_to_journal(RelSeq, Action, JEntries) }, case Action of del -> Segment1; ack -> Segment1 #segment { acks = AckCount + 1 }; -- cgit v1.2.1 From 1aa82eccc289e9a4026b5f4efb5fe37058649bc1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Dec 2009 16:32:22 +0000 Subject: Read 2 bytes instead of one at the start of load_segment_entries. This makes the code cleaner and avoids a further read in the case of a del or ack --- src/rabbit_queue_index.erl | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 7a880bd4..014afbc8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -57,7 +57,6 @@ -define(SEGMENT_EXTENSION, ".idx"). -define(REL_SEQ_BITS, 14). --define(REL_SEQ_BITS_BYTE_ALIGNED, (?REL_SEQ_BITS + 8 - (?REL_SEQ_BITS rem 8))). -define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). %% seq only is binary 00 followed by 14 bits of rel seq id @@ -604,24 +603,18 @@ load_segment(KeepAcks, end. 
load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> - case file_handle_cache:read(Hdl, 1) of + case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of {ok, <>} -> - {ok, LSB} = file_handle_cache:read( - Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES - 1), - <> = <>, + RelSeq:?REL_SEQ_BITS>>} -> {AckCount1, SegEntries1} = deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries), load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount, AckCount1); {ok, <>} -> + IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. - {ok, <>} = - file_handle_cache:read( - Hdl, ?PUBLISH_RECORD_LENGTH_BYTES - 1), - <> = <>, + {ok, MsgId} = file_handle_cache:read(Hdl, ?MSG_ID_BYTES), SegEntries1 = array:set(RelSeq, {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, -- cgit v1.2.1 From f6699af0f8395570bc456987266872bc20637167 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Dec 2009 16:55:19 +0000 Subject: load_segments deliberately does not adjust the pubs and acks count in the segments because doing so would clobber over values carefully constructed to contain both information from the journal and from segment, which is used to detect when flushing to a segment is unnecessary because the file is "full" and so can be deleted (i.e. pubs == acks which is sum both of in segfile and journal). On startup, we were failing to set the pubs/acks counts correctly, leading to queues doing recovery correctly but then returning a length of 0. --- src/rabbit_queue_index.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 014afbc8..003e2d62 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -171,9 +171,9 @@ init(Name) -> lists:foldl( fun (Seg, {Segments2, CountAcc, DCountAcc}) -> Segment = segment_find_or_new(Seg, Dir, Segments2), - {SegEntries, _PubCount, _AckCount, Segment1} = + {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment), - {Segment2 = #segment { pubs = PubCount, acks = AckCount }, + {Segment2 = #segment { pubs = PubCount1, acks = AckCount1 }, DCountAcc1} = array:sparse_foldl( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, @@ -183,9 +183,11 @@ init(Name) -> rabbit_msg_store:contains(MsgId), CleanShutdown, Del, RelSeq, Segment3), {Segment4, DCountAcc2 + DCountDelta} - end, {Segment1, DCountAcc}, SegEntries), + end, {Segment1 #segment { pubs = PubCount, + acks = AckCount }, DCountAcc}, + SegEntries), {segment_store(Segment2, Segments2), - CountAcc + PubCount - AckCount, DCountAcc1} + CountAcc + PubCount1 - AckCount1, DCountAcc1} end, {Segments, 0, DCount}, AllSegs), {Count, State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. -- cgit v1.2.1 From 83c52a48c1c95dd7908f0595dc3fa70b11f0fdc4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Dec 2009 10:57:11 +0000 Subject: Do not kill rabbit after 1 second when being asked to stop. Also, the default timeout on startup should be the max value allowed, not 30 seconds. Sheesh. --- src/rabbit.erl | 8 -------- src/rabbit_multi.erl | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 9ef49ac5..401f20ba 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -90,14 +90,6 @@ stop() -> ok = rabbit_misc:stop_applications(?APPS). 
stop_and_halt() -> - spawn(fun () -> - SleepTime = 1000, - rabbit_log:info("Stop-and-halt request received; " - "halting in ~p milliseconds~n", - [SleepTime]), - timer:sleep(SleepTime), - init:stop() - end), case catch stop() of _ -> ok end. status() -> diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl index f364872e..2fc7dcd1 100644 --- a/src/rabbit_multi.erl +++ b/src/rabbit_multi.erl @@ -50,8 +50,8 @@ start() -> RpcTimeout = case init:get_argument(maxwait) of - {ok,[[N1]]} -> 1000 * list_to_integer(N1); - _ -> 30000 + {ok, [[N1]]} -> 1000 * list_to_integer(N1); + _ -> 16#ffffffff %% max allowed value according to docs end, case init:get_plain_arguments() of [] -> -- cgit v1.2.1 From 1356d8715b3bc43931c226a2fd75ad8fb7efa57c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Dec 2009 11:11:34 +0000 Subject: Turns out that it's actually important to stop erlang from time to time... --- src/rabbit.erl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 401f20ba..2aa58fc0 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -67,7 +67,7 @@ {nodes, [erlang_node()]} | {running_nodes, [erlang_node()]}]). -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()). --spec(start_child/2 :: (atom(), [any()]) -> 'ok'). +-spec(start_child/2 :: (atom(), [any()]) -> 'ok'). -endif. @@ -80,7 +80,7 @@ prepare() -> start() -> try ok = prepare(), - ok = rabbit_misc:start_applications(?APPS) + ok = rabbit_misc:start_applications(?APPS) after %%give the error loggers some time to catch up timer:sleep(100) @@ -90,7 +90,12 @@ stop() -> ok = rabbit_misc:stop_applications(?APPS). stop_and_halt() -> - case catch stop() of _ -> ok end. + try + stop() + after + init:stop() + end, + ok. status() -> [{running_applications, application:which_applications()}] ++ @@ -214,7 +219,7 @@ stop(_State) -> %--------------------------------------------------------------------------- log_location(Type) -> - case application:get_env(Type, case Type of + case application:get_env(Type, case Type of kernel -> error_logger; sasl -> sasl_error_logger end) of @@ -305,7 +310,7 @@ ensure_working_log_handler(OldFHandler, NewFHandler, TTYHandler, throw({error, {cannot_log_to_tty, TTYHandler, not_installed}}) end; - _ -> case lists:member(NewFHandler, Handlers) of + _ -> case lists:member(NewFHandler, Handlers) of true -> ok; false -> case rotate_logs(LogLocation, "", OldFHandler, NewFHandler) of -- cgit v1.2.1 From c2149311d927f635d7e61bcfa1aaaf6b0e1828ef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Dec 2009 13:11:40 +0000 Subject: qi: all_segment_nums was not returning its list sorted. As a result, find_lowest_seq_id_seg_and_next_seq_id could return the wrong values. vq: because of the bug in qi, vq could construct delta wrongly, violating the vq invariants. This would lead to explosions elsewhere. --- src/rabbit_queue_index.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 003e2d62..2007b00e 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -384,15 +384,16 @@ maybe_flush_journal(State) -> State.
all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> - sets:to_list( - lists:foldl( - fun (SegName, Set) -> - sets:add_element( - list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, - SegName)), Set) - end, sets:from_list(segment_fetch_keys(Segments)), - filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir))). + lists:sort( + sets:to_list( + lists:foldl( + fun (SegName, Set) -> + sets:add_element( + list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + SegName)), Set) + end, sets:from_list(segment_fetch_keys(Segments)), + filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). blank_state(QueueName) -> StrName = queue_name_to_dir_name(QueueName), -- cgit v1.2.1 From b283a7842a0b67beda629e054a9a8aefb38f150f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Dec 2009 13:30:00 +0000 Subject: Noticed that sometimes the shutdown message that appears to the queue process is of the form {shutdown,{gen_server2,call,[rabbit_msg_store,{read,<<96,94,147,36,83,202,129,231,131,151,203,11,95,112,69,118>>},infinity]}}. I presume this is when we're shutdown whilst in the middle of a call. In any case, it was falling through to the wrong case in queue process terminate, and resulting in the queue being deleted. Brilliant. Sadly, the fix is pretty much code duplication because you can't do a partial pattern match / unification in a guard. --- src/rabbit_amqqueue_process.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 9cefa926..d4d9611a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -124,6 +124,9 @@ init(Q = #amqqueue { name = QName }) -> terminate(shutdown, #q{variable_queue_state = VQS}) -> ok = rabbit_memory_monitor:deregister(self()), _VQS = rabbit_variable_queue:terminate(VQS); +terminate({shutdown, _}, #q{variable_queue_state = VQS}) -> + ok = rabbit_memory_monitor:deregister(self()), + _VQS = rabbit_variable_queue:terminate(VQS); terminate(_Reason, State = #q{variable_queue_state = VQS}) -> ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? -- cgit v1.2.1 From 2cef7bbf99a623f978a6fe7f987f01b2460f0306 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Dec 2009 17:43:03 +0000 Subject: Don't reverse the entire list, only to pull off one element --- src/rabbit_msg_store.erl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c21e4bd9..798e9d65 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -792,11 +792,12 @@ build_index(Files, State) -> build_index(Left, [], FilesToCompact, State) -> ok = index_delete_by_file(undefined, State), - Offset = case lists:reverse(index_search_by_file(Left, State)) of - [] -> 0; - [#msg_location { offset = MaxOffset, - total_size = TotalSize } | _] -> - MaxOffset + TotalSize + Offset = case index_search_by_file(Left, State) of + [] -> 0; + List -> #msg_location { offset = MaxOffset, + total_size = TotalSize } = + lists:last(List), + MaxOffset + TotalSize end, {Offset, compact(FilesToCompact, %% this never includes the current file State #msstate { current_file = Left })}; -- cgit v1.2.1 From d7424357ff5d05d3307ff491c761befdf18ad95c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 16 Dec 2009 19:14:53 +0000 Subject: Added accountancy for file size and sums thereof plus sums of valid data. 
This simplifies build_index slightly and complicates remove_message slightly. Also found that in delete_if_empty, was failing to close any file handle open on the file prior to it being deleted. --- src/rabbit_msg_file.erl | 6 +- src/rabbit_msg_store.erl | 176 ++++++++++++++++++++++++++++------------------- 2 files changed, 107 insertions(+), 75 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index c0826159..bf367ede 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -59,7 +59,7 @@ -spec(read/2 :: (io_device(), msg_size()) -> ({'ok', {msg_id(), msg()}} | {'error', any()})). -spec(scan/1 :: (io_device()) -> - {'ok', [{msg_id(), msg_size(), position()}]}). + {'ok', [{msg_id(), msg_size(), position()}], position()}). -endif. @@ -95,14 +95,14 @@ scan(FileHdl) -> scan(FileHdl, 0, []). scan(FileHdl, Offset, Acc) -> case read_next(FileHdl, Offset) of - eof -> {ok, Acc}; + eof -> {ok, Acc, Offset}; {corrupted, NextOffset} -> scan(FileHdl, NextOffset, Acc); {ok, {MsgId, TotalSize, NextOffset}} -> scan(FileHdl, NextOffset, [{MsgId, TotalSize, Offset} | Acc]); _KO -> %% bad message, but we may still have recovered some valid messages - {ok, Acc} + {ok, Acc, Offset} end. read_next(FileHdl, Offset) -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 798e9d65..b9bffef6 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -81,14 +81,16 @@ file_handle_cache, %% file handle cache on_sync, %% pending sync requests sync_timer_ref, %% TRef for our interval timer - message_cache %% ets message cache + message_cache, %% ets message cache + sum_valid_data, %% sum of valid data in all files + sum_file_size %% sum of file sizes }). -record(msg_location, {msg_id, ref_count, file, offset, total_size}). -record(file_summary, - {file, valid_total_size, contiguous_top, left, right}). + {file, valid_total_size, contiguous_top, left, right, file_size}). -define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). 
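With file_size held per #file_summary and the two sums on the state, the store's total garbage becomes an O(1) quantity instead of a table scan; as a sketch (the helper is not in the patch, the record fields are):

garbage_bytes(#msstate { sum_valid_data = SumValid,
                         sum_file_size  = SumFileSize }) ->
    SumFileSize - SumValid.

Per file, the analogous quantity is file_size - valid_total_size, the bytes compaction stands to reclaim.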
@@ -262,7 +264,9 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> file_handle_cache = dict:new(), on_sync = [], sync_timer_ref = undefined, - message_cache = MessageCache + message_cache = MessageCache, + sum_valid_data = 0, + sum_file_size = 0 }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), @@ -341,7 +345,9 @@ handle_call({contains, MsgId}, _From, State) -> handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, current_file = CurFile, - file_summary = FileSummary }) -> + file_summary = FileSummary, + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> case index_lookup(MsgId, State) of not_found -> %% New message, lots to do @@ -353,7 +359,8 @@ handle_cast({write, MsgId, Msg}, State), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, - right = undefined }] = + right = undefined, + file_size = FileSize }] = ets:lookup(FileSummary, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> @@ -363,9 +370,13 @@ handle_cast({write, MsgId, Msg}, end, true = ets:insert(FileSummary, FSEntry #file_summary { valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), + contiguous_top = ContiguousTop1, + file_size = FileSize + TotalSize }), NextOffset = CurOffset + TotalSize, - noreply(maybe_roll_to_new_file(NextOffset, State)); + noreply(maybe_roll_to_new_file( + NextOffset, + State #msstate { sum_valid_data = SumValid + TotalSize, + sum_file_size = SumFileSize + TotalSize })); StoreEntry = #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter ok = index_update(StoreEntry #msg_location { @@ -374,19 +385,19 @@ handle_cast({write, MsgId, Msg}, end; handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> - noreply( - compact(sets:to_list( - lists:foldl( - fun (MsgId, Files1) -> - case remove_message(MsgId, State) of - {compact, File} -> - if CurFile =:= File -> Files1; - true -> sets:add_element(File, Files1) - end; - no_compact -> Files1 - end - end, sets:new(), MsgIds)), - State)); + {Files, State1} = + lists:foldl( + fun (MsgId, {Files1, State2}) -> + case remove_message(MsgId, State2) of + {compact, File, State3} -> + {if CurFile =:= File -> Files1; + true -> sets:add_element(File, Files1) + end, State3}; + {no_compact, State3} -> + {Files1, State3} + end + end, {sets:new(), State}, MsgIds), + noreply(compact(sets:to_list(Files), State1)); handle_cast({release, MsgIds}, State) -> lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), @@ -499,7 +510,8 @@ sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. -remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> +remove_message(MsgId, State = #msstate { file_summary = FileSummary, + sum_valid_data = SumValid }) -> StoreEntry = #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), @@ -515,12 +527,13 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary }) -> true = ets:insert(FileSummary, FSEntry #file_summary { valid_total_size = ValidTotalSize1, contiguous_top = ContiguousTop1 }), - {compact, File}; + {compact, File, State #msstate { + sum_valid_data = SumValid - TotalSize }}; _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), ok = index_update(StoreEntry #msg_location { ref_count = RefCount - 1 }, State), - no_compact + {no_compact, State} end. 
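A small worked example of the file summary bookkeeping (the exact update expressions sit in context lines these hunks do not show, so the arithmetic is assumed): a file holding messages at offsets 0, 100 and 250 with total sizes 100, 150 and 80 has no holes, giving

%% valid_total_size = 100 + 150 + 80 = 330, contiguous_top = 330

Removing the message at offset 100 punches a hole:

%% valid_total_size = 330 - 150 = 180
%% contiguous_top cannot extend past the hole, so it drops to 100

contiguous_top matters because everything below it is gap-free, so compaction can keep or copy that prefix wholesale rather than consulting the index message by message.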
close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> @@ -752,7 +765,7 @@ is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). scan_file_for_valid_messages_msg_ids(Dir, FileName) -> - {ok, Messages} = scan_file_for_valid_messages(Dir, FileName), + {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [MsgId || {MsgId, _TotalSize, _FileOffset} <- Messages]}. scan_file_for_valid_messages(Dir, FileName) -> @@ -763,7 +776,7 @@ scan_file_for_valid_messages(Dir, FileName) -> %% but ignore file_handle_cache:close(Hdl), Valid; - {error, enoent} -> {ok, []}; + {error, enoent} -> {ok, [], 0}; {error, Reason} -> throw({error, {unable_to_scan_file, FileName, Reason}}) end. @@ -790,50 +803,63 @@ build_index([], State) -> build_index(Files, State) -> build_index(undefined, Files, [], State). -build_index(Left, [], FilesToCompact, State) -> +build_index(Left, [], FilesToCompact, State = + #msstate { file_summary = FileSummary }) -> ok = index_delete_by_file(undefined, State), - Offset = case index_search_by_file(Left, State) of - [] -> 0; - List -> #msg_location { offset = MaxOffset, - total_size = TotalSize } = - lists:last(List), - MaxOffset + TotalSize + Offset = case ets:lookup(FileSummary, Left) of + [] -> 0; + [#file_summary { file_size = FileSize }] -> FileSize end, {Offset, compact(FilesToCompact, %% this never includes the current file State #msstate { current_file = Left })}; build_index(Left, [File|Files], FilesToCompact, - State = #msstate { dir = Dir, file_summary = FileSummary }) -> - {ok, Messages} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), - {ValidMessages, ValidTotalSize, AllValid} = + State = #msstate { dir = Dir, file_summary = FileSummary, + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> + {ok, Messages, FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + {ValidMessages, ValidTotalSize} = lists:foldl( - fun (Obj = {MsgId, TotalSize, Offset}, - {VMAcc, VTSAcc, AVAcc}) -> + fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case index_lookup(MsgId, State) of - not_found -> {VMAcc, VTSAcc, false}; + not_found -> {VMAcc, VTSAcc}; StoreEntry -> ok = index_update(StoreEntry #msg_location { file = File, offset = Offset, total_size = TotalSize }, State), - {[Obj | VMAcc], VTSAcc + TotalSize, AVAcc} + {[Obj | VMAcc], VTSAcc + TotalSize} end - end, {[], 0, Messages =/= []}, Messages), + end, {[], 0}, Messages), %% foldl reverses lists, find_contiguous_block_prefix needs %% msgs eldest first, so, ValidMessages is the right way round {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), - Right = case Files of - [] -> undefined; - [F|_] -> F - end, - true = ets:insert_new(FileSummary, #file_summary { - file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - left = Left, right = Right }), - FilesToCompact1 = case AllValid orelse Right =:= undefined of - true -> FilesToCompact; - false -> [File | FilesToCompact] - end, - build_index(File, Files, FilesToCompact1, State). + {Right, FileSize1} = + case Files of + %% if it's the last file, we'll truncate to remove any + %% rubbish above the last valid message. This affects the + %% file size. 
+ [] -> {undefined, case ValidMessages of + [] -> 0; + _ -> {_MsgId, TotalSize, Offset} = + lists:last(ValidMessages), + Offset + TotalSize + end}; + [F|_] -> {F, FileSize} + end, + true = + ets:insert_new(FileSummary, #file_summary { + file = File, valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + left = Left, right = Right, file_size = FileSize1 }), + FilesToCompact1 = + case FileSize1 == ContiguousTop orelse Right =:= undefined of + true -> FilesToCompact; + false -> [File | FilesToCompact] + end, + build_index(File, Files, FilesToCompact1, + State #msstate { sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize1 }). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation @@ -855,7 +881,7 @@ maybe_roll_to_new_file(Offset, true = ets:insert_new( FileSummary, #file_summary { file = NextFile, valid_total_size = 0, contiguous_top = 0, - left = CurFile, right = undefined }), + left = CurFile, right = undefined, file_size = 0 }), State2 = State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }, compact([CurFile], State2); @@ -866,14 +892,14 @@ compact(Files, State) -> %% smallest number, hence eldest, hence left-most, first SortedFiles = lists:sort(Files), %% foldl reverses, so now youngest/right-most first - RemainingFiles = - lists:foldl(fun (File, Acc) -> - case delete_file_if_empty(File, State) of - true -> Acc; - false -> [File | Acc] + {RemainingFiles, State1} = + lists:foldl(fun (File, {Acc, State2}) -> + case delete_file_if_empty(File, State2) of + {true, State3} -> {Acc, State3}; + {false, State3} -> {[File | Acc], State3} end - end, [], SortedFiles), - lists:foldl(fun combine_file/2, State, lists:reverse(RemainingFiles)). + end, {[], State}, SortedFiles), + lists:foldl(fun combine_file/2, State1, lists:reverse(RemainingFiles)). %% At this stage, we simply know that the file has had msgs removed %% from it. However, we don't know if we need to merge it left (which @@ -913,12 +939,14 @@ combine_file(File, State = #msstate { file_summary = FileSummary, adjust_meta_and_combine( LeftObj = #file_summary { - file = LeftFile, valid_total_size = LeftValidData, right = RightFile }, + file = LeftFile, valid_total_size = LeftValidData, right = RightFile, + file_size = LeftFileSize }, RightObj = #file_summary { file = RightFile, valid_total_size = RightValidData, left = LeftFile, - right = RightRight }, + right = RightRight, file_size = RightFileSize }, State = #msstate { file_size_limit = FileSizeLimit, - file_summary = FileSummary }) -> + file_summary = FileSummary, + sum_file_size = SumFileSize }) -> TotalValidData = LeftValidData + RightValidData, if FileSizeLimit >= TotalValidData -> State1 = combine_files(RightObj, LeftObj, State), @@ -928,9 +956,12 @@ adjust_meta_and_combine( true = ets:insert(FileSummary, LeftObj #file_summary { valid_total_size = TotalValidData, contiguous_top = TotalValidData, + file_size = TotalValidData, right = RightRight }), true = ets:delete(FileSummary, RightFile), - {true, State1}; + {true, State1 #msstate { sum_file_size = + SumFileSize - LeftFileSize - RightFileSize + + TotalValidData }}; true -> {false, State} end. 
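The combine decision just above reduces to a single comparison; in isolation (an illustrative helper, not in the patch):

can_combine(LeftValidData, RightValidData, FileSizeLimit) ->
    FileSizeLimit >= LeftValidData + RightValidData.

When it holds, the right file's live data is folded into the left, the right file's summary row is deleted, and the accounting reflects that the merged file is all valid data: sum_file_size loses both old physical sizes and regains only TotalValidData.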
@@ -980,7 +1011,7 @@ combine_files(#file_summary { file = Source, DestinationHdl, TmpHdl, Destination, State1), TmpSize = DestinationValid - DestinationContiguousTop, %% so now Tmp contains everything we need to salvage from - %% Destination, and MsgLocationDets has been updated to + %% Destination, and MsgLocation has been updated to %% reflect compaction of Destination so truncate %% Destination and copy from Tmp back to the end {ok, 0} = file_handle_cache:position(TmpHdl, 0), @@ -1011,7 +1042,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. %% Offset, BlockStart and BlockEnd are in the SourceFile - %% update MsgLocationDets to reflect change of file and offset + %% update MsgLocation to reflect change of file and offset ok = index_update(StoreEntry #msg_location { file = Destination, offset = CurOffset }, State), @@ -1041,9 +1072,9 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, ok = file_handle_cache:sync(DestinationHdl), ok. -delete_file_if_empty(File, - #msstate { dir = Dir, file_summary = FileSummary }) -> - [#file_summary { valid_total_size = ValidData, +delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, + sum_file_size = SumFileSize } = State) -> + [#file_summary { valid_total_size = ValidData, file_size = FileSize, left = Left, right = Right }] = ets:lookup(FileSummary, File), case ValidData of @@ -1063,7 +1094,8 @@ delete_file_if_empty(File, {#file_summary.right, Right}) end, true = ets:delete(FileSummary, File), + State1 = close_handle(File, State), ok = file:delete(form_filename(Dir, filenum_to_name(File))), - true; - _ -> false + {true, State1 #msstate { sum_file_size = SumFileSize - FileSize }}; + _ -> {false, State} end. -- cgit v1.2.1 From 91d1eb55d90cbf16f089ed75de63a77c95b601c3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Dec 2009 13:08:01 +0000 Subject: Reworked the GC of msg_store so that it scans the files themselves for their content, rather than a select on ets. This bounds the time it can take (ets could have many billions of other entries in it), and also makes it simpler to make the msg_location pluggable => toke. Also reduce the msg file size to 16MB from 256MB as tests show that although max write speed drops (more fsyncs and fclose), the GC is much faster. This may go back up a bit when lazy+background GC arrives. --- src/rabbit_msg_store.erl | 50 ++++++++++++++++++++++++++++++------------------ src/rabbit_tests.erl | 3 +++ 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b9bffef6..0702cf36 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -43,7 +43,7 @@ -define(SERVER, ?MODULE). --define(FILE_SIZE_LIMIT, (256*1024*1024)). +-define(FILE_SIZE_LIMIT, (16*1024*1024)). -define(SYNC_INTERVAL, 5). %% milliseconds -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB @@ -622,13 +622,6 @@ index_delete(Key, #msstate { msg_locations = MsgLocations }) -> true = ets:delete(MsgLocations, Key), ok. -index_search_by_file(File, #msstate { msg_locations = MsgLocations }) -> - lists:sort(fun (#msg_location { offset = OffA }, - #msg_location { offset = OffB }) -> - OffA < OffB - end, ets:match_object(MsgLocations, - #msg_location { file = File, _ = '_' })). 
- index_delete_by_file(File, #msstate { msg_locations = MsgLocations }) -> MatchHead = #msg_location { file = File, _ = '_' }, ets:select_delete(MsgLocations, [{MatchHead, [], [true]}]), @@ -798,8 +791,7 @@ find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. build_index([], State) -> - CurFile = State #msstate.current_file, - build_index(undefined, [CurFile], [], State); + build_index(undefined, [State #msstate.current_file], [], State); build_index(Files, State) -> build_index(undefined, Files, [], State). @@ -990,8 +982,6 @@ combine_files(#file_summary { file = Source, ok = truncate_and_extend_file(DestinationHdl, DestinationValid, ExpectedSize); true -> - Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), Worklist = lists:dropwhile( fun (#msg_location { offset = Offset }) @@ -1005,7 +995,9 @@ combine_files(#file_summary { file = Source, %% that the list should be naturally sorted %% as we require, however, we need to %% enforce it anyway - end, index_search_by_file(Destination, State1)), + end, find_unremoved_messages_in_file(Destination, State1)), + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State1), @@ -1024,7 +1016,7 @@ combine_files(#file_summary { file = Source, ok = file_handle_cache:close(TmpHdl), ok = file:delete(form_filename(Dir, Tmp)) end, - SourceWorkList = index_search_by_file(Source, State1), + SourceWorkList = find_unremoved_messages_in_file(Source, State1), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State1), %% tidy up @@ -1033,6 +1025,19 @@ combine_files(#file_summary { file = Source, ok = file:delete(form_filename(Dir, SourceName)), State1. +find_unremoved_messages_in_file(File, State = #msstate { dir = Dir }) -> + %% Msgs here will be end-of-file at start-of-list + {ok, Messages, _FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + %% foldl will reverse so will end up with msgs in ascending offset order + lists:foldl( + fun ({MsgId, _TotalSize, _Offset}, Acc) -> + case index_lookup(MsgId, State) of + Entry = #msg_location { file = File } -> [ Entry | Acc ]; + _ -> Acc + end + end, [], Messages). + copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, State) -> {FinalOffset, BlockStart1, BlockEnd1} = @@ -1065,11 +1070,18 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {NextOffset, Offset, Offset + TotalSize} end end, {InitOffset, undefined, undefined}, WorkList), - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = file_handle_cache:position(SourceHdl, BlockStart1), - {ok, BSize1} = file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), - ok = file_handle_cache:sync(DestinationHdl), + case WorkList of + [] -> + ok; + _ -> + %% do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = + file_handle_cache:position(SourceHdl, BlockStart1), + {ok, BSize1} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), + ok = file_handle_cache:sync(DestinationHdl) + end, ok. 
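
%% Aside, illustration only -- not part of the patch: the ordering
%% comments in find_unremoved_messages_in_file above rely on the
%% standard identity that consing inside lists:foldl/3 reverses the
%% input, e.g.
%%
%%   lists:foldl(fun (X, Acc) -> [X | Acc] end, [], [30, 20, 10])
%%   %% => [10, 20, 30]
%%
%% so the youngest-first (descending offset) list produced by the file
%% scan comes out eldest-first (ascending offset), which is the order
%% the block-copying loop in copy_messages assumes.
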
delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary,
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index dc81ea18..fe782049 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1005,6 +1005,9 @@ test_msg_store() ->
     %% this should force some sort of sync internally otherwise misread
     ok = msg_store_read(MsgIds1stHalf),
     ok = rabbit_msg_store:remove(MsgIds1stHalf),
+    %% restart empty
+    ok = stop_msg_store(),
+    ok = start_msg_store_empty(), %% now safe to reuse msg_ids
     %% push a lot of msgs in...
     BigCount = 100000,
     MsgIdsBig = lists:seq(1, BigCount),
-- cgit v1.2.1


From 0cc5117167d0857cf6aaf3f53f58f372394b34c2 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 17 Dec 2009 18:42:12 +0000
Subject: All sorts of stuff. See the bug. GC is off here (other than deleting
 empty files). File locking is in. Some other machinery is in wrt background
 GC. Lots of reworking of the GC code to get it into a more useful state
 (everything from adjust_meta downwards). Tests do actually pass, but with GC
 otherwise off, you'd be mad to run this.

---
 src/rabbit_misc.erl      |   6 +-
 src/rabbit_msg_store.erl | 459 +++++++++++++++++++++++++++--------------------
 2 files changed, 271 insertions(+), 194 deletions(-)

diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index 97c96fc7..9f74f604 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -487,9 +487,9 @@ unfold(Fun, Acc, Init) ->

 ceil(N) ->
     T = trunc(N),
-    case N - T of
-        0 -> N;
-        _ -> 1 + T
+    case N == T of
+        true -> T;
+        false -> 1 + T
     end.

 %% Sorts a list of AMQP table fields as per the AMQP spec
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 0702cf36..b8373fd1 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -83,20 +83,30 @@
         sync_timer_ref,         %% TRef for our interval timer
         message_cache,          %% ets message cache
         sum_valid_data,         %% sum of valid data in all files
-        sum_file_size           %% sum of file sizes
-       }).
+        sum_file_size,          %% sum of file sizes
+        pending_gc_completion,  %% things to do once GC completes
+        gc_pid                  %% pid of the GC process
+       }).

 -record(msg_location, {msg_id, ref_count, file, offset, total_size}).

 -record(file_summary,
-        {file, valid_total_size, contiguous_top, left, right, file_size}).
+        {file, valid_total_size, contiguous_top, left, right, file_size,
+         locked}).
+
+-record(gcstate,
+        {dir
+        }).

 -define(MSG_LOC_NAME, rabbit_disk_queue_msg_location).
 -define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary).
 -define(FILE_EXTENSION, ".rdq").
 -define(FILE_EXTENSION_TMP, ".rdt").
 -define(CACHE_ETS_NAME, rabbit_disk_queue_cache).
+%% We run GC whenever the amount of garbage is >= GARBAGE_FRACTION *
+%% Total Valid Data
+-define(GARBAGE_FRACTION, 1.0).

 -define(BINARY_MODE, [raw, binary]).
 -define(READ_MODE, [read]).
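
To put concrete numbers on the new trigger (none of this appears in the
patch itself): with ?GARBAGE_FRACTION at 1.0, the guard introduced further
down this diff, (SumFileSize - SumValid) > ?GARBAGE_FRACTION * SumValid,
fires exactly when garbage outweighs live data, i.e. when less than half of
the bytes on disk are valid. For example, in the shell:

1> SumFileSize = 48 * 1024 * 1024, SumValid = 20 * 1024 * 1024,
   (SumFileSize - SumValid) > 1.0 * SumValid.
true

Note that garbage > valid is the same cut-off as garbage / total > 0.5,
which is precisely the ratio form that the "correct condition for doing GC"
commit later in this series rewrites the guard into.
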
@@ -248,11 +258,12 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), MsgLocations = ets:new(?MSG_LOC_NAME, - [set, private, {keypos, #msg_location.msg_id}]), + [set, protected, {keypos, #msg_location.msg_id}]), InitFile = 0, FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, - [set, private, {keypos, #file_summary.file}]), + [ordered_set, protected, + {keypos, #file_summary.file}]), MessageCache = ets:new(?CACHE_ETS_NAME, [set, private]), State = #msstate { dir = Dir, @@ -266,7 +277,9 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> sync_timer_ref = undefined, message_cache = MessageCache, sum_valid_data = 0, - sum_file_size = 0 + sum_file_size = 0, + pending_gc_completion = [], + gc_pid = undefined }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), @@ -289,52 +302,12 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, State1 #msstate { current_file_handle = FileHdl }}. -handle_call({read, MsgId}, _From, State = - #msstate { current_file = CurFile, - current_file_handle = CurHdl }) -> - {Result, State1} = - case index_lookup(MsgId, State) of - not_found -> {not_found, State}; - #msg_location { ref_count = RefCount, - file = File, - offset = Offset, - total_size = TotalSize } -> - case fetch_and_increment_cache(MsgId, State) of - not_found -> - ok = case CurFile =:= File andalso {ok, Offset} >= - file_handle_cache:current_raw_offset(CurHdl) of - true -> file_handle_cache:flush(CurHdl); - false -> ok - end, - {Hdl, State2} = get_read_handle(File, State), - {ok, Offset} = file_handle_cache:position(Hdl, Offset), - {ok, {MsgId, Msg}} = - case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> Obj; - Rest -> - throw({error, {misread, [{old_state, State}, - {file_num, File}, - {offset, Offset}, - {read, Rest}, - {proc_dict, get()} - ]}}) - end, - ok = case RefCount > 1 of - true -> - insert_into_cache(MsgId, Msg, State2); - false -> - %% it's not in the cache and we - %% only have one reference to the - %% message. So don't bother - %% putting it in the cache. 
- ok - end, - {{ok, Msg}, State2}; - {Msg, _RefCount} -> - {{ok, Msg}, State} - end - end, - reply(Result, State1); +handle_call({read, MsgId}, From, State) -> + case read_message(MsgId, State) of + {ok, Msg, State1} -> reply({ok, Msg}, State1); + {blocked, State1} -> noreply(add_to_pending_gc_completion( + {read, MsgId, From}, State1)) + end; handle_call({contains, MsgId}, _From, State) -> reply(case index_lookup(MsgId, State) of @@ -360,6 +333,7 @@ handle_cast({write, MsgId, Msg}, [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, right = undefined, + locked = false, file_size = FileSize }] = ets:lookup(FileSummary, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, @@ -373,10 +347,11 @@ handle_cast({write, MsgId, Msg}, contiguous_top = ContiguousTop1, file_size = FileSize + TotalSize }), NextOffset = CurOffset + TotalSize, - noreply(maybe_roll_to_new_file( - NextOffset, - State #msstate { sum_valid_data = SumValid + TotalSize, - sum_file_size = SumFileSize + TotalSize })); + noreply(maybe_compact(maybe_roll_to_new_file( + NextOffset, State #msstate + { sum_valid_data = SumValid + TotalSize, + sum_file_size = SumFileSize + TotalSize } + ))); StoreEntry = #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter ok = index_update(StoreEntry #msg_location { @@ -384,20 +359,11 @@ handle_cast({write, MsgId, Msg}, noreply(State) end; -handle_cast({remove, MsgIds}, State = #msstate { current_file = CurFile }) -> - {Files, State1} = - lists:foldl( - fun (MsgId, {Files1, State2}) -> - case remove_message(MsgId, State2) of - {compact, File, State3} -> - {if CurFile =:= File -> Files1; - true -> sets:add_element(File, Files1) - end, State3}; - {no_compact, State3} -> - {Files1, State3} - end - end, {sets:new(), State}, MsgIds), - noreply(compact(sets:to_list(Files), State1)); +handle_cast({remove, MsgIds}, State) -> + State1 = lists:foldl( + fun (MsgId, State2) -> remove_message(MsgId, State2) end, + State, MsgIds), + noreply(maybe_compact(State1)); handle_cast({release, MsgIds}, State) -> lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), @@ -421,11 +387,23 @@ handle_cast({sync, MsgIds, K}, handle_cast(sync, State) -> noreply(sync(State)). +%% handle_cast({gc_finished, GCPid, RemainingFile, DeletedFile, MsgLocations}, +%% State = #msstate { file_summary = FileSummary, +%% gc_pid = GCPid }) -> +%% true = ets:delete(FileSummary, DeletedFile), +%% true = ets:insert(FileSummary, RemainingFile), +%% State1 = lists:foldl(fun index_insert/2, State, MsgLocations), +%% noreply(maybe_compact(run_pending(State1))). + handle_info(timeout, State) -> noreply(sync(State)); handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_info({'EXIT', _Pid, normal}, State) -> + %% this is just the GC process going down noreply(State). terminate(_Reason, State = #msstate { msg_locations = MsgLocations, @@ -510,6 +488,63 @@ sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. 
+read_message(MsgId, State = + #msstate { current_file = CurFile, + current_file_handle = CurHdl, + file_summary = FileSummary }) -> + case index_lookup(MsgId, State) of + not_found -> {ok, not_found, State}; + #msg_location { ref_count = RefCount, + file = File, + offset = Offset, + total_size = TotalSize } -> + case fetch_and_increment_cache(MsgId, State) of + not_found -> + [#file_summary { locked = Locked }] = + ets:lookup(FileSummary, File), + case Locked of + true -> + {blocked, State}; + false -> + ok = case CurFile =:= File andalso {ok, Offset} >= + file_handle_cache:current_raw_offset( + CurHdl) of + true -> file_handle_cache:flush(CurHdl); + false -> ok + end, + {Hdl, State1} = get_read_handle(File, State), + {ok, Offset} = + file_handle_cache:position(Hdl, Offset), + {ok, {MsgId, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {MsgId, _}} = Obj -> Obj; + Rest -> + throw({error, {misread, + [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {read, Rest}, + {proc_dict, get()} + ]}}) + end, + ok = case RefCount > 1 of + true -> + insert_into_cache(MsgId, Msg, State1); + false -> + %% it's not in the cache and + %% we only have one reference + %% to the message. So don't + %% bother putting it in the + %% cache. + ok + end, + {ok, Msg, State1} + end; + {Msg, _RefCount} -> + {ok, Msg, State} + end + end. + remove_message(MsgId, State = #msstate { file_summary = FileSummary, sum_valid_data = SumValid }) -> StoreEntry = #msg_location { ref_count = RefCount, file = File, @@ -517,25 +552,50 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary, index_lookup(MsgId, State), case RefCount of 1 -> - ok = index_delete(MsgId, State), ok = remove_cache_entry(MsgId, State), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop }] = + contiguous_top = ContiguousTop, + locked = Locked }] = ets:lookup(FileSummary, File), - ContiguousTop1 = lists:min([ContiguousTop, Offset]), - ValidTotalSize1 = ValidTotalSize - TotalSize, - true = ets:insert(FileSummary, FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), - {compact, File, State #msstate { - sum_valid_data = SumValid - TotalSize }}; + case Locked of + true -> + add_to_pending_gc_completion({remove, MsgId}, State); + false -> + ok = index_delete(MsgId, State), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + ValidTotalSize1 = ValidTotalSize - TotalSize, + true = ets:insert( + FileSummary, FSEntry #file_summary { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), + State1 = delete_file_if_empty(File, State), + State1 #msstate { sum_valid_data = SumValid - TotalSize } + end; _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), - ok = index_update(StoreEntry #msg_location { - ref_count = RefCount - 1 }, State), - {no_compact, State} + ok = index_update(StoreEntry #msg_location + { ref_count = RefCount - 1 }, State), + State end. +add_to_pending_gc_completion( + Op, State = #msstate { pending_gc_completion = Pending }) -> + State #msstate { pending_gc_completion = [Op, Pending] }. + +run_pending(State = #msstate { pending_gc_completion = Pending }) -> + State1 = State #msstate { pending_gc_completion = [] }, + lists:foldl(fun run_pending/2, State1, Pending). 
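
%% Aside -- not part of the patch: add_to_pending_gc_completion above
%% builds [Op, Pending], a fixed two-element list with the old queue
%% nested as its second element, rather than consing with [Op | Pending].
%% After parking two operations the accumulator is
%%
%%   [Op2, [Op1, []]]    %% what this builds
%%   [Op2, Op1]          %% what consing would build
%%
%% and run_pending's foldl would hand the nested tail to run_pending/2
%% as though it were a single operation. A later commit in this series
%% corrects it to [Op | Pending] and also replays via lists:reverse/1,
%% so that parked operations complete in submission order.
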
+ +run_pending({read, MsgId, From}, State) -> + case read_message(MsgId, State) of + {ok, Msg, State1} -> gen_server2:reply(From, {ok, Msg}), + State1; + {blocked, State1} -> add_to_pending_gc_completion( + {read, MsgId, From}, State1) + end; +run_pending({remove, MsgId}, State) -> + remove_message(MsgId, State). + close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> case dict:find(Key, FHC) of {ok, Hdl} -> @@ -791,20 +851,19 @@ find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. build_index([], State) -> - build_index(undefined, [State #msstate.current_file], [], State); + build_index(undefined, [State #msstate.current_file], State); build_index(Files, State) -> - build_index(undefined, Files, [], State). + {Offset, State1} = build_index(undefined, Files, State), + {Offset, lists:foldl(fun delete_file_if_empty/2, State1, Files)}. -build_index(Left, [], FilesToCompact, State = - #msstate { file_summary = FileSummary }) -> +build_index(Left, [], State = #msstate { file_summary = FileSummary }) -> ok = index_delete_by_file(undefined, State), Offset = case ets:lookup(FileSummary, Left) of [] -> 0; [#file_summary { file_size = FileSize }] -> FileSize end, - {Offset, compact(FilesToCompact, %% this never includes the current file - State #msstate { current_file = Left })}; -build_index(Left, [File|Files], FilesToCompact, + {Offset, State #msstate { current_file = Left }}; +build_index(Left, [File|Files], State = #msstate { dir = Dir, file_summary = FileSummary, sum_valid_data = SumValid, sum_file_size = SumFileSize }) -> @@ -842,14 +901,9 @@ build_index(Left, [File|Files], FilesToCompact, true = ets:insert_new(FileSummary, #file_summary { file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, + contiguous_top = ContiguousTop, locked = false, left = Left, right = Right, file_size = FileSize1 }), - FilesToCompact1 = - case FileSize1 == ContiguousTop orelse Right =:= undefined of - true -> FilesToCompact; - false -> [File | FilesToCompact] - end, - build_index(File, Files, FilesToCompact1, + build_index(File, Files, State #msstate { sum_valid_data = SumValid + ValidTotalSize, sum_file_size = SumFileSize + FileSize1 }). @@ -873,13 +927,27 @@ maybe_roll_to_new_file(Offset, true = ets:insert_new( FileSummary, #file_summary { file = NextFile, valid_total_size = 0, contiguous_top = 0, - left = CurFile, right = undefined, file_size = 0 }), - State2 = State1 #msstate { current_file_handle = NextHdl, - current_file = NextFile }, - compact([CurFile], State2); + left = CurFile, right = undefined, file_size = 0, + locked = false }), + State1 #msstate { current_file_handle = NextHdl, + current_file = NextFile }; maybe_roll_to_new_file(_, State) -> State. +maybe_compact(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + gc_pid = undefined, + file_summary = FileSummary }) + when (SumFileSize - SumValid) > ?GARBAGE_FRACTION * SumValid -> + %% Pid = spawn_link(fun() -> + %% io:format("GC process!~n") + %% %% gen_server2:pcast(?SERVER, 9, {gc_finished, self(),}), + %% end), + %% State #msstate { gc_pid = Pid }; + State; +maybe_compact(State) -> + State. 
+ compact(Files, State) -> %% smallest number, hence eldest, hence left-most, first SortedFiles = lists:sort(Files), @@ -932,30 +1000,25 @@ combine_file(File, State = #msstate { file_summary = FileSummary, adjust_meta_and_combine( LeftObj = #file_summary { file = LeftFile, valid_total_size = LeftValidData, right = RightFile, - file_size = LeftFileSize }, + file_size = LeftFileSize, locked = true }, RightObj = #file_summary { file = RightFile, valid_total_size = RightValidData, left = LeftFile, - right = RightRight, file_size = RightFileSize }, - State = #msstate { file_size_limit = FileSizeLimit, - file_summary = FileSummary, - sum_file_size = SumFileSize }) -> + right = RightRight, file_size = RightFileSize, locked = true }, + State) -> TotalValidData = LeftValidData + RightValidData, - if FileSizeLimit >= TotalValidData -> - State1 = combine_files(RightObj, LeftObj, State), - %% this could fail if RightRight is undefined - ets:update_element(FileSummary, RightRight, - {#file_summary.left, LeftFile}), - true = ets:insert(FileSummary, LeftObj #file_summary { - valid_total_size = TotalValidData, - contiguous_top = TotalValidData, - file_size = TotalValidData, - right = RightRight }), - true = ets:delete(FileSummary, RightFile), - {true, State1 #msstate { sum_file_size = - SumFileSize - LeftFileSize - RightFileSize - + TotalValidData }}; - true -> {false, State} - end. + {NewMsgLocs, State1} = combine_files(RightObj, LeftObj, State), + %% %% this could fail if RightRight is undefined + %% ets:update_element(FileSummary, RightRight, + %% {#file_summary.left, LeftFile}), + %% true = ets:delete(FileSummary, RightFile), + LeftObj1 = LeftObj #file_summary { + valid_total_size = TotalValidData, + contiguous_top = TotalValidData, + file_size = TotalValidData, + right = RightRight }, + {RightFile, LeftObj1, NewMsgLocs, + TotalValidData - LeftFileSize - RightFileSize, + State1}. 
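
%% Aside -- not part of the patch: adjust_meta_and_combine now returns
%% the mutations instead of applying them, anticipating a GC process
%% that must not write the server's tables itself. One plausible shape
%% for the server-side application of that result (hypothetical helper,
%% written against the records above; the real wiring only lands with
%% the gc_done machinery later in this series):

apply_combine_result({DeletedFile, SurvivorObj, NewMsgLocs, SizeDelta,
                      _GCState},
                     State = #msstate { file_summary = FileSummary,
                                        sum_file_size = SumFileSize }) ->
    true = ets:insert(FileSummary, SurvivorObj),
    true = ets:delete(FileSummary, DeletedFile),
    %% NewMsgLocs is a list of lists (destination locations, then
    %% source locations), hence the append
    ok = lists:foreach(fun (Loc) -> ok = index_update(Loc, State) end,
                       lists:append(NewMsgLocs)),
    State #msstate { sum_file_size = SumFileSize + SizeDelta }.
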
combine_files(#file_summary { file = Source, valid_total_size = SourceValid, @@ -964,7 +1027,7 @@ combine_files(#file_summary { file = Source, valid_total_size = DestinationValid, contiguous_top = DestinationContiguousTop, right = Source }, - State = #msstate { dir = Dir }) -> + State = #gcstate { dir = Dir }) -> State1 = close_handle(Source, close_handle(Destination, State)), SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), @@ -978,54 +1041,62 @@ combine_files(#file_summary { file = Source, %% the DestinationContiguousTop to a tmp file then truncate, %% copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source - if DestinationContiguousTop =:= DestinationValid -> - ok = truncate_and_extend_file(DestinationHdl, - DestinationValid, ExpectedSize); - true -> - Worklist = - lists:dropwhile( - fun (#msg_location { offset = Offset }) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == - %% DestinationContiguousTop because if it - %% was then DestinationContiguousTop would - %% have been extended by TotalSize - Offset < DestinationContiguousTop - %% Given expected access patterns, I suspect - %% that the list should be naturally sorted - %% as we require, however, we need to - %% enforce it anyway - end, find_unremoved_messages_in_file(Destination, State1)), - Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), - ok = copy_messages( - Worklist, DestinationContiguousTop, DestinationValid, - DestinationHdl, TmpHdl, Destination, State1), - TmpSize = DestinationValid - DestinationContiguousTop, - %% so now Tmp contains everything we need to salvage from - %% Destination, and MsgLocation has been updated to - %% reflect compaction of Destination so truncate - %% Destination and copy from Tmp back to the end - {ok, 0} = file_handle_cache:position(TmpHdl, 0), - ok = truncate_and_extend_file( - DestinationHdl, DestinationContiguousTop, ExpectedSize), - {ok, TmpSize} = - file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be DestinationValid - ok = file_handle_cache:sync(DestinationHdl), - ok = file_handle_cache:close(TmpHdl), - ok = file:delete(form_filename(Dir, Tmp)) - end, + NewDestLocs = + if DestinationContiguousTop =:= DestinationValid -> + ok = truncate_and_extend_file(DestinationHdl, + DestinationValid, ExpectedSize), + []; + true -> + Worklist = + lists:dropwhile( + fun (#msg_location { offset = Offset }) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == + %% DestinationContiguousTop because if + %% it was then DestinationContiguousTop + %% would have been extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I + %% suspect that the list should be + %% naturally sorted as we require, + %% however, we need to enforce it anyway + end, + find_unremoved_messages_in_file(Destination, State1)), + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = + open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + {ok, NewDestLocs1} = + copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination), + TmpSize = DestinationValid - DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage + %% from Destination, and NewDestLocs1 contains + %% msg_locations reflecting the compaction of + %% Destination 
so truncate Destination and copy from + %% Tmp back to the end + {ok, 0} = file_handle_cache:position(TmpHdl, 0), + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + ok = file_handle_cache:sync(DestinationHdl), + ok = file_handle_cache:close(TmpHdl), + ok = file:delete(form_filename(Dir, Tmp)), + NewDestLocs1 + end, SourceWorkList = find_unremoved_messages_in_file(Source, State1), - ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, - SourceHdl, DestinationHdl, Destination, State1), + {ok, NewSourceLocs} = + copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination), %% tidy up ok = file_handle_cache:close(SourceHdl), ok = file_handle_cache:close(DestinationHdl), ok = file:delete(form_filename(Dir, SourceName)), - State1. + {[NewDestLocs, NewSourceLocs], State1}. -find_unremoved_messages_in_file(File, State = #msstate { dir = Dir }) -> +find_unremoved_messages_in_file(File, State = #gcstate { dir = Dir }) -> %% Msgs here will be end-of-file at start-of-list {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), @@ -1039,37 +1110,41 @@ find_unremoved_messages_in_file(File, State = #msstate { dir = Dir }) -> end, [], Messages). copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, - Destination, State) -> - {FinalOffset, BlockStart1, BlockEnd1} = + Destination) -> + {FinalOffset, BlockStart1, BlockEnd1, NewMsgLocations} = lists:foldl( fun (StoreEntry = #msg_location { offset = Offset, total_size = TotalSize }, - {CurOffset, BlockStart, BlockEnd}) -> + {CurOffset, BlockStart, BlockEnd, NewMsgLocs}) -> %% CurOffset is in the DestinationFile. 
%% Offset, BlockStart and BlockEnd are in the SourceFile %% update MsgLocation to reflect change of file and offset - ok = index_update(StoreEntry #msg_location { - file = Destination, - offset = CurOffset }, State), + NewMsgLocs1 = + [StoreEntry #msg_location { + file = Destination, + offset = CurOffset } | NewMsgLocs], NextOffset = CurOffset + TotalSize, - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {NextOffset, Offset, Offset + TotalSize}; - Offset =:= BlockEnd -> - %% extend the current block because the next - %% msg follows straight on - {NextOffset, BlockStart, BlockEnd + TotalSize}; - true -> - %% found a gap, so actually do the work for - %% the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file_handle_cache:position(SourceHdl, BlockStart), - {ok, BSize} = file_handle_cache:copy( - SourceHdl, DestinationHdl, BSize), - {NextOffset, Offset, Offset + TotalSize} - end - end, {InitOffset, undefined, undefined}, WorkList), + {BlockStart2, BlockEnd2} = + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset =:= BlockEnd -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + true -> + %% found a gap, so actually do the work + %% for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, + BlockStart), + {ok, BSize} = file_handle_cache:copy( + SourceHdl, DestinationHdl, BSize), + {Offset, Offset + TotalSize} + end, + {NextOffset, BlockStart2, BlockEnd2, NewMsgLocs1} + end, {InitOffset, undefined, undefined, []}, WorkList), case WorkList of [] -> ok; @@ -1082,8 +1157,10 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), ok = file_handle_cache:sync(DestinationHdl) end, - ok. + {ok, NewMsgLocations}. +delete_file_if_empty(File, State = #msstate { current_file = File }) -> + State; delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, sum_file_size = SumFileSize } = State) -> [#file_summary { valid_total_size = ValidData, file_size = FileSize, @@ -1108,6 +1185,6 @@ delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, true = ets:delete(FileSummary, File), State1 = close_handle(File, State), ok = file:delete(form_filename(Dir, filenum_to_name(File))), - {true, State1 #msstate { sum_file_size = SumFileSize - FileSize }}; - _ -> {false, State} + State1 #msstate { sum_file_size = SumFileSize - FileSize }; + _ -> State end. -- cgit v1.2.1 From ae3a821babc64cb1c464fec9c9acebb3936ca2ae Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Dec 2009 18:50:02 +0000 Subject: geometric distribution --- src/random_distributions.erl | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/random_distributions.erl diff --git a/src/random_distributions.erl b/src/random_distributions.erl new file mode 100644 index 00000000..dfcdc834 --- /dev/null +++ b/src/random_distributions.erl @@ -0,0 +1,38 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(random_distributions). + +-export([geometric/1]). + +geometric(P) when 0.0 < P andalso P < 1.0 -> + U = 1.0 - random:uniform(), + rabbit_misc:ceil(math:log(U) / math:log(1.0 - P)). -- cgit v1.2.1 From 509de3766e907871aefd699f4439025c9a7e7707 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 18 Dec 2009 11:49:21 +0000 Subject: correct condition for doing GC --- src/rabbit_msg_store.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b8373fd1..0b711b13 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -104,9 +104,8 @@ -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). -define(CACHE_ETS_NAME, rabbit_disk_queue_cache). -%% We run GC whenever the amount of garbage is >= GARBAGE_FRACTION * -%% Total Valid Data --define(GARBAGE_FRACTION, 1.0). +%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +-define(GARBAGE_FRACTION, 0.5). -define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read]). 
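
geometric/1, introduced just above, is the textbook inverse-transform
sampler: for U uniform on (0, 1), ceil(ln(U) / ln(1 - P)) is geometrically
distributed with success probability P, taking values 1, 2, 3, ... with
mean 1/P. A throwaway empirical check, not part of the patch:

%% should print a mean close to 1 / 0.3 = 3.33
geometric_mean_check() ->
    N = 100000,
    Sum = lists:sum([random_distributions:geometric(0.3)
                     || _ <- lists:seq(1, N)]),
    io:format("mean = ~p~n", [Sum / N]).

The sampler's role shows up at the end of this series: find_files_to_gc
draws N from this distribution (with P = ?GEOMETRIC_P, 0.3) to pick which
mergeable pair of files to collect, which tends to favour pairs near the
front of the candidate list while still occasionally reaching deeper into
it.
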
@@ -938,7 +937,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, sum_file_size = SumFileSize, gc_pid = undefined, file_summary = FileSummary }) - when (SumFileSize - SumValid) > ?GARBAGE_FRACTION * SumValid -> + when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> %% Pid = spawn_link(fun() -> %% io:format("GC process!~n") %% %% gen_server2:pcast(?SERVER, 9, {gc_finished, self(),}), -- cgit v1.2.1 From 7fd06f90d82f96a74cb3ea90f83baf55e8f74188 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 18 Dec 2009 17:21:43 +0000 Subject: support insomniation --- src/gen_server2.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 53edf8de..c7250827 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -126,6 +126,7 @@ %%% handle_pre_hibernate(State) %%% %%% ==> {hibernate, State} +%%% {insomniate, State} %%% {stop, Reason, State} %%% Reason = normal | shutdown | Term, terminate(State) is called %%% @@ -545,6 +546,9 @@ pre_hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> {hibernate, NState} -> hibernate(Parent, Name, NState, Mod, TimeoutState, Queue, Debug); + {insomniate, NState} -> + process_next_msg(Parent, Name, NState, Mod, hibernate, + TimeoutState, Queue, Debug); Reply -> handle_common_termination(Reply, Name, pre_hibernate, Mod, State, Debug) -- cgit v1.2.1 From b06aa9199fa6b424fe6a000bef7f785be7acbf06 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 18 Dec 2009 17:32:16 +0000 Subject: document insomniation and also ready the msg_store for such gadgetry --- src/gen_server2.erl | 9 ++++++--- src/rabbit_msg_store.erl | 37 ++++++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index c7250827..c4806151 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -25,8 +25,11 @@ %% handle_pre_hibernate/1 and handle_post_hibernate/1. These will be %% called immediately prior to and post hibernation, respectively. If %% handle_pre_hibernate returns {hibernate, NewState} then the process -%% will hibernate. If the module does not implement -%% handle_pre_hibernate/1 then the default action is to hibernate. +%% will hibernate. If handle_pre_hibernate returns {insomniate, +%% NewState} then the process will go around again, trying to receive +%% for up to the current timeout value before attempting to hibernate +%% again. If the module does not implement handle_pre_hibernate/1 then +%% the default action is to hibernate. %% %% 6) init can return a 4th arg, {backoff, InitialTimeout, %% MinimumTimeout, DesiredHibernatePeriod} (all in @@ -36,7 +39,7 @@ %% InitialTimeout supplied from init). After this timeout has %% occurred, hibernation will occur as normal. Upon awaking, a new %% current timeout value will be calculated. -%% +%% %% The purpose is that the gen_server2 takes care of adjusting the %% current timeout value such that the process will increase the %% timeout value repeatedly if it is unable to sleep for the diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 0b711b13..6306ac32 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -39,7 +39,7 @@ -export([sync/0]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). + terminate/2, code_change/3, handle_pre_hibernate/1]). -define(SERVER, ?MODULE). @@ -112,6 +112,9 @@ -define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). 
-define(WRITE_MODE, [write]). +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + %% The components: %% %% MsgLocation: this is an ets table which contains: @@ -299,7 +302,8 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), - {ok, State1 #msstate { current_file_handle = FileHdl }}. + {ok, State1 #msstate { current_file_handle = FileHdl }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({read, MsgId}, From, State) -> case read_message(MsgId, State) of @@ -424,6 +428,13 @@ terminate(_Reason, State = #msstate { msg_locations = MsgLocations, code_change(_OldVsn, State, _Extra) -> {ok, State}. +handle_pre_hibernate(State) -> + {Result, State1} = maybe_compact1(State), + {case Result of + true -> insomniate; + false -> hibernate + end, State1}. + %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- @@ -437,11 +448,11 @@ reply(Reply, State) -> {reply, Reply, State1, Timeout}. next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) -> - {State, infinity}; + {State, hibernate}; next_state(State = #msstate { sync_timer_ref = undefined }) -> {start_sync_timer(State), 0}; next_state(State = #msstate { on_sync = [] }) -> - {stop_sync_timer(State), infinity}; + {stop_sync_timer(State), hibernate}; next_state(State) -> {State, 0}. @@ -933,19 +944,23 @@ maybe_roll_to_new_file(Offset, maybe_roll_to_new_file(_, State) -> State. -maybe_compact(State = #msstate { sum_valid_data = SumValid, - sum_file_size = SumFileSize, - gc_pid = undefined, - file_summary = FileSummary }) +maybe_compact(State) -> + {_Bool, State1} = maybe_compact1(State), + State1. + +maybe_compact1(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + gc_pid = undefined, + file_summary = FileSummary }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> %% Pid = spawn_link(fun() -> %% io:format("GC process!~n") %% %% gen_server2:pcast(?SERVER, 9, {gc_finished, self(),}), %% end), %% State #msstate { gc_pid = Pid }; - State; -maybe_compact(State) -> - State. + {true, State}; +maybe_compact1(State) -> + {false, State}. compact(Files, State) -> %% smallest number, hence eldest, hence left-most, first -- cgit v1.2.1 From 54e4214d39335982d23d7165c2a2696e7db19a81 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 19 Dec 2009 16:27:30 +0000 Subject: Any exit signal arriving at the amqqueue_process should result in the process stopping and calling terminate --- src/rabbit_amqqueue_process.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index d4d9611a..7fe89af6 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -920,6 +920,9 @@ handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State); +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}; + handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. 
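
Background on the clause above (standard Erlang/OTP behaviour, nothing this
patch adds): 'EXIT' messages are only delivered to handle_info if the
process traps exits; otherwise a crash in a linked process kills the queue
process outright with no chance to run terminate. A self-contained
illustration:

-module(trap_exit_example).
-export([demo/0]).

%% Spawn a linked process that crashes; because we trap exits, the
%% exit signal arrives as an ordinary {'EXIT', Pid, Reason} message.
demo() ->
    process_flag(trap_exit, true),
    Pid = spawn_link(fun () -> exit(boom) end),
    receive
        {'EXIT', Pid, Reason} -> {trapped, Reason}
    after 1000 -> timeout
    end.

%% trap_exit_example:demo() returns {trapped, boom}.
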
-- cgit v1.2.1 From b02a20b6d2e91035d2941c40b5cfb0f15d1b6a83 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 19 Dec 2009 16:36:06 +0000 Subject: msg_store traps exits, and so it should have a handle_info clause to catch that --- src/rabbit_msg_store.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 0702cf36..f139fc45 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -426,7 +426,10 @@ handle_info(timeout, State) -> handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), - noreply(State). + noreply(State); + +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}. terminate(_Reason, State = #msstate { msg_locations = MsgLocations, file_summary = FileSummary, -- cgit v1.2.1 From 5307e4c6a7f21458fda7fb97020dd838005046e2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 20 Dec 2009 03:43:21 +0000 Subject: background lazy GC in and working --- include/rabbit_msg_store.hrl | 51 ++++ src/rabbit_msg_store.erl | 523 ++++++++++++------------------------- src/rabbit_msg_store_ets_index.erl | 71 +++++ src/rabbit_msg_store_gc.erl | 249 ++++++++++++++++++ src/rabbit_msg_store_misc.erl | 74 ++++++ 5 files changed, 611 insertions(+), 357 deletions(-) create mode 100644 include/rabbit_msg_store.hrl create mode 100644 src/rabbit_msg_store_ets_index.erl create mode 100644 src/rabbit_msg_store_gc.erl create mode 100644 src/rabbit_msg_store_misc.erl diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl new file mode 100644 index 00000000..925d5d8e --- /dev/null +++ b/include/rabbit_msg_store.hrl @@ -0,0 +1,51 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-record(msg_location, + {msg_id, ref_count, file, offset, total_size}). + +-record(file_summary, + {file, valid_total_size, contiguous_top, left, right, file_size, + locked}). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). 
+ +-define(FILE_SIZE_LIMIT, (16*1024*1024)). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c8d27ba6..f40c6270 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -36,16 +36,16 @@ -export([start_link/3, write/2, read/1, contains/1, remove/1, release/1, sync/2]). --export([sync/0]). %% internal +-export([sync/0, gc_done/3]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). -define(SERVER, ?MODULE). --define(FILE_SIZE_LIMIT, (16*1024*1024)). -define(SYNC_INTERVAL, 5). %% milliseconds --define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + +-define(GEOMETRIC_P, 0.3). %% parameter to geometric distribution rng %%---------------------------------------------------------------------------- @@ -54,6 +54,7 @@ -type(msg_id() :: binary()). -type(msg() :: any()). -type(file_path() :: any()). +-type(file_num() :: non_neg_integer()). -spec(start_link/3 :: (file_path(), @@ -65,6 +66,7 @@ -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). -spec(sync/2 :: ([msg_id()], fun (() -> any())) -> 'ok'). +-spec(gc_done/3 :: (non_neg_integer(), file_num(), file_num()) -> 'ok'). -endif. @@ -72,12 +74,12 @@ -record(msstate, {dir, %% store directory - msg_locations, %% where are messages? + index_module, %% the module for index ops + index_state, %% where are messages? file_summary, %% what's in the files? current_file, %% current file name as number current_file_handle, %% current file handle %% since the last fsync? - file_size_limit, %% how big can our files get? file_handle_cache, %% file handle cache on_sync, %% pending sync requests sync_timer_ref, %% TRef for our interval timer @@ -85,36 +87,17 @@ sum_valid_data, %% sum of valid data in all files sum_file_size, %% sum of file sizes pending_gc_completion, %% things to do once GC completes - gc_pid %% pid of the GC process + gc_running %% is the GC currently working? }). --record(msg_location, - {msg_id, ref_count, file, offset, total_size}). - --record(file_summary, - {file, valid_total_size, contiguous_top, left, right, file_size, - locked}). - --record(gcstate, - {dir - }). +-include("rabbit_msg_store.hrl"). --define(MSG_LOC_NAME, rabbit_disk_queue_msg_location). --define(FILE_SUMMARY_ETS_NAME, rabbit_disk_queue_file_summary). --define(FILE_EXTENSION, ".rdq"). --define(FILE_EXTENSION_TMP, ".rdt"). --define(CACHE_ETS_NAME, rabbit_disk_queue_cache). +-define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). +-define(CACHE_ETS_NAME, rabbit_msg_store_cache). %% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +%% It is not recommended to set this to < 0.5 -define(GARBAGE_FRACTION, 0.5). --define(BINARY_MODE, [raw, binary]). --define(READ_MODE, [read]). --define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). --define(WRITE_MODE, [write]). - --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). - %% The components: %% %% MsgLocation: this is an ets table which contains: @@ -249,6 +232,8 @@ remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal +gc_done(Reclaimed, Source, Destination) -> + gen_server2:pcast(?SERVER, 9, {gc_done, Reclaimed, Source, Destination}). 
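
%% Aside -- not part of the patch: rabbit_msg_store_gc's own source is
%% not reproduced in this excerpt, but the contract it must honour is
%% visible from gc_done/3 above and from the handle_cast({gc_done, ...})
%% clause further down this diff. The completion step on the worker side
%% presumably amounts to something like the following (hypothetical
%% function and variable names):

report_gc_done(Source, Dest, CombinedSizeBefore, SurvivorSizeAfter) ->
    Reclaimed = CombinedSizeBefore - SurvivorSizeAfter,
    rabbit_msg_store:gc_done(Reclaimed, Source, Dest).
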
%%---------------------------------------------------------------------------- %% gen_server callbacks @@ -259,21 +244,21 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - MsgLocations = ets:new(?MSG_LOC_NAME, - [set, protected, {keypos, #msg_location.msg_id}]), + IndexModule = rabbit_msg_store_ets_index, + IndexState = IndexModule:init(), InitFile = 0, FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, - [ordered_set, protected, + [ordered_set, public, {keypos, #file_summary.file}]), MessageCache = ets:new(?CACHE_ETS_NAME, [set, private]), State = #msstate { dir = Dir, - msg_locations = MsgLocations, + index_module = IndexModule, + index_state = IndexState, file_summary = FileSummary, current_file = InitFile, current_file_handle = undefined, - file_size_limit = ?FILE_SIZE_LIMIT, file_handle_cache = dict:new(), on_sync = [], sync_timer_ref = undefined, @@ -281,7 +266,7 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> sum_valid_data = 0, sum_file_size = 0, pending_gc_completion = [], - gc_pid = undefined + gc_running = false }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), @@ -297,11 +282,15 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> build_index(Files, State), %% read is only needed so that we can seek - {ok, FileHdl} = open_file(Dir, filenum_to_name(CurFile), - [read | ?WRITE_MODE]), + {ok, FileHdl} = rabbit_msg_store_misc:open_file( + Dir, rabbit_msg_store_misc:filenum_to_name(CurFile), + [read | ?WRITE_MODE]), {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), + {ok, _Pid} = rabbit_msg_store_gc:start_link( + Dir, IndexState, FileSummary, IndexModule), + {ok, State1 #msstate { current_file_handle = FileHdl }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -355,10 +344,12 @@ handle_cast({write, MsgId, Msg}, { sum_valid_data = SumValid + TotalSize, sum_file_size = SumFileSize + TotalSize } ))); - StoreEntry = #msg_location { ref_count = RefCount } -> - %% We already know about it, just update counter - ok = index_update(StoreEntry #msg_location { - ref_count = RefCount + 1 }, State), + #msg_location { ref_count = RefCount } -> + %% We already know about it, just update counter. Only + %% update field otherwise bad interaction with concurrent GC + ok = index_update_fields(MsgId, + {#msg_location.ref_count, RefCount + 1}, + State), noreply(State) end; @@ -388,15 +379,27 @@ handle_cast({sync, MsgIds, K}, end; handle_cast(sync, State) -> - noreply(sync(State)). + noreply(sync(State)); -%% handle_cast({gc_finished, GCPid, RemainingFile, DeletedFile, MsgLocations}, -%% State = #msstate { file_summary = FileSummary, -%% gc_pid = GCPid }) -> -%% true = ets:delete(FileSummary, DeletedFile), -%% true = ets:insert(FileSummary, RemainingFile), -%% State1 = lists:foldl(fun index_insert/2, State, MsgLocations), -%% noreply(maybe_compact(run_pending(State1))). 
+handle_cast({gc_done, Reclaimed, Source, Dest}, + State = #msstate { sum_file_size = SumFileSize, + gc_running = true, + file_summary = FileSummary }) -> + %% we always move data left, so Source has gone and was on the + %% right, so need to make dest = source.right.left, and also + %% dest.right = source.right + [#file_summary { left = Dest, right = SourceRight, locked = true }] = + ets:lookup(FileSummary, Source), + %% this could fail if SourceRight == undefined + ets:update_element(FileSummary, SourceRight, + {#file_summary.left, Dest}), + true = ets:update_element(FileSummary, Dest, + [{#file_summary.locked, false}, + {#file_summary.right, SourceRight}]), + true = ets:delete(FileSummary, Source), + noreply(run_pending( + State #msstate { sum_file_size = SumFileSize - Reclaimed, + gc_running = false })). handle_info(timeout, State) -> noreply(sync(State)); @@ -408,9 +411,13 @@ handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. -terminate(_Reason, State = #msstate { msg_locations = MsgLocations, +terminate(_Reason, State = #msstate { index_state = IndexState, + index_module = IndexModule, file_summary = FileSummary, current_file_handle = FileHdl }) -> + %% stop the gc first, otherwise it could be working and we pull + %% out the ets tables from under it. + ok = rabbit_msg_store_gc:stop(), State1 = case FileHdl of undefined -> State; _ -> State2 = sync(State), @@ -418,9 +425,9 @@ terminate(_Reason, State = #msstate { msg_locations = MsgLocations, State2 end, State3 = close_all_handles(State1), - ets:delete(MsgLocations), ets:delete(FileSummary), - State3 #msstate { msg_locations = undefined, + IndexModule:terminate(IndexState), + State3 #msstate { index_state = undefined, file_summary = undefined, current_file_handle = undefined }. @@ -428,11 +435,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. handle_pre_hibernate(State) -> - {Result, State1} = maybe_compact1(State), - {case Result of - true -> insomniate; - false -> hibernate - end, State1}. + {hibernate, maybe_compact(State)}. %%---------------------------------------------------------------------------- %% general helper functions @@ -465,27 +468,12 @@ stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #msstate { sync_timer_ref = undefined }. -form_filename(Dir, Name) -> filename:join(Dir, Name). - -filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. - filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). sort_file_names(FileNames) -> lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, FileNames). -preallocate(Hdl, FileSizeLimit, FinalPos) -> - {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), - ok = file_handle_cache:truncate(Hdl), - {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), - ok. - -truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> - {ok, Lowpoint} = file_handle_cache:position(FileHdl, Lowpoint), - ok = file_handle_cache:truncate(FileHdl), - ok = preallocate(FileHdl, Highpoint, Lowpoint). 
- sync(State = #msstate { current_file_handle = CurHdl, on_sync = Syncs }) -> State1 = stop_sync_timer(State), @@ -556,8 +544,8 @@ read_message(MsgId, State = remove_message(MsgId, State = #msstate { file_summary = FileSummary, sum_valid_data = SumValid }) -> - StoreEntry = #msg_location { ref_count = RefCount, file = File, - offset = Offset, total_size = TotalSize } = + #msg_location { ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), case RefCount of 1 -> @@ -582,18 +570,22 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary, end; _ when 1 < RefCount -> ok = decrement_cache(MsgId, State), - ok = index_update(StoreEntry #msg_location - { ref_count = RefCount - 1 }, State), + %% only update field, otherwise bad interaction with concurrent GC + ok = index_update_fields(MsgId, + {#msg_location.ref_count, RefCount - 1}, + State), State end. add_to_pending_gc_completion( Op, State = #msstate { pending_gc_completion = Pending }) -> - State #msstate { pending_gc_completion = [Op, Pending] }. + State #msstate { pending_gc_completion = [Op | Pending] }. +run_pending(State = #msstate { pending_gc_completion = [] }) -> + State; run_pending(State = #msstate { pending_gc_completion = Pending }) -> State1 = State #msstate { pending_gc_completion = [] }, - lists:foldl(fun run_pending/2, State1, Pending). + lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)). run_pending({read, MsgId, From}, State) -> case read_message(MsgId, State) of @@ -622,19 +614,16 @@ close_all_handles(State = #msstate { file_handle_cache = FHC }) -> get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC }) -> case dict:find(FileNum, FHC) of {ok, Hdl} -> {Hdl, State}; - error -> new_handle(FileNum, filenum_to_name(FileNum), + error -> new_handle(FileNum, + rabbit_msg_store_misc:filenum_to_name(FileNum), [read | ?BINARY_MODE], State) end. new_handle(Key, FileName, Mode, State = #msstate { file_handle_cache = FHC, dir = Dir }) -> - {ok, Hdl} = open_file(Dir, FileName, Mode), + {ok, Hdl} = rabbit_msg_store_misc:open_file(Dir, FileName, Mode), {Hdl, State #msstate { file_handle_cache = dict:store(Key, Hdl, FHC) }}. -open_file(Dir, FileName, Mode) -> - file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, - [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). - %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- @@ -673,28 +662,25 @@ insert_into_cache(MsgId, Msg, #msstate { message_cache = Cache }) -> %% index %%---------------------------------------------------------------------------- -index_lookup(Key, #msstate { msg_locations = MsgLocations }) -> - case ets:lookup(MsgLocations, Key) of - [] -> not_found; - [Entry] -> Entry - end. +index_lookup(Key, #msstate { index_module = Index, index_state = State }) -> + Index:lookup(Key, State). -index_insert(Obj, #msstate { msg_locations = MsgLocations }) -> - true = ets:insert_new(MsgLocations, Obj), - ok. +index_insert(Obj, #msstate { index_module = Index, index_state = State }) -> + Index:insert(Obj, State). -index_update(Obj, #msstate { msg_locations = MsgLocations }) -> - true = ets:insert(MsgLocations, Obj), - ok. +index_update(Obj, #msstate { index_module = Index, index_state = State }) -> + Index:update(Obj, State). -index_delete(Key, #msstate { msg_locations = MsgLocations }) -> - true = ets:delete(MsgLocations, Key), - ok. 
+index_update_fields(Key, Updates, + #msstate { index_module = Index, index_state = State }) -> + Index:update_fields(Key, Updates, State). -index_delete_by_file(File, #msstate { msg_locations = MsgLocations }) -> - MatchHead = #msg_location { file = File, _ = '_' }, - ets:select_delete(MsgLocations, [{MatchHead, [], [true]}]), - ok. +index_delete(Key, #msstate { index_module = Index, index_state = State }) -> + Index:delete(Key, State). + +index_delete_by_file(File, #msstate { index_module = Index, + index_state = State }) -> + Index:delete_by_file(File, State). %%---------------------------------------------------------------------------- %% recovery @@ -762,7 +748,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% consist only of valid messages. Plan: Truncate the main file %% back to before any of the files in the tmp file and copy %% them over again - TmpPath = form_filename(Dir, TmpFileName), + TmpPath = rabbit_msg_store_misc:form_filename(Dir, TmpFileName), case is_sublist(MsgIdsTmp, MsgIds) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file %% note this also catches the case when the tmp file @@ -794,8 +780,8 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% are in the tmp file true = is_disjoint(MsgIds1, MsgIdsTmp), %% must open with read flag, otherwise will stomp over contents - {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName, - [read | ?WRITE_MODE]), + {ok, MainHdl} = rabbit_msg_store_misc:open_file( + Dir, NonTmpRelatedFileName, [read | ?WRITE_MODE]), %% Wipe out any rubbish at the end of the file. Remember %% the head of the list will be the highest entry in the %% file. @@ -804,8 +790,10 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% Extend the main file as big as necessary in a single %% move. If we run out of disk space, this truncate could %% fail, but we still aren't risking losing data - ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), - {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE), + ok = rabbit_msg_store_misc:truncate_and_extend_file( + MainHdl, Top, Top + TmpSize), + {ok, TmpHdl} = rabbit_msg_store_misc:open_file( + Dir, TmpFileName, ?READ_AHEAD_MODE), {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize), ok = file_handle_cache:close(MainHdl), ok = file_handle_cache:delete(TmpHdl), @@ -827,22 +815,10 @@ is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). scan_file_for_valid_messages_msg_ids(Dir, FileName) -> - {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), + {ok, Messages, _FileSize} = + rabbit_msg_store_misc:scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [MsgId || {MsgId, _TotalSize, _FileOffset} <- Messages]}. -scan_file_for_valid_messages(Dir, FileName) -> - case open_file(Dir, FileName, ?READ_MODE) of - {ok, Hdl} -> - Valid = rabbit_msg_file:scan(Hdl), - %% if something really bad's happened, the close could fail, - %% but ignore - file_handle_cache:close(Hdl), - Valid; - {error, enoent} -> {ok, [], 0}; - {error, Reason} -> throw({error, - {unable_to_scan_file, FileName, Reason}}) - end. - %% Takes the list in *ascending* order (i.e. eldest message %% first). This is the opposite of what scan_file_for_valid_messages %% produces. The list of msgs that is produced is youngest first. 
@@ -877,7 +853,8 @@ build_index(Left, [File|Files], sum_valid_data = SumValid, sum_file_size = SumFileSize }) -> {ok, Messages, FileSize} = - scan_file_for_valid_messages(Dir, filenum_to_name(File)), + rabbit_msg_store_misc:scan_file_for_valid_messages( + Dir, rabbit_msg_store_misc:filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> @@ -922,262 +899,93 @@ build_index(Left, [File|Files], maybe_roll_to_new_file(Offset, State = #msstate { dir = Dir, - file_size_limit = FileSizeLimit, current_file_handle = CurHdl, current_file = CurFile, file_summary = FileSummary }) - when Offset >= FileSizeLimit -> + when Offset >= ?FILE_SIZE_LIMIT -> State1 = sync(State), ok = file_handle_cache:close(CurHdl), NextFile = CurFile + 1, - {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), - true = ets:update_element(FileSummary, CurFile, - {#file_summary.right, NextFile}), + {ok, NextHdl} = rabbit_msg_store_misc:open_file( + Dir, rabbit_msg_store_misc:filenum_to_name(NextFile), + ?WRITE_MODE), true = ets:insert_new( FileSummary, #file_summary { file = NextFile, valid_total_size = 0, contiguous_top = 0, left = CurFile, right = undefined, file_size = 0, locked = false }), + true = ets:update_element(FileSummary, CurFile, + {#file_summary.right, NextFile}), State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }; maybe_roll_to_new_file(_, State) -> State. +maybe_compact(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + file_summary = FileSummary, + gc_running = false }) + when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> + First = ets:first(FileSummary), + N = random_distributions:geometric(?GEOMETRIC_P), + case find_files_to_gc(FileSummary, N, First) of + undefined -> + State; + {Source, Dest} -> + State1 = close_handle(Source, close_handle(Dest, State)), + true = ets:update_element(FileSummary, Source, + {#file_summary.locked, true}), + true = ets:update_element(FileSummary, Dest, + {#file_summary.locked, true}), + ok = rabbit_msg_store_gc:gc(Source, Dest), + State1 #msstate { gc_running = true } + end; maybe_compact(State) -> - {_Bool, State1} = maybe_compact1(State), - State1. + State. -maybe_compact1(State = #msstate { sum_valid_data = SumValid, - sum_file_size = SumFileSize, - gc_pid = undefined, - file_summary = FileSummary }) - when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> - %% Pid = spawn_link(fun() -> - %% io:format("GC process!~n") - %% %% gen_server2:pcast(?SERVER, 9, {gc_finished, self(),}), - %% end), - %% State #msstate { gc_pid = Pid }; - {true, State}; -maybe_compact1(State) -> - {false, State}. - -compact(Files, State) -> - %% smallest number, hence eldest, hence left-most, first - SortedFiles = lists:sort(Files), - %% foldl reverses, so now youngest/right-most first - {RemainingFiles, State1} = - lists:foldl(fun (File, {Acc, State2}) -> - case delete_file_if_empty(File, State2) of - {true, State3} -> {Acc, State3}; - {false, State3} -> {[File | Acc], State3} - end - end, {[], State}, SortedFiles), - lists:foldl(fun combine_file/2, State1, lists:reverse(RemainingFiles)). - -%% At this stage, we simply know that the file has had msgs removed -%% from it. However, we don't know if we need to merge it left (which -%% is what we would prefer), or merge it right. If we merge left, then -%% this file is the source, and the left file is the destination. 
If -%% we merge right then this file is the destination and the right file -%% is the source. -combine_file(File, State = #msstate { file_summary = FileSummary, - current_file = CurFile }) -> - %% the file we're looking at may no longer exist as it may have - %% been deleted within the current GC run - case ets:lookup(FileSummary, File) of - [] -> State; - [FSEntry = #file_summary { left = Left, right = Right }] -> - GoRight = - fun() -> - case Right of - undefined -> State; - _ when not (CurFile == Right) -> - [FSRight] = ets:lookup(FileSummary, Right), - {_, State1} = adjust_meta_and_combine( - FSEntry, FSRight, State), - State1; - _ -> State - end - end, - case Left of - undefined -> - GoRight(); - _ -> [FSLeft] = ets:lookup(FileSummary, Left), - case adjust_meta_and_combine(FSLeft, FSEntry, State) of - {true, State1} -> State1; - {false, State} -> GoRight() - end - end +find_files_to_gc(_FileSummary, _N, '$end_of_table') -> + undefined; +find_files_to_gc(FileSummary, N, First) -> + [FirstObj = #file_summary { right = Right }] = + ets:lookup(FileSummary, First), + Pairs = + find_files_to_gc(FileSummary, N, FirstObj, + ets:lookup(FileSummary, Right), []), + case Pairs of + [] -> undefined; + [Pair] -> Pair; + _ -> M = 1 + (N rem length(Pairs)), + lists:nth(M, Pairs) end. -adjust_meta_and_combine( - LeftObj = #file_summary { - file = LeftFile, valid_total_size = LeftValidData, right = RightFile, - file_size = LeftFileSize, locked = true }, - RightObj = #file_summary { - file = RightFile, valid_total_size = RightValidData, left = LeftFile, - right = RightRight, file_size = RightFileSize, locked = true }, - State) -> - TotalValidData = LeftValidData + RightValidData, - {NewMsgLocs, State1} = combine_files(RightObj, LeftObj, State), - %% %% this could fail if RightRight is undefined - %% ets:update_element(FileSummary, RightRight, - %% {#file_summary.left, LeftFile}), - %% true = ets:delete(FileSummary, RightFile), - LeftObj1 = LeftObj #file_summary { - valid_total_size = TotalValidData, - contiguous_top = TotalValidData, - file_size = TotalValidData, - right = RightRight }, - {RightFile, LeftObj1, NewMsgLocs, - TotalValidData - LeftFileSize - RightFileSize, - State1}. 
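[Editorial sketch] Both the live code and the commented-out block above patch single record fields in ets with {#file_summary.field, Value} pairs. The #rec.field syntax evaluates to the field's tuple position, which is exactly what ets:update_element/3 takes, so fields are updated in place without a read-modify-write of the whole row. A self-contained toy, with a hypothetical three-field record standing in for the real one:

-module(ets_field_patch_sketch).
-export([run/0]).
-record(fs, {file, valid_total_size, locked}).

run() ->
    T = ets:new(fs_tab, [set, {keypos, #fs.file}]),
    true = ets:insert(T, #fs{file = 7, valid_total_size = 0, locked = false}),
    %% patch two fields in place; the positions come from #fs.locked etc.
    true = ets:update_element(T, 7, [{#fs.locked, true},
                                     {#fs.valid_total_size, 1024}]),
    [#fs{valid_total_size = 1024, locked = true}] = ets:lookup(T, 7),
    ok.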
- -combine_files(#file_summary { file = Source, - valid_total_size = SourceValid, - left = Destination }, - #file_summary { file = Destination, - valid_total_size = DestinationValid, - contiguous_top = DestinationContiguousTop, - right = Source }, - State = #gcstate { dir = Dir }) -> - State1 = close_handle(Source, close_handle(Destination, State)), - SourceName = filenum_to_name(Source), - DestinationName = filenum_to_name(Destination), - {ok, SourceHdl} = open_file(Dir, SourceName, ?READ_AHEAD_MODE), - {ok, DestinationHdl} = open_file(Dir, DestinationName, - ?READ_AHEAD_MODE ++ ?WRITE_MODE), - ExpectedSize = SourceValid + DestinationValid, - %% if DestinationValid =:= DestinationContiguousTop then we don't - %% need a tmp file - %% if they're not equal, then we need to write out everything past - %% the DestinationContiguousTop to a tmp file then truncate, - %% copy back in, and then copy over from Source - %% otherwise we just truncate straight away and copy over from Source - NewDestLocs = - if DestinationContiguousTop =:= DestinationValid -> - ok = truncate_and_extend_file(DestinationHdl, - DestinationValid, ExpectedSize), - []; - true -> - Worklist = - lists:dropwhile( - fun (#msg_location { offset = Offset }) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == - %% DestinationContiguousTop because if - %% it was then DestinationContiguousTop - %% would have been extended by TotalSize - Offset < DestinationContiguousTop - %% Given expected access patterns, I - %% suspect that the list should be - %% naturally sorted as we require, - %% however, we need to enforce it anyway - end, - find_unremoved_messages_in_file(Destination, State1)), - Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = - open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), - {ok, NewDestLocs1} = - copy_messages( - Worklist, DestinationContiguousTop, DestinationValid, - DestinationHdl, TmpHdl, Destination), - TmpSize = DestinationValid - DestinationContiguousTop, - %% so now Tmp contains everything we need to salvage - %% from Destination, and NewDestLocs1 contains - %% msg_locations reflecting the compaction of - %% Destination so truncate Destination and copy from - %% Tmp back to the end - {ok, 0} = file_handle_cache:position(TmpHdl, 0), - ok = truncate_and_extend_file( - DestinationHdl, DestinationContiguousTop, ExpectedSize), - {ok, TmpSize} = - file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be DestinationValid - ok = file_handle_cache:sync(DestinationHdl), - ok = file_handle_cache:close(TmpHdl), - ok = file:delete(form_filename(Dir, Tmp)), - NewDestLocs1 - end, - SourceWorkList = find_unremoved_messages_in_file(Source, State1), - {ok, NewSourceLocs} = - copy_messages(SourceWorkList, DestinationValid, ExpectedSize, - SourceHdl, DestinationHdl, Destination), - %% tidy up - ok = file_handle_cache:close(SourceHdl), - ok = file_handle_cache:close(DestinationHdl), - ok = file:delete(form_filename(Dir, SourceName)), - {[NewDestLocs, NewSourceLocs], State1}. 
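[Editorial sketch] The contiguous_top that combine_files consults above is the offset below which the destination file has no holes: if it equals valid_total_size the file can simply be truncated, otherwise everything above it must be salvaged through the tmp file. A hedged, pure sketch of that quantity over a [{Offset, Size}] list of live messages:

%% contiguous_top_sketch: extend the gap-free prefix while each message
%% starts exactly where the previous one ended.
contiguous_top(Locations) ->
    lists:foldl(fun ({Offset, Size}, Top) when Offset =:= Top ->
                        Top + Size;              %% still gap-free
                    (_, Top) ->
                        Top                      %% first hole; stop growing
                end, 0, lists:keysort(1, Locations)).

%% contiguous_top([{0,10}, {10,5}, {20,7}]) =:= 15: the hole at 15..20
%% means the message at offset 20 is what the tmp-file path must rewrite.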
- -find_unremoved_messages_in_file(File, State = #gcstate { dir = Dir }) -> - %% Msgs here will be end-of-file at start-of-list - {ok, Messages, _FileSize} = - scan_file_for_valid_messages(Dir, filenum_to_name(File)), - %% foldl will reverse so will end up with msgs in ascending offset order - lists:foldl( - fun ({MsgId, _TotalSize, _Offset}, Acc) -> - case index_lookup(MsgId, State) of - Entry = #msg_location { file = File } -> [ Entry | Acc ]; - _ -> Acc - end - end, [], Messages). - -copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, - Destination) -> - {FinalOffset, BlockStart1, BlockEnd1, NewMsgLocations} = - lists:foldl( - fun (StoreEntry = #msg_location { offset = Offset, - total_size = TotalSize }, - {CurOffset, BlockStart, BlockEnd, NewMsgLocs}) -> - %% CurOffset is in the DestinationFile. - %% Offset, BlockStart and BlockEnd are in the SourceFile - %% update MsgLocation to reflect change of file and offset - NewMsgLocs1 = - [StoreEntry #msg_location { - file = Destination, - offset = CurOffset } | NewMsgLocs], - NextOffset = CurOffset + TotalSize, - {BlockStart2, BlockEnd2} = - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {Offset, Offset + TotalSize}; - Offset =:= BlockEnd -> - %% extend the current block because the - %% next msg follows straight on - {BlockStart, BlockEnd + TotalSize}; - true -> - %% found a gap, so actually do the work - %% for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file_handle_cache:position(SourceHdl, - BlockStart), - {ok, BSize} = file_handle_cache:copy( - SourceHdl, DestinationHdl, BSize), - {Offset, Offset + TotalSize} - end, - {NextOffset, BlockStart2, BlockEnd2, NewMsgLocs1} - end, {InitOffset, undefined, undefined, []}, WorkList), - case WorkList of - [] -> - ok; - _ -> - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = - file_handle_cache:position(SourceHdl, BlockStart1), - {ok, BSize1} = - file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), - ok = file_handle_cache:sync(DestinationHdl) - end, - {ok, NewMsgLocations}. +find_files_to_gc(_FileSummary, _N, #file_summary {}, [], Pairs) -> + lists:reverse(Pairs); +find_files_to_gc(FileSummary, N, + #file_summary { right = Source, file = Dest, + valid_total_size = DestValid }, + [SourceObj = #file_summary { left = Dest, right = SourceRight, + valid_total_size = SourceValid, + file = Source }], + Pairs) when DestValid + SourceValid =< ?FILE_SIZE_LIMIT andalso + not is_atom(SourceRight) -> + Pair = {Source, Dest}, + case N == 1 of + true -> [Pair]; + false -> find_files_to_gc(FileSummary, (N - 1), SourceObj, + ets:lookup(FileSummary, SourceRight), + [Pair | Pairs]) + end; +find_files_to_gc(FileSummary, N, _Left, + [Right = #file_summary { right = RightRight }], Pairs) -> + find_files_to_gc(FileSummary, N, Right, + ets:lookup(FileSummary, RightRight), Pairs). 
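[Editorial sketch] The copy_messages fold above never copies message by message: it grows a [BlockStart, BlockEnd) window while messages sit back to back and issues one file copy per gap. The coalescing step in isolation, as a pure sketch over ascending {Offset, Size} entries:

coalesce(SortedEntries) ->
    lists:reverse(
      lists:foldl(
        fun ({Offset, Size}, [{Start, End} | Blocks]) when Offset =:= End ->
                [{Start, End + Size} | Blocks];    %% adjacent: extend block
            ({Offset, Size}, Blocks) ->
                [{Offset, Offset + Size} | Blocks] %% gap: open a new block
        end, [], SortedEntries)).

%% coalesce([{0,10}, {10,5}, {20,7}, {27,3}]) =:= [{0,15}, {20,30}]:
%% four messages, but only two file_handle_cache:copy calls.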
delete_file_if_empty(File, State = #msstate { current_file = File }) -> State; delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, sum_file_size = SumFileSize } = State) -> [#file_summary { valid_total_size = ValidData, file_size = FileSize, - left = Left, right = Right }] = + left = Left, right = Right, locked = false }] = ets:lookup(FileSummary, File), case ValidData of %% we should NEVER find the current file in here hence right @@ -1188,16 +996,17 @@ delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, true = ets:update_element( FileSummary, Right, {#file_summary.left, undefined}); - {_, _} when not (is_atom(Right)) -> + {_, _} when not is_atom(Right) -> true = ets:update_element(FileSummary, Right, {#file_summary.left, Left}), - true = - ets:update_element(FileSummary, Left, - {#file_summary.right, Right}) + true = ets:update_element(FileSummary, Left, + {#file_summary.right, Right}) end, true = ets:delete(FileSummary, File), State1 = close_handle(File, State), - ok = file:delete(form_filename(Dir, filenum_to_name(File))), + ok = file:delete(rabbit_msg_store_misc:form_filename( + Dir, + rabbit_msg_store_misc:filenum_to_name(File))), State1 #msstate { sum_file_size = SumFileSize - FileSize }; _ -> State end. diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl new file mode 100644 index 00000000..cb13ed86 --- /dev/null +++ b/src/rabbit_msg_store_ets_index.erl @@ -0,0 +1,71 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_ets_index). +-export([init/0, lookup/2, insert/2, update/2, update_fields/3, delete/2, + delete_by_file/2, terminate/1]). + +-define(MSG_LOC_NAME, rabbit_msg_store_ets_index). + +-include("rabbit_msg_store.hrl"). + +init() -> + ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]). + +lookup(Key, MsgLocations) -> + case ets:lookup(MsgLocations, Key) of + [] -> not_found; + [Entry] -> Entry + end. + +insert(Obj, MsgLocations) -> + true = ets:insert_new(MsgLocations, Obj), + ok. + +update(Obj, MsgLocations) -> + true = ets:insert(MsgLocations, Obj), + ok. + +update_fields(Key, Updates, MsgLocations) -> + true = ets:update_element(MsgLocations, Key, Updates), + ok. 
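[Editorial sketch] The delete_file_if_empty clause at the top of this hunk is pointer surgery on the doubly-linked chain that file_summary rows encode in their left and right fields. The same unlink, sketched over a plain map of File => {Left, Right} (illustrative only; the real chain lives in ets, and Right is never undefined because the current file is never deleted):

unlink_file(File, Chain) ->
    {Left, Right} = maps:get(File, Chain),
    Chain1 = case Left of
                 undefined -> Chain;        %% we are deleting the eldest file
                 _ -> maps:update_with(
                        Left, fun ({L, _}) -> {L, Right} end, Chain)
             end,
    Chain2 = maps:update_with(
               Right, fun ({_, R}) -> {Left, R} end, Chain1),
    maps:remove(File, Chain2).

%% unlink_file(2, #{1 => {undefined,2}, 2 => {1,3}, 3 => {2,undefined}})
%% =:= #{1 => {undefined,3}, 3 => {1,undefined}}.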
+ +delete(Key, MsgLocations) -> + true = ets:delete(MsgLocations, Key), + ok. + +delete_by_file(File, MsgLocations) -> + MatchHead = #msg_location { file = File, _ = '_' }, + ets:select_delete(MsgLocations, [{MatchHead, [], [true]}]), + ok. + +terminate(MsgLocations) -> + ets:delete(MsgLocations). diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl new file mode 100644 index 00000000..729cd287 --- /dev/null +++ b/src/rabbit_msg_store_gc.erl @@ -0,0 +1,249 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_gc). + +-behaviour(gen_server2). + +-export([start_link/4, gc/2, stop/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(gcstate, + {dir, + index_state, + file_summary, + index_module + }). + +-include("rabbit_msg_store.hrl"). + +-define(SERVER, ?MODULE). + +%%---------------------------------------------------------------------------- + +start_link(Dir, IndexState, FileSummary, IndexModule) -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, + [Dir, IndexState, FileSummary, IndexModule], + [{timeout, infinity}]). + +gc(Source, Destination) -> + gen_server2:cast(?SERVER, {gc, Source, Destination}). + +stop() -> + gen_server2:call(?SERVER, stop). + +%%---------------------------------------------------------------------------- + +init([Dir, IndexState, FileSummary, IndexModule]) -> + {ok, #gcstate { dir = Dir, index_state = IndexState, + file_summary = FileSummary, index_module = IndexModule }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast({gc, Source, Destination}, State) -> + Reclaimed = adjust_meta_and_combine(Source, Destination, State), + ok = rabbit_msg_store:gc_done(Reclaimed, Source, Destination), + {noreply, State, hibernate}. + +handle_info(Info, State) -> + {stop, {unhandled_info, Info}, State}. + +terminate(_Reason, State) -> + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
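[Editorial sketch] rabbit_msg_store_gc above is an offload worker in the classic shape: the owner casts the work across and stays responsive, and the worker reports back when done. A compressed sketch of that shape with stock gen_server (the real module uses gen_server2, hibernation backoff, and a gc_done call back into rabbit_msg_store; on modern OTP the remaining callbacks are optional):

-module(gc_worker_sketch).
-behaviour(gen_server).
-export([start_link/1, gc/2, stop/0]).
-export([init/1, handle_call/3, handle_cast/2]).

start_link(Owner) ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, Owner, []).

gc(Source, Dest) ->
    gen_server:cast(?MODULE, {gc, Source, Dest}).   %% async handoff

stop() ->
    gen_server:call(?MODULE, stop).

init(Owner) -> {ok, Owner}.

handle_call(stop, _From, Owner) -> {stop, normal, ok, Owner}.

handle_cast({gc, Source, Dest}, Owner) ->
    Reclaimed = 0,        %% the real worker runs adjust_meta_and_combine here
    Owner ! {gc_done, Reclaimed, Source, Dest},     %% completion callback
    {noreply, Owner}.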
+ +%%---------------------------------------------------------------------------- + +adjust_meta_and_combine(SourceFile, DestFile, + State = #gcstate { file_summary = FileSummary }) -> + + [SourceObj = #file_summary { + valid_total_size = SourceValidData, left = DestFile, + file_size = SourceFileSize, locked = true }] = + ets:lookup(FileSummary, SourceFile), + [DestObj = #file_summary { + valid_total_size = DestValidData, right = SourceFile, + file_size = DestFileSize, locked = true }] = + ets:lookup(FileSummary, DestFile), + + TotalValidData = DestValidData + SourceValidData, + ok = combine_files(SourceObj, DestObj, State), + %% don't update dest.right, because it could be changing at the same time + true = + ets:update_element(FileSummary, DestFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.contiguous_top, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SourceFileSize + DestFileSize - TotalValidData. + +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, + State = #gcstate { dir = Dir }) -> + SourceName = rabbit_msg_store_misc:filenum_to_name(Source), + DestinationName = rabbit_msg_store_misc:filenum_to_name(Destination), + {ok, SourceHdl} = + rabbit_msg_store_misc:open_file(Dir, SourceName, ?READ_AHEAD_MODE), + {ok, DestinationHdl} = + rabbit_msg_store_misc:open_file(Dir, DestinationName, + ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ExpectedSize = SourceValid + DestinationValid, + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source + if DestinationContiguousTop =:= DestinationValid -> + ok = rabbit_msg_store_misc:truncate_and_extend_file( + DestinationHdl, DestinationValid, ExpectedSize); + true -> + Worklist = + lists:dropwhile( + fun (#msg_location { offset = Offset }) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == + %% DestinationContiguousTop because if it + %% was then DestinationContiguousTop would + %% have been extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I suspect + %% that the list should be naturally sorted + %% as we require, however, we need to + %% enforce it anyway + end, + find_unremoved_messages_in_file(Destination, State)), + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = rabbit_msg_store_misc:open_file( + Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ok = copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination, State), + TmpSize = DestinationValid - DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage from + %% Destination, and index_state has been updated to + %% reflect the compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file_handle_cache:position(TmpHdl, 0), + ok = rabbit_msg_store_misc:truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + 
ok = file_handle_cache:sync(DestinationHdl), + ok = file_handle_cache:close(TmpHdl), + ok = file:delete(rabbit_msg_store_misc:form_filename(Dir, Tmp)) + end, + SourceWorkList = find_unremoved_messages_in_file(Source, State), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State), + %% tidy up + ok = file_handle_cache:close(SourceHdl), + ok = file_handle_cache:close(DestinationHdl), + ok = file:delete(rabbit_msg_store_misc:form_filename(Dir, SourceName)), + ok. + +find_unremoved_messages_in_file(File, #gcstate { dir = Dir, + index_state = IndexState, + index_module = Index }) -> + %% Msgs here will be end-of-file at start-of-list + {ok, Messages, _FileSize} = + rabbit_msg_store_misc:scan_file_for_valid_messages( + Dir, rabbit_msg_store_misc:filenum_to_name(File)), + %% foldl will reverse so will end up with msgs in ascending offset order + lists:foldl( + fun ({MsgId, _TotalSize, _Offset}, Acc) -> + case Index:lookup(MsgId, IndexState) of + Entry = #msg_location { file = File } -> [ Entry | Acc ]; + _ -> Acc + end + end, [], Messages). + +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, #gcstate { index_module = Index, + index_state = IndexState }) -> + {FinalOffset, BlockStart1, BlockEnd1} = + lists:foldl( + fun (#msg_location { msg_id = MsgId, offset = Offset, + total_size = TotalSize }, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the DestinationFile. + %% Offset, BlockStart and BlockEnd are in the SourceFile + %% update MsgLocation to reflect change of file and offset + ok = Index:update_fields(MsgId, + [{#msg_location.file, Destination}, + {#msg_location.offset, CurOffset}], + IndexState), + {BlockStart2, BlockEnd2} = + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset =:= BlockEnd -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + true -> + %% found a gap, so actually do the work + %% for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, + BlockStart), + {ok, BSize} = file_handle_cache:copy( + SourceHdl, DestinationHdl, BSize), + {Offset, Offset + TotalSize} + end, + {CurOffset + TotalSize, BlockStart2, BlockEnd2} + end, {InitOffset, undefined, undefined}, WorkList), + case WorkList of + [] -> + ok; + _ -> + %% do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = + file_handle_cache:position(SourceHdl, BlockStart1), + {ok, BSize1} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), + ok = file_handle_cache:sync(DestinationHdl) + end, + ok. diff --git a/src/rabbit_msg_store_misc.erl b/src/rabbit_msg_store_misc.erl new file mode 100644 index 00000000..cf76cf21 --- /dev/null +++ b/src/rabbit_msg_store_misc.erl @@ -0,0 +1,74 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. 
+%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_misc). + +-export([open_file/3, preallocate/3, truncate_and_extend_file/3, + form_filename/2, filenum_to_name/1, scan_file_for_valid_messages/2]). + +-include("rabbit_msg_store.hrl"). + + +%%---------------------------------------------------------------------------- + +open_file(Dir, FileName, Mode) -> + file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, + [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). + +%%---------------------------------------------------------------------------- + +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), + ok = file_handle_cache:truncate(Hdl), + {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), + ok. + +truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file_handle_cache:position(FileHdl, Lowpoint), + ok = file_handle_cache:truncate(FileHdl), + ok = preallocate(FileHdl, Highpoint, Lowpoint). + +form_filename(Dir, Name) -> filename:join(Dir, Name). + +filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. + +scan_file_for_valid_messages(Dir, FileName) -> + case open_file(Dir, FileName, ?READ_MODE) of + {ok, Hdl} -> + Valid = rabbit_msg_file:scan(Hdl), + %% if something really bad's happened, the close could fail, + %% but ignore + file_handle_cache:close(Hdl), + Valid; + {error, enoent} -> {ok, [], 0}; + {error, Reason} -> throw({error, + {unable_to_scan_file, FileName, Reason}}) + end. -- cgit v1.2.1 From 69d1c25e9139d2e8e7121a72af73a43ad1c90726 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 20 Dec 2009 04:10:45 +0000 Subject: Must start msg_store *before* amqqueue_sup, otherwise on shutdown, there's a nasty race which blows up the channel on notify_all_down because the msg_store exits wrongly, first, killing the queue process when it calls msg_store:read, and then the channel can't call the queue. 
Thus start msg_store before queue_sup, and none of this problem exists --- src/rabbit.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 2aa58fc0..fe1be7c2 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -150,12 +150,11 @@ start(normal, []) -> start_child(vm_memory_monitor, [MemoryWatermark]) end, - ok = rabbit_amqqueue:start(), + ok = start_child(rabbit_memory_monitor), + ok = start_child(rabbit_guid), ok = start_child(rabbit_router), - ok = start_child(rabbit_guid), - ok = start_child(rabbit_node_monitor), - ok = start_child(rabbit_memory_monitor) + ok = start_child(rabbit_node_monitor) end}, {"recovery", fun () -> @@ -163,6 +162,9 @@ start(normal, []) -> ok = rabbit_exchange:recover(), DurableQueues = rabbit_amqqueue:find_durable_queues(), ok = rabbit_queue_index:start_msg_store(DurableQueues), + + ok = rabbit_amqqueue:start(), + {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues) %% TODO - RealDurableQueues is a subset of %% DurableQueues. It may have queues removed which -- cgit v1.2.1 From 9c43ce8583aba0344a02187573edb3958409499a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 20 Dec 2009 17:22:43 +0000 Subject: Ensure that :contains calls can't overtake :remove casts. Also modify the tests slightly. --- src/rabbit_msg_store.erl | 66 ++++++++++++++++++++++++++++-------------------- src/rabbit_tests.erl | 25 +++++++++++------- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index f40c6270..c060c8d4 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -87,7 +87,7 @@ sum_valid_data, %% sum of valid data in all files sum_file_size, %% sum of file sizes pending_gc_completion, %% things to do once GC completes - gc_running %% is the GC currently working? + gc_active %% is the GC currently working? }). -include("rabbit_msg_store.hrl"). @@ -266,7 +266,7 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> sum_valid_data = 0, sum_file_size = 0, pending_gc_completion = [], - gc_running = false + gc_active = false }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), @@ -295,17 +295,12 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({read, MsgId}, From, State) -> - case read_message(MsgId, State) of - {ok, Msg, State1} -> reply({ok, Msg}, State1); - {blocked, State1} -> noreply(add_to_pending_gc_completion( - {read, MsgId, From}, State1)) - end; + State1 = read_message(MsgId, From, State), + noreply(State1); -handle_call({contains, MsgId}, _From, State) -> - reply(case index_lookup(MsgId, State) of - not_found -> false; - #msg_location {} -> true - end, State). +handle_call({contains, MsgId}, From, State) -> + State1 = contains_message(MsgId, From, State), + noreply(State1). handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, @@ -383,7 +378,7 @@ handle_cast(sync, State) -> handle_cast({gc_done, Reclaimed, Source, Dest}, State = #msstate { sum_file_size = SumFileSize, - gc_running = true, + gc_active = {Source, Dest}, file_summary = FileSummary }) -> %% we always move data left, so Source has gone and was on the %% right, so need to make dest = source.right.left, and also @@ -399,7 +394,7 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, true = ets:delete(FileSummary, Source), noreply(run_pending( State #msstate { sum_file_size = SumFileSize - Reclaimed, - gc_running = false })). 
+ gc_active = false })). handle_info(timeout, State) -> noreply(sync(State)); @@ -485,12 +480,13 @@ sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. -read_message(MsgId, State = +read_message(MsgId, From, State = #msstate { current_file = CurFile, current_file_handle = CurHdl, file_summary = FileSummary }) -> case index_lookup(MsgId, State) of - not_found -> {ok, not_found, State}; + not_found -> gen_server2:reply(From, not_found), + State; #msg_location { ref_count = RefCount, file = File, offset = Offset, @@ -501,7 +497,8 @@ read_message(MsgId, State = ets:lookup(FileSummary, File), case Locked of true -> - {blocked, State}; + add_to_pending_gc_completion({read, MsgId, From}, + State); false -> ok = case CurFile =:= File andalso {ok, Offset} >= file_handle_cache:current_raw_offset( @@ -535,10 +532,28 @@ read_message(MsgId, State = %% cache. ok end, - {ok, Msg, State1} + gen_server2:reply(From, {ok, Msg}), + State1 end; {Msg, _RefCount} -> - {ok, Msg, State} + gen_server2:reply(From, {ok, Msg}), + State + end + end. + +contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> + case index_lookup(MsgId, State) of + not_found -> + gen_server2:reply(From, false), + State; + #msg_location { file = File } -> + case GCActive of + {A, B} when File == A orelse File == B -> + add_to_pending_gc_completion( + {contains, MsgId, From}, State); + _ -> + gen_server2:reply(From, true), + State end end. @@ -588,12 +603,9 @@ run_pending(State = #msstate { pending_gc_completion = Pending }) -> lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)). run_pending({read, MsgId, From}, State) -> - case read_message(MsgId, State) of - {ok, Msg, State1} -> gen_server2:reply(From, {ok, Msg}), - State1; - {blocked, State1} -> add_to_pending_gc_completion( - {read, MsgId, From}, State1) - end; + read_message(MsgId, From, State); +run_pending({contains, MsgId, From}, State) -> + contains_message(MsgId, From, State); run_pending({remove, MsgId}, State) -> remove_message(MsgId, State). @@ -924,7 +936,7 @@ maybe_roll_to_new_file(_, State) -> maybe_compact(State = #msstate { sum_valid_data = SumValid, sum_file_size = SumFileSize, file_summary = FileSummary, - gc_running = false }) + gc_active = false }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> First = ets:first(FileSummary), N = random_distributions:geometric(?GEOMETRIC_P), @@ -938,7 +950,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, true = ets:update_element(FileSummary, Dest, {#file_summary.locked, true}), ok = rabbit_msg_store_gc:gc(Source, Dest), - State1 #msstate { gc_running = true } + State1 #msstate { gc_active = {Source, Dest} } end; maybe_compact(State) -> State. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7187e322..f5d7978c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -921,7 +921,7 @@ msg_store_write(MsgIds) -> ok = lists:foldl( fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, ok, MsgIds). - + test_msg_store() -> stop_msg_store(), ok = start_msg_store_empty(), @@ -1016,16 +1016,23 @@ test_msg_store() -> fun (MsgId, ok) -> rabbit_msg_store:write(msg_id_bin(MsgId), Payload) end, ok, MsgIdsBig), - %% .., then remove even numbers ascending, and odd numbers - %% descending. This hits the GC. + %% .., then 3s by 1... 
ok = lists:foldl( fun (MsgId, ok) -> - rabbit_msg_store:remove([msg_id_bin( - case MsgId rem 2 of - 0 -> MsgId; - 1 -> BigCount - MsgId - end)]) - end, ok, MsgIdsBig), + rabbit_msg_store:remove([msg_id_bin(MsgId)]) + end, ok, lists:seq(BigCount, 1, -3)), + %% .., then remove 3s by 2, from the young end first. This hits + %% GC (under 50% good data left, but no empty files. Must GC). + ok = lists:foldl( + fun (MsgId, ok) -> + rabbit_msg_store:remove([msg_id_bin(MsgId)]) + end, ok, lists:seq(BigCount-1, 1, -3)), + %% .., then remove 3s by 3, from the young end first. This hits + %% GC... + ok = lists:foldl( + fun (MsgId, ok) -> + rabbit_msg_store:remove([msg_id_bin(MsgId)]) + end, ok, lists:seq(BigCount-2, 1, -3)), %% ensure empty false = msg_store_contains(false, [msg_id_bin(M) || M <- MsgIdsBig]), %% restart empty -- cgit v1.2.1 From 1880374b8c1ff57643c194f691fc8db9d2d6286b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 22 Dec 2009 17:34:41 +0000 Subject: Support pluggable msg_store index module through the rabbit_msg_store_index_module rabbit application var --- src/rabbit_msg_store.erl | 10 ++++++++-- src/rabbit_msg_store_ets_index.erl | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c060c8d4..e3bd7316 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -244,8 +244,14 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - IndexModule = rabbit_msg_store_ets_index, - IndexState = IndexModule:init(), + IndexModule = + case application:get_env(rabbit_msg_store_index_module) of + {ok, Module} -> Module; + _ -> rabbit_msg_store_ets_index + end, + rabbit_log:info("Using ~p to provide index for message store~n", + [IndexModule]), + IndexState = IndexModule:init(Dir), InitFile = 0, FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index cb13ed86..e8d596f9 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -30,14 +30,14 @@ %% -module(rabbit_msg_store_ets_index). --export([init/0, lookup/2, insert/2, update/2, update_fields/3, delete/2, +-export([init/1, lookup/2, insert/2, update/2, update_fields/3, delete/2, delete_by_file/2, terminate/1]). -define(MSG_LOC_NAME, rabbit_msg_store_ets_index). -include("rabbit_msg_store.hrl"). -init() -> +init(_Dir) -> ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]). 
lookup(Key, MsgLocations) -> -- cgit v1.2.1 From 9c09e25e47f6bb674581bdd1c9621b9f24f742a7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 26 Dec 2009 21:30:15 +0000 Subject: Put the default index module (ets) in the .app template, and assume the env var exists --- ebin/rabbit_app.in | 1 + src/rabbit_msg_store.erl | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index 3616fcbf..b5998112 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -18,6 +18,7 @@ {ssl_listeners, []}, {ssl_options, []}, {vm_memory_high_watermark, 0.4}, + {rabbit_msg_store_index_module, rabbit_msg_store_ets_index}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e3bd7316..48704f99 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -244,11 +244,7 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - IndexModule = - case application:get_env(rabbit_msg_store_index_module) of - {ok, Module} -> Module; - _ -> rabbit_msg_store_ets_index - end, + {ok, IndexModule} = application:get_env(rabbit_msg_store_index_module), rabbit_log:info("Using ~p to provide index for message store~n", [IndexModule]), IndexState = IndexModule:init(Dir), -- cgit v1.2.1 From 44f2a10e5418c342723e56e89071bc37d9162782 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 26 Dec 2009 21:34:55 +0000 Subject: Env var does not need rabbit_ prefix --- ebin/rabbit_app.in | 2 +- src/rabbit_msg_store.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index b5998112..035fa054 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -18,7 +18,7 @@ {ssl_listeners, []}, {ssl_options, []}, {vm_memory_high_watermark, 0.4}, - {rabbit_msg_store_index_module, rabbit_msg_store_ets_index}, + {msg_store_index_module, rabbit_msg_store_ets_index}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 48704f99..7bf91bb3 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -244,7 +244,7 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - {ok, IndexModule} = application:get_env(rabbit_msg_store_index_module), + {ok, IndexModule} = application:get_env(msg_store_index_module), rabbit_log:info("Using ~p to provide index for message store~n", [IndexModule]), IndexState = IndexModule:init(Dir), -- cgit v1.2.1 From 661eae55a69dd5dff671861acf4ef51ceeeb4bbf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 7 Jan 2010 16:26:24 +0000 Subject: Toughened up cache accessors and switched to using named tables in prep for concurrent readers of msg_store. --- include/rabbit_msg_store.hrl | 3 + src/rabbit_msg_store.erl | 188 +++++++++++++++++++++---------------------- src/rabbit_msg_store_gc.erl | 20 +++-- 3 files changed, 104 insertions(+), 107 deletions(-) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 925d5d8e..0e9a0408 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -49,3 +49,6 @@ -define(FILE_SIZE_LIMIT, (16*1024*1024)). -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + +-define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). +-define(CACHE_ETS_NAME, rabbit_msg_store_cache). 
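[Editorial sketch] The configuration commits above settle on: ship a default in the .app template, then read the key unconditionally (and drop the redundant rabbit_ prefix, since app env keys are already namespaced by the application). The lookup in isolation; application:get_env/3 with an inline default is the standalone equivalent of relying on the .app entry:

index_module() ->
    %% {msg_store_index_module, rabbit_msg_store_ets_index} in
    %% ebin/rabbit_app.in makes the two-argument get_env total inside the
    %% running app; the three-argument form behaves the same from anywhere.
    application:get_env(rabbit, msg_store_index_module,
                        rabbit_msg_store_ets_index).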
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 7bf91bb3..8acd9149 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -76,14 +76,12 @@ {dir, %% store directory index_module, %% the module for index ops index_state, %% where are messages? - file_summary, %% what's in the files? current_file, %% current file name as number current_file_handle, %% current file handle %% since the last fsync? file_handle_cache, %% file handle cache on_sync, %% pending sync requests sync_timer_ref, %% TRef for our interval timer - message_cache, %% ets message cache sum_valid_data, %% sum of valid data in all files sum_file_size, %% sum of file sizes pending_gc_completion, %% things to do once GC completes @@ -92,8 +90,6 @@ -include("rabbit_msg_store.hrl"). --define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). --define(CACHE_ETS_NAME, rabbit_msg_store_cache). %% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION %% It is not recommended to set this to < 0.5 -define(GARBAGE_FRACTION, 0.5). @@ -250,21 +246,19 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> IndexState = IndexModule:init(Dir), InitFile = 0, - FileSummary = ets:new(?FILE_SUMMARY_ETS_NAME, - [ordered_set, public, - {keypos, #file_summary.file}]), - MessageCache = ets:new(?CACHE_ETS_NAME, [set, private]), + ?FILE_SUMMARY_ETS_NAME = ets:new(?FILE_SUMMARY_ETS_NAME, + [ordered_set, public, named_table, + {keypos, #file_summary.file}]), + ?CACHE_ETS_NAME = ets:new(?CACHE_ETS_NAME, [set, public, named_table]), State = #msstate { dir = Dir, index_module = IndexModule, index_state = IndexState, - file_summary = FileSummary, current_file = InitFile, current_file_handle = undefined, file_handle_cache = dict:new(), on_sync = [], sync_timer_ref = undefined, - message_cache = MessageCache, sum_valid_data = 0, sum_file_size = 0, pending_gc_completion = [], @@ -290,8 +284,7 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), - {ok, _Pid} = rabbit_msg_store_gc:start_link( - Dir, IndexState, FileSummary, IndexModule), + {ok, _Pid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule), {ok, State1 #msstate { current_file_handle = FileHdl }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
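[Editorial sketch] Making the tables named_table and public in init, as above, is what "in prep for concurrent readers" means concretely: any process that knows the atom can operate on the table, so the file summary and cache stop being private to the store process. Matching on the return value works because ets:new/2 returns the name itself for named tables. A tiny illustrative demo:

demo() ->
    t_demo = ets:new(t_demo, [set, public, named_table]),
    Self = self(),
    spawn(fun () ->
                  %% a different process, addressing the table by name
                  true = ets:insert(t_demo, {key, 1}),
                  Self ! written
          end),
    receive written -> ok end,
    [{key, 1}] = ets:lookup(t_demo, key),
    true = ets:delete(t_demo),
    ok.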
@@ -307,7 +300,6 @@ handle_call({contains, MsgId}, From, State) -> handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, current_file = CurFile, - file_summary = FileSummary, sum_valid_data = SumValid, sum_file_size = SumFileSize }) -> case index_lookup(MsgId, State) of @@ -324,17 +316,18 @@ handle_cast({write, MsgId, Msg}, right = undefined, locked = false, file_size = FileSize }] = - ets:lookup(FileSummary, CurFile), + ets:lookup(?FILE_SUMMARY_ETS_NAME, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> %% can't be any holes in this file ValidTotalSize1; true -> ContiguousTop end, - true = ets:insert(FileSummary, FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1, - file_size = FileSize + TotalSize }), + true = ets:insert(?FILE_SUMMARY_ETS_NAME, + FSEntry #file_summary { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1, + file_size = FileSize + TotalSize }), NextOffset = CurOffset + TotalSize, noreply(maybe_compact(maybe_roll_to_new_file( NextOffset, State #msstate @@ -357,7 +350,7 @@ handle_cast({remove, MsgIds}, State) -> noreply(maybe_compact(State1)); handle_cast({release, MsgIds}, State) -> - lists:foreach(fun (MsgId) -> decrement_cache(MsgId, State) end, MsgIds), + lists:foreach(fun (MsgId) -> decrement_cache(MsgId) end, MsgIds), noreply(State); handle_cast({sync, MsgIds, K}, @@ -380,20 +373,19 @@ handle_cast(sync, State) -> handle_cast({gc_done, Reclaimed, Source, Dest}, State = #msstate { sum_file_size = SumFileSize, - gc_active = {Source, Dest}, - file_summary = FileSummary }) -> + gc_active = {Source, Dest} }) -> %% we always move data left, so Source has gone and was on the %% right, so need to make dest = source.right.left, and also %% dest.right = source.right [#file_summary { left = Dest, right = SourceRight, locked = true }] = - ets:lookup(FileSummary, Source), + ets:lookup(?FILE_SUMMARY_ETS_NAME, Source), %% this could fail if SourceRight == undefined - ets:update_element(FileSummary, SourceRight, + ets:update_element(?FILE_SUMMARY_ETS_NAME, SourceRight, {#file_summary.left, Dest}), - true = ets:update_element(FileSummary, Dest, + true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Dest, [{#file_summary.locked, false}, {#file_summary.right, SourceRight}]), - true = ets:delete(FileSummary, Source), + true = ets:delete(?FILE_SUMMARY_ETS_NAME, Source), noreply(run_pending( State #msstate { sum_file_size = SumFileSize - Reclaimed, gc_active = false })). @@ -410,7 +402,6 @@ handle_info({'EXIT', _Pid, Reason}, State) -> terminate(_Reason, State = #msstate { index_state = IndexState, index_module = IndexModule, - file_summary = FileSummary, current_file_handle = FileHdl }) -> %% stop the gc first, otherwise it could be working and we pull %% out the ets tables from under it. @@ -422,10 +413,9 @@ terminate(_Reason, State = #msstate { index_state = IndexState, State2 end, State3 = close_all_handles(State1), - ets:delete(FileSummary), + ets:delete(?FILE_SUMMARY_ETS_NAME), IndexModule:terminate(IndexState), State3 #msstate { index_state = undefined, - file_summary = undefined, current_file_handle = undefined }. 
code_change(_OldVsn, State, _Extra) -> @@ -484,8 +474,7 @@ sync(State = #msstate { current_file_handle = CurHdl, read_message(MsgId, From, State = #msstate { current_file = CurFile, - current_file_handle = CurHdl, - file_summary = FileSummary }) -> + current_file_handle = CurHdl }) -> case index_lookup(MsgId, State) of not_found -> gen_server2:reply(From, not_found), State; @@ -493,10 +482,10 @@ read_message(MsgId, From, State = file = File, offset = Offset, total_size = TotalSize } -> - case fetch_and_increment_cache(MsgId, State) of + case fetch_and_increment_cache(MsgId) of not_found -> [#file_summary { locked = Locked }] = - ets:lookup(FileSummary, File), + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case Locked of true -> add_to_pending_gc_completion({read, MsgId, From}, @@ -525,7 +514,7 @@ read_message(MsgId, From, State = end, ok = case RefCount > 1 of true -> - insert_into_cache(MsgId, Msg, State1); + insert_into_cache(MsgId, Msg); false -> %% it's not in the cache and %% we only have one reference @@ -537,7 +526,7 @@ read_message(MsgId, From, State = gen_server2:reply(From, {ok, Msg}), State1 end; - {Msg, _RefCount} -> + Msg -> gen_server2:reply(From, {ok, Msg}), State end @@ -559,18 +548,17 @@ contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> end end. -remove_message(MsgId, State = #msstate { file_summary = FileSummary, - sum_valid_data = SumValid }) -> +remove_message(MsgId, State = #msstate { sum_valid_data = SumValid }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), case RefCount of 1 -> - ok = remove_cache_entry(MsgId, State), + ok = remove_cache_entry(MsgId), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, locked = Locked }] = - ets:lookup(FileSummary, File), + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case Locked of true -> add_to_pending_gc_completion({remove, MsgId}, State); @@ -578,15 +566,15 @@ remove_message(MsgId, State = #msstate { file_summary = FileSummary, ok = index_delete(MsgId, State), ContiguousTop1 = lists:min([ContiguousTop, Offset]), ValidTotalSize1 = ValidTotalSize - TotalSize, - true = ets:insert( - FileSummary, FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), + true = ets:insert(?FILE_SUMMARY_ETS_NAME, + FSEntry #file_summary { + valid_total_size = ValidTotalSize1, + contiguous_top = ContiguousTop1 }), State1 = delete_file_if_empty(File, State), State1 #msstate { sum_valid_data = SumValid - TotalSize } end; _ when 1 < RefCount -> - ok = decrement_cache(MsgId, State), + ok = decrement_cache(MsgId), %% only update field, otherwise bad interaction with concurrent GC ok = index_update_fields(MsgId, {#msg_location.ref_count, RefCount - 1}, @@ -642,22 +630,27 @@ new_handle(Key, FileName, Mode, State = #msstate { file_handle_cache = FHC, %% message cache helper functions %%---------------------------------------------------------------------------- -remove_cache_entry(MsgId, #msstate { message_cache = Cache }) -> - true = ets:delete(Cache, MsgId), +remove_cache_entry(MsgId) -> + true = ets:delete(?CACHE_ETS_NAME, MsgId), ok. 
-fetch_and_increment_cache(MsgId, #msstate { message_cache = Cache }) -> - case ets:lookup(Cache, MsgId) of +fetch_and_increment_cache(MsgId) -> + case ets:lookup(?CACHE_ETS_NAME, MsgId) of [] -> not_found; - [{MsgId, Msg, _RefCount}] -> - NewRefCount = ets:update_counter(Cache, MsgId, {3, 1}), - {Msg, NewRefCount} + [{_MsgId, Msg, _RefCount}] -> + try + ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, 1}) + catch error:badarg -> + %% someone has deleted us in the meantime, insert us + ok = insert_into_cache(MsgId, Msg) + end, + Msg end. -decrement_cache(MsgId, #msstate { message_cache = Cache }) -> - true = try case ets:update_counter(Cache, MsgId, {3, -1}) of - N when N =< 0 -> true = ets:delete(Cache, MsgId); +decrement_cache(MsgId) -> + true = try case ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, -1}) of + N when N =< 0 -> true = ets:delete(?CACHE_ETS_NAME, MsgId); _N -> true end catch error:badarg -> @@ -668,9 +661,16 @@ decrement_cache(MsgId, #msstate { message_cache = Cache }) -> end, ok. -insert_into_cache(MsgId, Msg, #msstate { message_cache = Cache }) -> - true = ets:insert_new(Cache, {MsgId, Msg, 1}), - ok. +insert_into_cache(MsgId, Msg) -> + case ets:insert_new(?CACHE_ETS_NAME, {MsgId, Msg, 1}) of + true -> ok; + false -> try + ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, 1}), + ok + catch error:badarg -> + insert_into_cache(MsgId, Msg) + end + end. %%---------------------------------------------------------------------------- %% index @@ -855,16 +855,15 @@ build_index(Files, State) -> {Offset, State1} = build_index(undefined, Files, State), {Offset, lists:foldl(fun delete_file_if_empty/2, State1, Files)}. -build_index(Left, [], State = #msstate { file_summary = FileSummary }) -> +build_index(Left, [], State) -> ok = index_delete_by_file(undefined, State), - Offset = case ets:lookup(FileSummary, Left) of + Offset = case ets:lookup(?FILE_SUMMARY_ETS_NAME, Left) of [] -> 0; [#file_summary { file_size = FileSize }] -> FileSize end, {Offset, State #msstate { current_file = Left }}; build_index(Left, [File|Files], - State = #msstate { dir = Dir, file_summary = FileSummary, - sum_valid_data = SumValid, + State = #msstate { dir = Dir, sum_valid_data = SumValid, sum_file_size = SumFileSize }) -> {ok, Messages, FileSize} = rabbit_msg_store_misc:scan_file_for_valid_messages( @@ -899,10 +898,10 @@ build_index(Left, [File|Files], [F|_] -> {F, FileSize} end, true = - ets:insert_new(FileSummary, #file_summary { - file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, locked = false, - left = Left, right = Right, file_size = FileSize1 }), + ets:insert_new(?FILE_SUMMARY_ETS_NAME, #file_summary { + file = File, valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, locked = false, + left = Left, right = Right, file_size = FileSize1 }), build_index(File, Files, State #msstate { sum_valid_data = SumValid + ValidTotalSize, sum_file_size = SumFileSize + FileSize1 }). 
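[Editorial sketch] The toughened cache accessors above all lean on the same trick: ets:update_counter/3 raises badarg when the row has vanished, so a race with a concurrent delete surfaces as an exception that can be turned into a retry (insert_into_cache) or a no-op (decrement_cache) rather than a corrupted count. The insert-or-bump half, condensed into one standalone function:

cache_ref(Tab, MsgId, Msg) ->
    case ets:insert_new(Tab, {MsgId, Msg, 1}) of
        true  -> ok;                             %% we created the entry
        false -> try
                     _ = ets:update_counter(Tab, MsgId, {3, 1}),
                     ok
                 catch error:badarg ->
                     %% deleted between insert_new and the bump: start over
                     cache_ref(Tab, MsgId, Msg)
                 end
    end.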
@@ -914,8 +913,7 @@ build_index(Left, [File|Files], maybe_roll_to_new_file(Offset, State = #msstate { dir = Dir, current_file_handle = CurHdl, - current_file = CurFile, - file_summary = FileSummary }) + current_file = CurFile }) when Offset >= ?FILE_SIZE_LIMIT -> State1 = sync(State), ok = file_handle_cache:close(CurHdl), @@ -924,11 +922,11 @@ maybe_roll_to_new_file(Offset, Dir, rabbit_msg_store_misc:filenum_to_name(NextFile), ?WRITE_MODE), true = ets:insert_new( - FileSummary, #file_summary { - file = NextFile, valid_total_size = 0, contiguous_top = 0, - left = CurFile, right = undefined, file_size = 0, - locked = false }), - true = ets:update_element(FileSummary, CurFile, + ?FILE_SUMMARY_ETS_NAME, #file_summary { + file = NextFile, valid_total_size = 0, contiguous_top = 0, + left = CurFile, right = undefined, file_size = 0, + locked = false }), + true = ets:update_element(?FILE_SUMMARY_ETS_NAME, CurFile, {#file_summary.right, NextFile}), State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }; @@ -937,19 +935,18 @@ maybe_roll_to_new_file(_, State) -> maybe_compact(State = #msstate { sum_valid_data = SumValid, sum_file_size = SumFileSize, - file_summary = FileSummary, gc_active = false }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> - First = ets:first(FileSummary), + First = ets:first(?FILE_SUMMARY_ETS_NAME), N = random_distributions:geometric(?GEOMETRIC_P), - case find_files_to_gc(FileSummary, N, First) of + case find_files_to_gc(N, First) of undefined -> State; {Source, Dest} -> State1 = close_handle(Source, close_handle(Dest, State)), - true = ets:update_element(FileSummary, Source, + true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Source, {#file_summary.locked, true}), - true = ets:update_element(FileSummary, Dest, + true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Dest, {#file_summary.locked, true}), ok = rabbit_msg_store_gc:gc(Source, Dest), State1 #msstate { gc_active = {Source, Dest} } @@ -957,14 +954,13 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, maybe_compact(State) -> State. -find_files_to_gc(_FileSummary, _N, '$end_of_table') -> +find_files_to_gc(_N, '$end_of_table') -> undefined; -find_files_to_gc(FileSummary, N, First) -> +find_files_to_gc(N, First) -> [FirstObj = #file_summary { right = Right }] = - ets:lookup(FileSummary, First), - Pairs = - find_files_to_gc(FileSummary, N, FirstObj, - ets:lookup(FileSummary, Right), []), + ets:lookup(?FILE_SUMMARY_ETS_NAME, First), + Pairs = find_files_to_gc(N, FirstObj, + ets:lookup(?FILE_SUMMARY_ETS_NAME, Right), []), case Pairs of [] -> undefined; [Pair] -> Pair; @@ -972,9 +968,9 @@ find_files_to_gc(FileSummary, N, First) -> lists:nth(M, Pairs) end. 
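[Editorial sketch] maybe_compact above draws N from random_distributions:geometric(?GEOMETRIC_P), a module this series does not include, so candidate pairs near the eldest file are strongly preferred while every eligible pair keeps a nonzero chance. One plausible inversion-sampling implementation (hypothetical; rand is the modern stdlib module, code of this era would have used random):

geometric(P) when P > 0.0, P < 1.0 ->
    U = 1.0 - rand:uniform(),                  %% uniform on (0.0, 1.0]
    %% smallest N >= 1 whose cumulative geometric probability covers U
    1 + trunc(math:log(U) / math:log(1.0 - P)).

%% With P = 0.3, N = 1 about 30% of the time, N = 2 about 21%, and so on;
%% find_files_to_gc then picks element 1 + (N rem length(Pairs)).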
-find_files_to_gc(_FileSummary, _N, #file_summary {}, [], Pairs) -> +find_files_to_gc(_N, #file_summary {}, [], Pairs) -> lists:reverse(Pairs); -find_files_to_gc(FileSummary, N, +find_files_to_gc(N, #file_summary { right = Source, file = Dest, valid_total_size = DestValid }, [SourceObj = #file_summary { left = Dest, right = SourceRight, @@ -985,22 +981,22 @@ find_files_to_gc(FileSummary, N, Pair = {Source, Dest}, case N == 1 of true -> [Pair]; - false -> find_files_to_gc(FileSummary, (N - 1), SourceObj, - ets:lookup(FileSummary, SourceRight), + false -> find_files_to_gc((N - 1), SourceObj, + ets:lookup(?FILE_SUMMARY_ETS_NAME, SourceRight), [Pair | Pairs]) end; -find_files_to_gc(FileSummary, N, _Left, +find_files_to_gc(N, _Left, [Right = #file_summary { right = RightRight }], Pairs) -> - find_files_to_gc(FileSummary, N, Right, - ets:lookup(FileSummary, RightRight), Pairs). + find_files_to_gc( + N, Right, ets:lookup(?FILE_SUMMARY_ETS_NAME, RightRight), Pairs). delete_file_if_empty(File, State = #msstate { current_file = File }) -> State; -delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, - sum_file_size = SumFileSize } = State) -> +delete_file_if_empty(File, State = + #msstate { dir = Dir, sum_file_size = SumFileSize }) -> [#file_summary { valid_total_size = ValidData, file_size = FileSize, left = Left, right = Right, locked = false }] = - ets:lookup(FileSummary, File), + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined @@ -1008,15 +1004,15 @@ delete_file_if_empty(File, #msstate { dir = Dir, file_summary = FileSummary, {undefined, _} when not is_atom(Right) -> %% the eldest file is empty. true = ets:update_element( - FileSummary, Right, + ?FILE_SUMMARY_ETS_NAME, Right, {#file_summary.left, undefined}); {_, _} when not is_atom(Right) -> - true = ets:update_element(FileSummary, Right, + true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Right, {#file_summary.left, Left}), - true = ets:update_element(FileSummary, Left, + true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Left, {#file_summary.right, Right}) end, - true = ets:delete(FileSummary, File), + true = ets:delete(?FILE_SUMMARY_ETS_NAME, File), State1 = close_handle(File, State), ok = file:delete(rabbit_msg_store_misc:form_filename( Dir, diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 729cd287..1866e629 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/4, gc/2, stop/0]). +-export([start_link/3, gc/2, stop/0]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -41,7 +41,6 @@ -record(gcstate, {dir, index_state, - file_summary, index_module }). @@ -51,9 +50,9 @@ %%---------------------------------------------------------------------------- -start_link(Dir, IndexState, FileSummary, IndexModule) -> +start_link(Dir, IndexState, IndexModule) -> gen_server2:start_link({local, ?SERVER}, ?MODULE, - [Dir, IndexState, FileSummary, IndexModule], + [Dir, IndexState, IndexModule], [{timeout, infinity}]). 
gc(Source, Destination) -> @@ -64,9 +63,9 @@ stop() -> %%---------------------------------------------------------------------------- -init([Dir, IndexState, FileSummary, IndexModule]) -> +init([Dir, IndexState, IndexModule]) -> {ok, #gcstate { dir = Dir, index_state = IndexState, - file_summary = FileSummary, index_module = IndexModule }, + index_module = IndexModule }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -89,23 +88,22 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -adjust_meta_and_combine(SourceFile, DestFile, - State = #gcstate { file_summary = FileSummary }) -> +adjust_meta_and_combine(SourceFile, DestFile, State) -> [SourceObj = #file_summary { valid_total_size = SourceValidData, left = DestFile, file_size = SourceFileSize, locked = true }] = - ets:lookup(FileSummary, SourceFile), + ets:lookup(?FILE_SUMMARY_ETS_NAME, SourceFile), [DestObj = #file_summary { valid_total_size = DestValidData, right = SourceFile, file_size = DestFileSize, locked = true }] = - ets:lookup(FileSummary, DestFile), + ets:lookup(?FILE_SUMMARY_ETS_NAME, DestFile), TotalValidData = DestValidData + SourceValidData, ok = combine_files(SourceObj, DestObj, State), %% don't update dest.right, because it could be changing at the same time true = - ets:update_element(FileSummary, DestFile, + ets:update_element(?FILE_SUMMARY_ETS_NAME, DestFile, [{#file_summary.valid_total_size, TotalValidData}, {#file_summary.contiguous_top, TotalValidData}, {#file_summary.file_size, TotalValidData}]), -- cgit v1.2.1 From 52d624f7ec8b931830f8777223d1686c065e65fd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Jan 2010 16:58:54 +0000 Subject: Msg_store now supports concurrent reads when it is safe, directly from the queue. This means that even if the msg_store process is flooded with writes or acks, it won't (necessarily) block queues. This is extremely useful and has substantial benefit when memory has been exhausted and the queue is operating off disk only (effectively, no message content held in ram). --- include/rabbit_msg_store.hrl | 3 +- src/rabbit_msg_store.erl | 262 +++++++++++++++++++++++++++++++++++------- src/rabbit_msg_store_gc.erl | 28 +++-- src/rabbit_tests.erl | 39 ++++--- src/rabbit_variable_queue.erl | 25 ++-- 5 files changed, 282 insertions(+), 75 deletions(-) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 0e9a0408..a094454a 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -34,7 +34,7 @@ -record(file_summary, {file, valid_total_size, contiguous_top, left, right, file_size, - locked}). + locked, readers}). -define(BINARY_MODE, [raw, binary]). -define(READ_MODE, [read]). @@ -52,3 +52,4 @@ -define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). -define(CACHE_ETS_NAME, rabbit_msg_store_cache). +-define(FILE_HANDLES_ETS_NAME, rabbit_msg_store_file_handles). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 8acd9149..3a645059 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,8 +33,8 @@ -behaviour(gen_server2). --export([start_link/3, write/2, read/1, contains/1, remove/1, release/1, - sync/2]). +-export([start_link/3, write/2, read/2, contains/1, remove/1, release/1, + sync/2, client_init/0, client_terminate/1]). -export([sync/0, gc_done/3]).
%% internal @@ -49,45 +49,62 @@ %%---------------------------------------------------------------------------- +-record(msstate, + { dir, %% store directory + index_module, %% the module for index ops + index_state, %% where are messages? + current_file, %% current file name as number + current_file_handle, %% current file handle + %% since the last fsync? + file_handle_cache, %% file handle cache + on_sync, %% pending sync requests + sync_timer_ref, %% TRef for our interval timer + sum_valid_data, %% sum of valid data in all files + sum_file_size, %% sum of file sizes + pending_gc_completion, %% things to do once GC completes + gc_active %% is the GC currently working? + }). + +-record(client_msstate, + { file_handle_cache, + index_state, + index_module, + dir + }). + +%%---------------------------------------------------------------------------- + -ifdef(use_specs). -type(msg_id() :: binary()). -type(msg() :: any()). -type(file_path() :: any()). -type(file_num() :: non_neg_integer()). +-type(client_msstate() :: #client_msstate { file_handle_cache :: dict(), + index_state :: any(), + index_module :: atom(), + dir :: file_path() }). -spec(start_link/3 :: (file_path(), (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(write/2 :: (msg_id(), msg()) -> 'ok'). --spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). +%% -spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). +-spec(read/2 :: (msg_id(), client_msstate()) -> + {{'ok', msg()} | 'not_found', client_msstate()}). -spec(contains/1 :: (msg_id()) -> boolean()). -spec(remove/1 :: ([msg_id()]) -> 'ok'). -spec(release/1 :: ([msg_id()]) -> 'ok'). -spec(sync/2 :: ([msg_id()], fun (() -> any())) -> 'ok'). -spec(gc_done/3 :: (non_neg_integer(), file_num(), file_num()) -> 'ok'). +-spec(client_init/0 :: () -> client_msstate()). +-spec(client_terminate/1 :: (client_msstate()) -> 'ok'). -endif. %%---------------------------------------------------------------------------- --record(msstate, - {dir, %% store directory - index_module, %% the module for index ops - index_state, %% where are messages? - current_file, %% current file name as number - current_file_handle, %% current file handle - %% since the last fsync? - file_handle_cache, %% file handle cache - on_sync, %% pending sync requests - sync_timer_ref, %% TRef for our interval timer - sum_valid_data, %% sum of valid data in all files - sum_file_size, %% sum of file sizes - pending_gc_completion, %% things to do once GC completes - gc_active %% is the GC currently working? - }). - -include("rabbit_msg_store.hrl"). %% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION @@ -221,16 +238,120 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). -write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). -read(MsgId) -> gen_server2:call(?SERVER, {read, MsgId}, infinity). -contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). -remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). -release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). -sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). -sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal +write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). 
+ +read(MsgId, CState) -> + case index_lookup(MsgId, CState) of + not_found -> + {gen_server2:call(?SERVER, {read, MsgId}, infinity), CState}; + #msg_location { ref_count = RefCount, + file = File, + offset = Offset, + total_size = TotalSize } -> + case fetch_and_increment_cache(MsgId) of + not_found -> + [#file_summary { locked = Locked, right = Right }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case Right =:= undefined orelse Locked =:= true of + true -> + {gen_server2:call(?SERVER, {read, MsgId}, infinity), + CState}; + false -> + ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, 1}), + %% need to check again to see if we've + %% been locked in the meantime + [#file_summary { locked = Locked2 }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case Locked2 of + true -> + {gen_server2:call(?SERVER, {read, MsgId}, + infinity), CState}; + false -> + %% ok, we're definitely safe to + %% continue - a GC can't start up + %% now + Self = self(), + CState1 = + case ets:lookup(?FILE_HANDLES_ETS_NAME, + {File, self()}) of + [{Key, close}] -> + CState2 = + close_handle(File, CState), + true = ets:insert( + ?FILE_HANDLES_ETS_NAME, + {Key, open}), + CState2; + [{_Key, open}] -> + CState; + [] -> + true = ets:insert_new( + ?FILE_HANDLES_ETS_NAME, + {{File, Self}, open}), + CState + end, + {Hdl, CState3} = + get_read_handle(File, CState1), + {ok, Offset} = + file_handle_cache:position(Hdl, Offset), + {ok, {MsgId, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {MsgId, _}} = Obj -> Obj; + Rest -> + throw({error, + {misread, + [{old_cstate, CState1}, + {file_num, File}, + {offset, Offset}, + {read, Rest}, + {proc_dict, get()} + ]}}) + end, + ets:update_counter( + ?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, -1}), + ok = case RefCount > 1 of + true -> + insert_into_cache(MsgId, Msg); + false -> + %% it's not in the + %% cache and we only + %% have one reference + %% to the message. So + %% don't bother + %% putting it in the + %% cache. + ok + end, + {{ok, Msg}, CState3} + end + end; + Msg -> + {{ok, Msg}, CState} + end + end. + +contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). +remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). +release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). +sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). +sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal + gc_done(Reclaimed, Source, Destination) -> gen_server2:pcast(?SERVER, 9, {gc_done, Reclaimed, Source, Destination}). +client_init() -> + {IState, IModule, Dir} = + gen_server2:call(?SERVER, new_client_state, infinity), + #client_msstate { file_handle_cache = dict:new(), + index_state = IState, + index_module = IModule, + dir = Dir }. + +client_terminate(CState) -> + close_all_handles(CState), + ok. 
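To make the new client-side API concrete, here is a minimal usage sketch (not code from the patch): a caller obtains its own client state, threads it through every read so cached file handles are reused, and hands it back on termination. `read_all/1` and its argument are hypothetical; `MsgIds` is assumed to be a list of ids previously written with write/2, with the store server already running.

%% Hypothetical caller of the client-side read API added above.
read_all(MsgIds) ->
    CState0 = rabbit_msg_store:client_init(),
    {Msgs, CState1} =
        lists:foldl(
          fun (MsgId, {Acc, CStateN}) ->
                  %% read/2 returns the (possibly updated) client
                  %% state so that file handles stay cached
                  case rabbit_msg_store:read(MsgId, CStateN) of
                      {{ok, Msg}, CStateM} -> {[Msg | Acc], CStateM};
                      {not_found, CStateM} -> {Acc, CStateM}
                  end
          end, {[], CState0}, MsgIds),
    %% closes cached handles and cleans up the per-process ets entries
    ok = rabbit_msg_store:client_terminate(CState1),
    {ok, lists:reverse(Msgs)}.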
+ %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- @@ -250,6 +371,8 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> [ordered_set, public, named_table, {keypos, #file_summary.file}]), ?CACHE_ETS_NAME = ets:new(?CACHE_ETS_NAME, [set, public, named_table]), + ?FILE_HANDLES_ETS_NAME = ets:new(?FILE_HANDLES_ETS_NAME, + [ordered_set, public, named_table]), State = #msstate { dir = Dir, index_module = IndexModule, @@ -295,7 +418,12 @@ handle_call({read, MsgId}, From, State) -> handle_call({contains, MsgId}, From, State) -> State1 = contains_message(MsgId, From, State), - noreply(State1). + noreply(State1); + +handle_call(new_client_state, _From, + State = #msstate { index_state = IndexState, dir = Dir, + index_module = IndexModule }) -> + reply({IndexState, IndexModule, Dir}, State). handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, @@ -414,6 +542,8 @@ terminate(_Reason, State = #msstate { index_state = IndexState, end, State3 = close_all_handles(State1), ets:delete(?FILE_SUMMARY_ETS_NAME), + ets:delete(?CACHE_ETS_NAME), + ets:delete(?FILE_HANDLES_ETS_NAME), IndexModule:terminate(IndexState), State3 #msstate { index_state = undefined, current_file_handle = undefined }. @@ -599,33 +729,56 @@ run_pending({contains, MsgId, From}, State) -> run_pending({remove, MsgId}, State) -> remove_message(MsgId, State). +close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) -> + CState #client_msstate { file_handle_cache = close_handle(Key, FHC) }; + close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> + State #msstate { file_handle_cache = close_handle(Key, FHC) }; + +close_handle(Key, FHC) -> case dict:find(Key, FHC) of {ok, Hdl} -> ok = file_handle_cache:close(Hdl), - State #msstate { file_handle_cache = dict:erase(Key, FHC) }; - error -> State + dict:erase(Key, FHC); + error -> FHC end. +close_all_handles(CState = #client_msstate { file_handle_cache = FHC }) -> + Self = self(), + ok = dict:fold(fun (Key, Hdl, ok) -> + true = + ets:delete(?FILE_HANDLES_ETS_NAME, {Key, Self}), + file_handle_cache:close(Hdl) + end, ok, FHC), + CState #client_msstate { file_handle_cache = dict:new() }; + close_all_handles(State = #msstate { file_handle_cache = FHC }) -> ok = dict:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end, ok, FHC), State #msstate { file_handle_cache = dict:new() }. -get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC }) -> +get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC, + dir = Dir }) -> + {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), + {Hdl, CState #client_msstate { file_handle_cache = FHC2 }}; + +get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC, + dir = Dir }) -> + {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), + {Hdl, State #msstate { file_handle_cache = FHC2 }}. + +get_read_handle(FileNum, FHC, Dir) -> case dict:find(FileNum, FHC) of - {ok, Hdl} -> {Hdl, State}; - error -> new_handle(FileNum, - rabbit_msg_store_misc:filenum_to_name(FileNum), - [read | ?BINARY_MODE], State) + {ok, Hdl} -> + {Hdl, FHC}; + error -> + {ok, Hdl} = rabbit_msg_store_misc:open_file( + Dir, rabbit_msg_store_misc:filenum_to_name(FileNum), + [read | ?BINARY_MODE]), + {Hdl, dict:store(FileNum, Hdl, FHC) } end. 
-new_handle(Key, FileName, Mode, State = #msstate { file_handle_cache = FHC, - dir = Dir }) -> - {ok, Hdl} = rabbit_msg_store_misc:open_file(Dir, FileName, Mode), - {Hdl, State #msstate { file_handle_cache = dict:store(Key, Hdl, FHC) }}. - %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- @@ -676,6 +829,9 @@ insert_into_cache(MsgId, Msg) -> %% index %%---------------------------------------------------------------------------- +index_lookup(Key, #client_msstate { index_module = Index, index_state = State }) -> + Index:lookup(Key, State); + index_lookup(Key, #msstate { index_module = Index, index_state = State }) -> Index:lookup(Key, State). @@ -901,7 +1057,8 @@ build_index(Left, [File|Files], ets:insert_new(?FILE_SUMMARY_ETS_NAME, #file_summary { file = File, valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, locked = false, - left = Left, right = Right, file_size = FileSize1 }), + left = Left, right = Right, file_size = FileSize1, + readers = 0 }), build_index(File, Files, State #msstate { sum_valid_data = SumValid + ValidTotalSize, sum_file_size = SumFileSize + FileSize1 }). @@ -925,7 +1082,7 @@ maybe_roll_to_new_file(Offset, ?FILE_SUMMARY_ETS_NAME, #file_summary { file = NextFile, valid_total_size = 0, contiguous_top = 0, left = CurFile, right = undefined, file_size = 0, - locked = false }), + locked = false, readers = 0 }), true = ets:update_element(?FILE_SUMMARY_ETS_NAME, CurFile, {#file_summary.right, NextFile}), State1 #msstate { current_file_handle = NextHdl, @@ -948,12 +1105,30 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, {#file_summary.locked, true}), true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Dest, {#file_summary.locked, true}), + %% now that they're locked, we know no queue will touch + %% them (not even add to the ets table for these files), + %% so now ensure that we ask the queues to close handles + %% to these files + true = mark_handle_to_close(Source), + true = mark_handle_to_close(Dest), ok = rabbit_msg_store_gc:gc(Source, Dest), State1 #msstate { gc_active = {Source, Dest} } end; maybe_compact(State) -> State. +mark_handle_to_close(File) -> + lists:foldl( + fun ({Key, opened}, true) -> + try + true = ets:update_element(?FILE_HANDLES_ETS_NAME, + Key, {2, close}) + catch error:badarg -> %% client has deleted concurrently, no prob + true + end + end, + true, ets:match_object(?FILE_HANDLES_ETS_NAME, {{File, '_'}, opened})). 
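The mark_handle_to_close mechanism above amounts to a two-party handshake over an ets table: the server flips each (file, pid) entry from open to close, and the owning reader notices the flag and closes its cached handle before its next read. (The opened/open atom mismatch visible in this hunk is corrected in the "Correct the closing mech" commit below.) A toy, single-process walk-through of the state transitions, with an illustrative table name rather than ?FILE_HANDLES_ETS_NAME:

%% Toy walk-through of the open -> close handshake; in the real
%% code the flip is done by the msg_store server and the delete by
%% the reading client.
handle_close_demo() ->
    T = ets:new(demo_file_handles, [ordered_set, public]),
    File = 1, Reader = self(),
    true = ets:insert_new(T, {{File, Reader}, open}),
    %% server side: ask every reader of File to close its handle
    [true = ets:update_element(T, Key, {2, close})
     || {Key, open} <- ets:match_object(T, {{File, '_'}, open})],
    %% reader side: honour the request before the next read
    [{_, close}] = ets:lookup(T, {File, Reader}),
    true = ets:delete(T, {File, Reader}),
    true = ets:delete(T),
    ok.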
+ find_files_to_gc(_N, '$end_of_table') -> undefined; find_files_to_gc(N, First) -> @@ -995,8 +1170,8 @@ delete_file_if_empty(File, State = #msstate { current_file = File }) -> delete_file_if_empty(File, State = #msstate { dir = Dir, sum_file_size = SumFileSize }) -> [#file_summary { valid_total_size = ValidData, file_size = FileSize, - left = Left, right = Right, locked = false }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + left = Left, right = Right, locked = false }] + = ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined @@ -1012,6 +1187,7 @@ delete_file_if_empty(File, State = true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Left, {#file_summary.right, Right}) end, + true = mark_handle_to_close(File), true = ets:delete(?FILE_SUMMARY_ETS_NAME, File), State1 = close_handle(File, State), ok = file:delete(rabbit_msg_store_misc:form_filename( diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 1866e629..d4c572c1 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -91,23 +91,33 @@ code_change(_OldVsn, State, _Extra) -> adjust_meta_and_combine(SourceFile, DestFile, State) -> [SourceObj = #file_summary { + readers = SourceReaders, valid_total_size = SourceValidData, left = DestFile, file_size = SourceFileSize, locked = true }] = ets:lookup(?FILE_SUMMARY_ETS_NAME, SourceFile), [DestObj = #file_summary { + readers = DestReaders, valid_total_size = DestValidData, right = SourceFile, file_size = DestFileSize, locked = true }] = ets:lookup(?FILE_SUMMARY_ETS_NAME, DestFile), - TotalValidData = DestValidData + SourceValidData, - ok = combine_files(SourceObj, DestObj, State), - %% don't update dest.right, because it could be changing at the same time - true = - ets:update_element(?FILE_SUMMARY_ETS_NAME, DestFile, - [{#file_summary.valid_total_size, TotalValidData}, - {#file_summary.contiguous_top, TotalValidData}, - {#file_summary.file_size, TotalValidData}]), - SourceFileSize + DestFileSize - TotalValidData. + case SourceReaders =:= 0 andalso DestReaders =:= 0 of + true -> + TotalValidData = DestValidData + SourceValidData, + ok = combine_files(SourceObj, DestObj, State), + %% don't update dest.right, because it could be changing + %% at the same time + true = ets:update_element( + ?FILE_SUMMARY_ETS_NAME, DestFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.contiguous_top, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SourceFileSize + DestFileSize - TotalValidData; + false -> + io:format("sleeping!~n"), + timer:sleep(100), + adjust_meta_and_combine(SourceFile, DestFile, State) + end. combine_files(#file_summary { file = Source, valid_total_size = SourceValid, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 62a4792c..856a8c46 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -911,11 +911,13 @@ msg_store_sync(MsgIds) -> throw(timeout) end. -msg_store_read(MsgIds) -> - ok = - lists:foldl( - fun (MsgId, ok) -> {ok, MsgId} = rabbit_msg_store:read(MsgId), ok end, - ok, MsgIds). +msg_store_read(MsgIds, MSCState) -> + lists:foldl( + fun (MsgId, MSCStateM) -> + {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read(MsgId, MSCStateM), + MSCStateN + end, + MSCState, MsgIds). 
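adjust_meta_and_combine above now refuses to combine while either file has live readers, retrying after a short sleep. The underlying shape is a plain poll-until loop; a generic rendering under stated assumptions (the predicate and interval are parameters here, whereas the patch hard-codes a 100ms sleep):

%% Generic poll-until helper matching the shape of the GC's
%% wait-for-readers loop; Pred is a fun returning boolean().
wait_until(Pred, IntervalMs) ->
    case Pred() of
        true  -> ok;
        false -> timer:sleep(IntervalMs),
                 wait_until(Pred, IntervalMs)
    end.

The GC would call something like this with a predicate reading both readers counters out of ?FILE_SUMMARY_ETS_NAME.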
msg_store_write(MsgIds) -> ok = lists:foldl( @@ -966,9 +968,10 @@ test_msg_store() -> %% should hit a different code path ok = msg_store_sync(MsgIds1stHalf), %% read them all - ok = msg_store_read(MsgIds), + MSCState = rabbit_msg_store:client_init(), + MSCState1 = msg_store_read(MsgIds, MSCState), %% read them all again - this will hit the cache, not disk - ok = msg_store_read(MsgIds), + MSCState2 = msg_store_read(MsgIds, MSCState1), %% remove them all ok = rabbit_msg_store:remove(MsgIds), %% check first half doesn't exist @@ -976,11 +979,12 @@ test_msg_store() -> %% check second half does exist true = msg_store_contains(true, MsgIds2ndHalf), %% read the second half again - ok = msg_store_read(MsgIds2ndHalf), + MSCState3 = msg_store_read(MsgIds2ndHalf, MSCState2), %% release the second half, just for fun (aka code coverage) ok = rabbit_msg_store:release(MsgIds2ndHalf), %% read the second half again, just for fun (aka code coverage) - ok = msg_store_read(MsgIds2ndHalf), + MSCState4 = msg_store_read(MsgIds2ndHalf, MSCState3), + ok = rabbit_msg_store:client_terminate(MSCState4), %% stop and restart, preserving every other msg in 2nd half ok = stop_msg_store(), ok = start_msg_store(fun ([]) -> finished; @@ -1003,19 +1007,28 @@ test_msg_store() -> %% publish the first half again ok = msg_store_write(MsgIds1stHalf), %% this should force some sort of sync internally otherwise misread - ok = msg_store_read(MsgIds1stHalf), + ok = rabbit_msg_store:client_terminate( + msg_store_read(MsgIds1stHalf, rabbit_msg_store:client_init())), ok = rabbit_msg_store:remove(MsgIds1stHalf), %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), %% now safe to reuse msg_ids %% push a lot of msgs in... BigCount = 100000, - MsgIdsBig = lists:seq(1, BigCount), + MsgIdsBig = [msg_id_bin(X) || X <- lists:seq(1, BigCount)], Payload = << 0:65536 >>, ok = lists:foldl( fun (MsgId, ok) -> - rabbit_msg_store:write(msg_id_bin(MsgId), Payload) + rabbit_msg_store:write(MsgId, Payload) end, ok, MsgIdsBig), + %% now read them to ensure we hit the fast client-side reading + ok = rabbit_msg_store:client_terminate( + lists:foldl( + fun (MsgId, MSCStateM) -> + {{ok, Payload}, MSCStateN} = + rabbit_msg_store:read(MsgId, MSCStateM), + MSCStateN + end, rabbit_msg_store:client_init(), MsgIdsBig)), %% .., then 3s by 1... ok = lists:foldl( fun (MsgId, ok) -> @@ -1034,7 +1047,7 @@ test_msg_store() -> rabbit_msg_store:remove([msg_id_bin(MsgId)]) end, ok, lists:seq(BigCount-2, 1, -3)), %% ensure empty - false = msg_store_contains(false, [msg_id_bin(M) || M <- MsgIdsBig]), + false = msg_store_contains(false, MsgIdsBig), %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f2d45700..7c1ef687 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -61,7 +61,8 @@ avg_ingress_rate, rate_timestamp, len, - on_sync + on_sync, + msg_store_read_state }). -include("rabbit.hrl"). @@ -124,7 +125,8 @@ avg_ingress_rate :: float(), rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), - on_sync :: {[ack()], [msg_id()], [{pid(), any()}]} + on_sync :: {[ack()], [msg_id()], [{pid(), any()}]}, + msg_store_read_state :: any() }). -spec(init/1 :: (queue_name()) -> vqstate()). 
@@ -198,11 +200,14 @@ init(QueueName) -> avg_ingress_rate = 0, rate_timestamp = Now, len = GammaCount, - on_sync = {[], [], []} + on_sync = {[], [], []}, + msg_store_read_state = rabbit_msg_store:client_init() }, maybe_gammas_to_betas(State). -terminate(State = #vqstate { index_state = IndexState }) -> +terminate(State = #vqstate { index_state = IndexState, + msg_store_read_state = MSCState }) -> + rabbit_msg_store:client_terminate(MSCState), State #vqstate { index_state = rabbit_queue_index:terminate(IndexState) }. publish(Msg, State) -> @@ -618,7 +623,8 @@ remove_queue_entries(Q, IndexState) -> fetch_from_q3_or_gamma(State = #vqstate { q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount }) -> + q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, + msg_store_read_state = MSCState }) -> case queue:out(Q3) of {empty, _Q3} -> 0 = GammaCount, %% ASSERTION @@ -629,15 +635,16 @@ fetch_from_q3_or_gamma(State = #vqstate { #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, is_persistent = IsPersistent, index_on_disk = IndexOnDisk }}, Q3a} -> - {ok, Msg = #basic_message { is_persistent = IsPersistent, - guid = MsgId }} = - rabbit_msg_store:read(MsgId), + {{ok, Msg = #basic_message { is_persistent = IsPersistent, + guid = MsgId }}, MSCState1} = + rabbit_msg_store:read(MsgId, MSCState), Q4a = queue:in( #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = true, index_on_disk = IndexOnDisk }, Q4), State1 = State #vqstate { q3 = Q3a, q4 = Q4a, - ram_msg_count = RamMsgCount + 1 }, + ram_msg_count = RamMsgCount + 1, + msg_store_read_state = MSCState1 }, State2 = case {queue:is_empty(Q3a), 0 == GammaCount} of {true, true} -> -- cgit v1.2.1 From 9bc3b3ace0a465642ff206f8943ed2d814e59a5f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Jan 2010 17:05:32 +0000 Subject: Whoops, forgot to remove an io:format --- src/rabbit_msg_store_gc.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index d4c572c1..7b751ce8 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -114,7 +114,6 @@ adjust_meta_and_combine(SourceFile, DestFile, State) -> {#file_summary.file_size, TotalValidData}]), SourceFileSize + DestFileSize - TotalValidData; false -> - io:format("sleeping!~n"), timer:sleep(100), adjust_meta_and_combine(SourceFile, DestFile, State) end. -- cgit v1.2.1 From 810c620e5d5d06381f901af488ec5b8b838440c9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Jan 2010 17:53:06 +0000 Subject: Correct the closing mech. Sadly I'm currently scanning the whole table every time, which is bad. 
I might change that to scan every, say, 1000 reads --- src/rabbit_msg_store.erl | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 3a645059..b66564bb 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -270,26 +270,14 @@ read(MsgId, CState) -> false -> %% ok, we're definitely safe to %% continue - a GC can't start up - %% now - Self = self(), - CState1 = - case ets:lookup(?FILE_HANDLES_ETS_NAME, - {File, self()}) of - [{Key, close}] -> - CState2 = - close_handle(File, CState), - true = ets:insert( - ?FILE_HANDLES_ETS_NAME, - {Key, open}), - CState2; - [{_Key, open}] -> - CState; - [] -> - true = ets:insert_new( - ?FILE_HANDLES_ETS_NAME, - {{File, Self}, open}), - CState - end, + %% now, and isn't running, so + %% nothing will tell us from now + %% on to close the handle if it's + %% already open. + %% this is fine to fail (already exists) + ets:insert_new(?FILE_HANDLES_ETS_NAME, + {{File, self()}, open}), + CState1 = close_all_indicated(CState), {Hdl, CState3} = get_read_handle(File, CState1), {ok, Offset} = @@ -331,6 +319,13 @@ read(MsgId, CState) -> end end. +close_all_indicated(CState) -> + Objs = ets:match_object(?FILE_HANDLES_ETS_NAME, {{'_', self()}, close}), + lists:foldl(fun ({Key = {File, _Self}, close}, CStateM) -> + true = ets:delete(?FILE_HANDLES_ETS_NAME, Key), + close_handle(File, CStateM) + end, CState, Objs). + contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). @@ -1119,7 +1114,7 @@ maybe_compact(State) -> mark_handle_to_close(File) -> lists:foldl( - fun ({Key, opened}, true) -> + fun ({Key, open}, true) -> try true = ets:update_element(?FILE_HANDLES_ETS_NAME, Key, {2, close}) @@ -1127,7 +1122,7 @@ mark_handle_to_close(File) -> true end end, - true, ets:match_object(?FILE_HANDLES_ETS_NAME, {{File, '_'}, opened})). + true, ets:match_object(?FILE_HANDLES_ETS_NAME, {{File, '_'}, open})). -- cgit v1.2.1 From b75e805c8efe82e4f957ce1f1e962901dd9986ca Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Jan 2010 23:08:41 +0000 Subject: Scanning is the common operation, so it should be prioritised. The open->close transition is much rarer (only on GC startup and file deletion) so it can be slower. --- src/rabbit_msg_store.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b66564bb..59b283cf 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -276,7 +276,7 @@ read(MsgId, CState) -> %% already open. %% this is fine to fail (already exists) ets:insert_new(?FILE_HANDLES_ETS_NAME, - {{File, self()}, open}), + {{self(), File}, open}), CState1 = close_all_indicated(CState), {Hdl, CState3} = get_read_handle(File, CState1), @@ -320,8 +320,8 @@ read(MsgId, CState) -> end. close_all_indicated(CState) -> - Objs = ets:match_object(?FILE_HANDLES_ETS_NAME, {{'_', self()}, close}), - lists:foldl(fun ({Key = {File, _Self}, close}, CStateM) -> + Objs = ets:match_object(?FILE_HANDLES_ETS_NAME, {{self(), '_'}, close}), + lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> true = ets:delete(?FILE_HANDLES_ETS_NAME, Key), close_handle(File, CStateM) end, CState, Objs).
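The point of swapping the key from {File, Pid} to {Pid, File} is lookup locality in the ordered_set: the frequent reader-side scan now binds the leading key element, which ets can satisfy by visiting only that process's contiguous run of keys, while the rare server-side scan pays the full-table traversal. The two access patterns side by side (a sketch; Tab stands for ?FILE_HANDLES_ETS_NAME):

%% Frequent path (every read): leading key element is bound, so an
%% ordered_set only walks this process's own entries.
reader_scan(Tab) ->
    ets:match_object(Tab, {{self(), '_'}, close}).

%% Rare path (GC start, file deletion): only the trailing element
%% is bound, so the whole table is traversed.
server_scan(Tab, File) ->
    ets:match_object(Tab, {{'_', File}, open}).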
@@ -740,9 +740,9 @@ close_handle(Key, FHC) -> close_all_handles(CState = #client_msstate { file_handle_cache = FHC }) -> Self = self(), - ok = dict:fold(fun (Key, Hdl, ok) -> + ok = dict:fold(fun (File, Hdl, ok) -> true = - ets:delete(?FILE_HANDLES_ETS_NAME, {Key, Self}), + ets:delete(?FILE_HANDLES_ETS_NAME, {Self, File}), file_handle_cache:close(Hdl) end, ok, FHC), CState #client_msstate { file_handle_cache = dict:new() }; @@ -1122,7 +1122,7 @@ mark_handle_to_close(File) -> true end end, - true, ets:match_object(?FILE_HANDLES_ETS_NAME, {{File, '_'}, open})). + true, ets:match_object(?FILE_HANDLES_ETS_NAME, {{'_', File}, open})). find_files_to_gc(_N, '$end_of_table') -> undefined; -- cgit v1.2.1 From 79ce28a37bf62e3ac76eabe314295733f88e7d66 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 9 Jan 2010 19:03:56 +0000 Subject: some minor refactoring (more to do), and spotted and fixed a race condition --- src/rabbit_msg_store.erl | 58 +++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 59b283cf..29e4972e 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -241,9 +241,12 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). read(MsgId, CState) -> + Defer = fun() -> + {gen_server2:call(?SERVER, {read, MsgId}, infinity), CState} + end, case index_lookup(MsgId, CState) of not_found -> - {gen_server2:call(?SERVER, {read, MsgId}, infinity), CState}; + Defer(); #msg_location { ref_count = RefCount, file = File, offset = Offset, @@ -254,27 +257,42 @@ read(MsgId, CState) -> ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case Right =:= undefined orelse Locked =:= true of true -> - {gen_server2:call(?SERVER, {read, MsgId}, infinity), - CState}; + Defer(); false -> ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, {#file_summary.readers, 1}), - %% need to check again to see if we've - %% been locked in the meantime + Release = fun() -> + ets:update_counter( + ?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, -1}) + end, + %% If a GC hasn't already started, it + %% won't start now. Need to check again to + %% see if we've been locked in the + %% meantime, between lookup and + %% update_counter (thus GC actually in + %% progress). [#file_summary { locked = Locked2 }] = ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case Locked2 of true -> - {gen_server2:call(?SERVER, {read, MsgId}, - infinity), CState}; + Release(), + Defer(); false -> - %% ok, we're definitely safe to + %% Ok, we're definitely safe to %% continue - a GC can't start up %% now, and isn't running, so %% nothing will tell us from now %% on to close the handle if it's - %% already open. - %% this is fine to fail (already exists) + %% already open. (Well, a GC could + %% start, and could put close + %% entries into the ets table, but + %% the GC will wait until we're + %% done here before doing any real + %% work.) + + %% This is fine to fail (already + %% exists) ets:insert_new(?FILE_HANDLES_ETS_NAME, {{self(), File}, open}), CState1 = close_all_indicated(CState), @@ -295,14 +313,12 @@ read(MsgId, CState) -> {proc_dict, get()} ]}}) end, - ets:update_counter( - ?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, -1}), + Release(), ok = case RefCount > 1 of true -> insert_into_cache(MsgId, Msg); false -> - %% it's not in the + %% It's not in the %% cache and we only %% have one reference %% to the message. 
So @@ -497,6 +513,14 @@ handle_cast(sync, State) -> handle_cast({gc_done, Reclaimed, Source, Dest}, State = #msstate { sum_file_size = SumFileSize, gc_active = {Source, Dest} }) -> + %% GC done, so now ensure that any clients that have open fhs to + %% those files close them before using them again. This has to be + %% done here, and not when starting up the GC, because if done + %% when starting up the GC, the client could find the close, and + %% close and reopen the fh, whilst the GC is waiting for readers + %% to disappear, before it's actually done the GC. + true = mark_handle_to_close(Source), + true = mark_handle_to_close(Dest), %% we always move data left, so Source has gone and was on the %% right, so need to make dest = source.right.left, and also %% dest.right = source.right @@ -1100,12 +1124,6 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, {#file_summary.locked, true}), true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Dest, {#file_summary.locked, true}), - %% now that they're locked, we know no queue will touch - %% them (not even add to the ets table for these files), - %% so now ensure that we ask the queues to close handles - %% to these files - true = mark_handle_to_close(Source), - true = mark_handle_to_close(Dest), ok = rabbit_msg_store_gc:gc(Source, Dest), State1 #msstate { gc_active = {Source, Dest} } end; -- cgit v1.2.1 From 0a250a7dfbac186edb0018406c14a32888da7692 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 12 Jan 2010 18:33:37 +0000 Subject: Right, getting there - some major reworkings to vq which have fixed bugs. It doesn't quite do everything I want it to do - in particular, on memory reduction, it needs to ensure that the inner queues nearest delta have sufficient non-ram-index msgs, but all the tests pass and adding that feature shouldn't be too painful. --- include/rabbit_queue.hrl | 20 +- src/rabbit_tests.erl | 18 +- src/rabbit_variable_queue.erl | 789 ++++++++++++++++++++++++------------------ 3 files changed, 467 insertions(+), 360 deletions(-) diff --git a/include/rabbit_queue.hrl b/include/rabbit_queue.hrl index 69ad7588..fc1dbf74 100644 --- a/include/rabbit_queue.hrl +++ b/include/rabbit_queue.hrl @@ -29,23 +29,7 @@ %% Contributor(s): ______________________________________. %% --record(alpha, - { msg, - seq_id, - is_delivered, - msg_on_disk, - index_on_disk - }). - --record(beta, - { msg_id, - seq_id, - is_persistent, - is_delivered, - index_on_disk - }). - --record(gamma, +-record(delta, { start_seq_id, count, end_seq_id %% note the end_seq_id is always >, not >= }). -ifdef(use_specs). --type(gamma() :: #gamma { start_seq_id :: non_neg_integer(), +-type(delta() :: #delta { start_seq_id :: non_neg_integer(), count :: non_neg_integer (), end_seq_id :: non_neg_integer() }). diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 856a8c46..16332f32 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1225,7 +1225,7 @@ fresh_variable_queue() -> assert_prop(S0, len, 0), assert_prop(S0, q1, 0), assert_prop(S0, q2, 0), - assert_prop(S0, gamma, #gamma { start_seq_id = undefined, + assert_prop(S0, delta, #delta { start_seq_id = undefined, count = 0, end_seq_id = undefined }), assert_prop(S0, q3, 0), @@ -1234,7 +1234,7 @@ fresh_variable_queue() -> test_variable_queue() -> passed = test_variable_queue_dynamic_duration_change(), - passed = test_variable_queue_partial_segments_gamma_thing(), + passed = test_variable_queue_partial_segments_delta_thing(), passed.
test_variable_queue_dynamic_duration_change() -> @@ -1253,7 +1253,7 @@ test_variable_queue_dynamic_duration_change() -> %% just publish and fetch some persistent msgs, this hits the %% partial segment path in queue_index due to the period when - %% duration was 0 and the entire queue was gamma. + %% duration was 0 and the entire queue was delta. {_SeqIds1, VQ7} = variable_queue_publish(true, 20, VQ6), {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), @@ -1288,7 +1288,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> test_variable_queue_dynamic_duration_change_f(Len, VQ3) end. -test_variable_queue_partial_segments_gamma_thing() -> +test_variable_queue_partial_segments_delta_thing() -> SegmentSize = rabbit_queue_index:segment_size(), HalfSegment = SegmentSize div 2, VQ0 = fresh_variable_queue(), @@ -1296,21 +1296,21 @@ test_variable_queue_partial_segments_gamma_thing() -> variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), VQ2 = rabbit_variable_queue:remeasure_rates(VQ1), VQ3 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ2), - %% one segment in q3 as betas, and half a segment in gamma + %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), io:format("~p~n", [S3]), - assert_prop(S3, gamma, #gamma { start_seq_id = SegmentSize, + assert_prop(S3, delta, #delta { start_seq_id = SegmentSize, count = HalfSegment, end_seq_id = SegmentSize + HalfSegment }), assert_prop(S3, q3, SegmentSize), assert_prop(S3, len, SegmentSize + HalfSegment), VQ4 = rabbit_variable_queue:set_queue_ram_duration_target(infinity, VQ3), {[_SeqId], VQ5} = variable_queue_publish(true, 1, VQ4), - %% should have 1 alpha, but it's in the same segment as the gammas + %% should have 1 alpha, but it's in the same segment as the deltas S5 = rabbit_variable_queue:status(VQ5), io:format("~p~n", [S5]), assert_prop(S5, q1, 1), - assert_prop(S5, gamma, #gamma { start_seq_id = SegmentSize, + assert_prop(S5, delta, #delta { start_seq_id = SegmentSize, count = HalfSegment, end_seq_id = SegmentSize + HalfSegment }), assert_prop(S5, q3, SegmentSize), @@ -1320,7 +1320,7 @@ test_variable_queue_partial_segments_gamma_thing() -> %% the half segment should now be in q3 as betas S6 = rabbit_variable_queue:status(VQ6), io:format("~p~n", [S6]), - assert_prop(S6, gamma, #gamma { start_seq_id = undefined, + assert_prop(S6, delta, #delta { start_seq_id = undefined, count = 0, end_seq_id = undefined }), assert_prop(S6, q1, 1), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f2d45700..6c7fad12 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -43,14 +43,14 @@ -record(vqstate, { q1, q2, - gamma, + delta, q3, q4, duration_target, target_ram_msg_count, ram_msg_count, ram_msg_count_prev, - queue, + ram_index_count, index_state, next_seq_id, out_counter, @@ -68,32 +68,46 @@ -include("rabbit.hrl"). -include("rabbit_queue.hrl"). +-record(msg_status, + { msg, + msg_id, + seq_id, + is_persistent, + is_delivered, + msg_on_disk, + index_on_disk + }). + +-define(RAM_INDEX_TARGET_RATIO, 32768). + %%---------------------------------------------------------------------------- +%% WRONG - UPDATE ME! + +%% Basic premise is that msgs move from q1 -> q2 -> delta -> q3 -> q4 %% but they can only do so in the right form.
q1 and q4 only hold %% alphas (msgs in ram), q2 and q3 only hold betas (msg on disk, index -%% in ram), and gamma is just a count of the number of index entries +%% in ram), and delta is just a count of the number of index entries %% on disk at that stage (msg on disk, index on disk). %% %% When a msg arrives, we decide in which form it should be. It is %% then added to the right-most appropriate queue, maintaining %% order. Thus if the msg is to be an alpha, it will be added to q1, -%% unless all of q2, gamma and q3 are empty, in which case it will go -%% to q4. If it is to be a beta, it will be added to q2 unless gamma +%% unless all of q2, delta and q3 are empty, in which case it will go +%% to q4. If it is to be a beta, it will be added to q2 unless delta %% is empty, in which case it will go to q3. %% %% The major invariant is that if the msg is to be a beta, q1 will be -%% empty, and if it is to be a gamma then both q1 and q2 will be empty. +%% empty, and if it is to be a delta then both q1 and q2 will be empty. %% %% When taking msgs out of the queue, if q4 is empty then we read -%% directly from q3, or gamma, if q3 is empty. If q3 and gamma are +%% directly from q3, or delta, if q3 is empty. If q3 and delta are %% empty then we have an invariant that q2 must be empty because q2 -%% can only grow if gamma is non empty. +%% can only grow if delta is non empty. %% %% A further invariant is that if the queue is non empty, either q4 or -%% q3 contains at least one entry. I.e. we never allow gamma to -%% contain all msgs in the queue. Also, if q4 is non empty and gamma +%% q3 contains at least one entry. I.e. we never allow delta to +%% contain all msgs in the queue. Also, if q4 is non empty and delta %% is non empty then q3 must be non empty. %%---------------------------------------------------------------------------- @@ -106,15 +120,15 @@ | 'ack_not_on_disk'). -type(vqstate() :: #vqstate { q1 :: queue(), - q2 :: queue(), - gamma :: gamma(), - q3 :: queue(), + q2 :: {non_neg_integer(), queue()}, + delta :: delta(), + q3 :: {non_neg_integer(), queue()}, q4 :: queue(), duration_target :: non_neg_integer(), target_ram_msg_count :: non_neg_integer(), ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), - queue :: queue_name(), + ram_index_count :: non_neg_integer(), index_state :: any(), next_seq_id :: seq_id(), out_counter :: non_neg_integer(), @@ -161,7 +175,7 @@ -endif. --define(BLANK_GAMMA, #gamma { start_seq_id = undefined, +-define(BLANK_DELTA, #delta { start_seq_id = undefined, count = 0, end_seq_id = undefined }). 
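The delta record replaces per-message bookkeeping with a pure range-plus-count: start_seq_id is inclusive, end_seq_id exclusive (as the comment in rabbit_queue.hrl notes), and count may be smaller than the width of the range because entries can be missing from the middle of it. A small concrete value for reference — illustrative numbers, assuming rabbit_queue.hrl is included:

%% A delta describing 400 surviving index entries somewhere in the
%% seq id range [1000, 1536); the invariant the code asserts is
%% count =< end_seq_id - start_seq_id.
example_delta() ->
    D = #delta { start_seq_id = 1000, count = 400, end_seq_id = 1536 },
    true = D#delta.count =< D#delta.end_seq_id - D#delta.start_seq_id,
    D.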
@@ -170,40 +184,40 @@ %%---------------------------------------------------------------------------- init(QueueName) -> - {GammaCount, IndexState} = + {DeltaCount, IndexState} = rabbit_queue_index:init(QueueName), - {GammaSeqId, NextSeqId, IndexState1} = + {DeltaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), - Gamma = case GammaCount of - 0 -> ?BLANK_GAMMA; - _ -> #gamma { start_seq_id = GammaSeqId, - count = GammaCount, + Delta = case DeltaCount of + 0 -> ?BLANK_DELTA; + _ -> #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, end_seq_id = NextSeqId } end, Now = now(), State = - #vqstate { q1 = queue:new(), q2 = queue:new(), - gamma = Gamma, - q3 = queue:new(), q4 = queue:new(), + #vqstate { q1 = queue:new(), q2 = {0, queue:new()}, + delta = Delta, + q3 = {0, queue:new()}, q4 = queue:new(), duration_target = undefined, target_ram_msg_count = undefined, ram_msg_count = 0, ram_msg_count_prev = 0, - queue = QueueName, + ram_index_count = 0, index_state = IndexState1, next_seq_id = NextSeqId, out_counter = 0, in_counter = 0, egress_rate = {Now, 0}, avg_egress_rate = 0, - ingress_rate = {Now, GammaCount}, + ingress_rate = {Now, DeltaCount}, avg_ingress_rate = 0, rate_timestamp = Now, - len = GammaCount, + len = DeltaCount, on_sync = {[], [], []}, msg_store_read_state = rabbit_msg_store:client_init() }, - maybe_gammas_to_betas(State). + maybe_deltas_to_betas(State). terminate(State = #vqstate { index_state = IndexState, msg_store_read_state = MSCState }) -> @@ -221,11 +235,14 @@ publish_delivered(Msg = #basic_message { guid = MsgId, in_counter = InCount}) -> State1 = State #vqstate { out_counter = OutCount + 1, in_counter = InCount + 1 }, - case maybe_write_msg_to_disk(false, false, Msg) of + MsgStatus = #msg_status { + msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, + is_delivered = false, msg_on_disk = false, index_on_disk = false }, + MsgStatus1 = maybe_write_msg_to_disk(false, MsgStatus), + case MsgStatus1 #msg_status.msg_on_disk of true -> - {true, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - true, IndexState), + {#msg_status { index_on_disk = true }, IndexState1} = + maybe_write_index_to_disk(false, MsgStatus1, IndexState), {{ack_index_and_store, MsgId, SeqId}, State1 #vqstate { index_state = IndexState1, next_seq_id = SeqId + 1 }}; @@ -289,12 +306,11 @@ fetch(State = index_state = IndexState, len = Len }) -> case queue:out(Q4) of {empty, _Q4} -> - fetch_from_q3_or_gamma(State); - {{value, - #alpha { msg = Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + fetch_from_q3_or_delta(State); + {{value, #msg_status { + msg = Msg, msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Q4a} -> {IndexState1, IndexOnDisk1} = case IndexOnDisk of @@ -313,20 +329,17 @@ fetch(State = false -> {IndexState, false} end, - _MsgOnDisk1 = IndexOnDisk1 = + AckTag = case IndexOnDisk1 of true -> true = IsPersistent, %% ASSERTION - true = MsgOnDisk; %% ASSERTION + true = MsgOnDisk, %% ASSERTION + {ack_index_and_store, MsgId, SeqId}; false -> ok = case MsgOnDisk andalso not IsPersistent of true -> rabbit_msg_store:remove([MsgId]); false -> ok end, - false + ack_not_on_disk end, - AckTag = case IndexOnDisk1 of - true -> {ack_index_and_store, MsgId, SeqId}; - false -> 
ack_not_on_disk - end, Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, @@ -362,7 +375,7 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Len, State1} = purge1(Q4Count, State #vqstate { index_state = IndexState1, q4 = queue:new() }), - {Len, State1 #vqstate { len = 0 }}. + {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0 }}. %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. @@ -373,9 +386,9 @@ delete(State) -> IndexState) of {N, N, IndexState2} -> IndexState2; - {GammaSeqId, NextSeqId, IndexState2} -> + {DeltaSeqId, NextSeqId, IndexState2} -> {_DeleteCount, IndexState3} = - delete1(NextSeqId, 0, GammaSeqId, IndexState2), + delete1(NextSeqId, 0, DeltaSeqId, IndexState2), IndexState3 end, IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1), @@ -383,27 +396,26 @@ delete(State) -> %% [{Msg, AckTag}] %% We guarantee that after fetch, only persistent msgs are left on -%% disk. This means that in a requeue, we set -%% PersistentMsgsAlreadyOnDisk to true, thus avoiding calls to -%% msg_store:write for persistent msgs. It also means that we don't -%% need to worry about calling msg_store:remove (as ack would do) -%% because transient msgs won't be on disk anyway, thus they won't -%% need to be removed. However, we do call msg_store:release so that -%% the cache isn't held full of msgs which are now at the tail of the -%% queue. +%% disk. This means that in a requeue, we set MsgOnDisk to true, thus +%% avoiding calls to msg_store:write for persistent msgs. It also +%% means that we don't need to worry about calling msg_store:remove +%% (as ack would do) because transient msgs won't be on disk anyway, +%% thus they won't need to be removed. However, we do call +%% msg_store:release so that the cache isn't held full of msgs which +%% are now at the tail of the queue. requeue(MsgsWithAckTags, State) -> {SeqIds, MsgIds, State1 = #vqstate { index_state = IndexState }} = lists:foldl( fun ({Msg = #basic_message { guid = MsgId }, AckTag}, {SeqIdsAcc, MsgIdsAcc, StateN}) -> - {_SeqId, StateN1} = publish(Msg, true, true, StateN), - {SeqIdsAcc1, MsgIdsAcc1} = + {SeqIdsAcc1, MsgIdsAcc1, MsgOnDisk} = case AckTag of ack_not_on_disk -> - {SeqIdsAcc, MsgIdsAcc}; + {SeqIdsAcc, MsgIdsAcc, false}; {ack_index_and_store, MsgId, SeqId} -> - {[SeqId | SeqIdsAcc], [MsgId | MsgIdsAcc]} + {[SeqId | SeqIdsAcc], [MsgId | MsgIdsAcc], true} end, + {_SeqId, StateN1} = publish(Msg, true, MsgOnDisk, StateN), {SeqIdsAcc1, MsgIdsAcc1, StateN1} end, {[], [], State}, MsgsWithAckTags), IndexState1 = case SeqIds of @@ -416,8 +428,13 @@ requeue(MsgsWithAckTags, State) -> end, State1 #vqstate { index_state = IndexState1 }. -tx_publish(Msg = #basic_message { is_persistent = true }, State) -> - true = maybe_write_msg_to_disk(true, false, Msg), +tx_publish(Msg = #basic_message { is_persistent = true, guid = MsgId }, + State) -> + MsgStatus = #msg_status { + msg = Msg, msg_id = MsgId, seq_id = undefined, is_persistent = true, + is_delivered = false, msg_on_disk = false, index_on_disk = false }, + #msg_status { msg_on_disk = true } = + maybe_write_msg_to_disk(false, MsgStatus), State; tx_publish(_Msg, State) -> State. 
@@ -457,7 +474,7 @@ tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) -> lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, {SeqIdsAcc, StateN}) -> - {SeqId, StateN1} = publish(Msg, false, true, StateN), + {SeqId, StateN1} = publish(Msg, false, IsPersistent, StateN), SeqIdsAcc1 = case IsPersistent of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc @@ -478,22 +495,25 @@ flush_journal(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush_journal(IndexState) }. -status(#vqstate { q1 = Q1, q2 = Q2, gamma = Gamma, q3 = Q3, q4 = Q4, +status(#vqstate { q1 = Q1, q2 = {Q2Len, _Q2}, + delta = Delta, q3 = {Q3Len, _Q3}, q4 = Q4, len = Len, on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, next_seq_id = NextSeqId }) -> [ {q1, queue:len(Q1)}, - {q2, queue:len(Q2)}, - {gamma, Gamma}, - {q3, queue:len(Q3)}, + {q2, Q2Len}, + {delta, Delta}, + {q3, Q3Len}, {q4, queue:len(Q4)}, {len, Len}, {outstanding_txns, length(From)}, {target_ram_msg_count, TargetRamMsgCount}, {ram_msg_count, RamMsgCount}, + {ram_index_count, RamIndexCount}, {avg_egress_rate, AvgEgressRate}, {avg_ingress_rate, AvgIngressRate}, {next_seq_id, NextSeqId} ]. @@ -511,23 +531,45 @@ persistent_msg_ids(Pubs) -> [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, Obj #basic_message.is_persistent]. -entry_salient_details(#alpha { msg = #basic_message { guid = MsgId }, - seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, - index_on_disk = IndexOnDisk }) -> - {MsgId, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk}; -entry_salient_details(#beta { msg_id = MsgId, seq_id = SeqId, - is_delivered = IsDelivered, - index_on_disk = IndexOnDisk }) -> - {MsgId, SeqId, IsDelivered, true, IndexOnDisk}. - betas_from_segment_entries(List, SeqIdLimit) -> - queue:from_list([#beta { msg_id = MsgId, seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - index_on_disk = true } - || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, - SeqId < SeqIdLimit ]). + List1 = [#msg_status { msg = undefined, + msg_id = MsgId, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + } + || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, + SeqId < SeqIdLimit ], + {length(List1), queue:from_list([{true, queue:from_list(List1)}])}. + +join_betas({HeadLen, Head}, {TailLen, Tail}) -> + {HeadLen + TailLen, join_betas1(Head, Tail)}. + +join_betas1(Head, Tail) -> + case {queue:out_r(Head), queue:out(Tail)} of + {{empty, _Head}, _} -> + Tail; + {_, {empty, _Tail}} -> + Head; + {{{value, {IndexOnDisk, InnerQHead}}, Head1}, + {{value, {IndexOnDisk, InnerQTail}}, Tail1}} -> + queue:join( + queue:in({IndexOnDisk, + queue:join(InnerQHead, InnerQTail)}, Head1), + Tail1); + {_, _} -> queue:join(Head, Tail) + end. + +grab_beta(Gen, Q) -> + case Gen(Q) of + {empty, _Q} -> + empty; + {{value, {_IndexOnDisk, InnerQ}}, _Q} -> + {{value, MsgStatus}, _InnerQ} = Gen(InnerQ), + MsgStatus + end. read_index_segment(SeqId, IndexState) -> SeqId1 = SeqId + rabbit_queue_index:segment_size(), @@ -541,39 +583,39 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> content = rabbit_binary_parser:clear_decoded_content( rabbit_binary_generator:ensure_content_encoded(Content)) }. 
-%% the first arg is the older gamma -combine_gammas(#gamma { count = 0 }, #gamma { count = 0 }) -> - ?BLANK_GAMMA; -combine_gammas(#gamma { count = 0 }, #gamma { } = B) -> B; -combine_gammas(#gamma { } = A, #gamma { count = 0 }) -> A; -combine_gammas(#gamma { start_seq_id = SeqIdLow, count = CountLow}, - #gamma { start_seq_id = SeqIdHigh, count = CountHigh, +%% the first arg is the older delta +combine_deltas(#delta { count = 0 }, #delta { count = 0 }) -> + ?BLANK_DELTA; +combine_deltas(#delta { count = 0 }, #delta { } = B) -> B; +combine_deltas(#delta { } = A, #delta { count = 0 }) -> A; +combine_deltas(#delta { start_seq_id = SeqIdLow, count = CountLow}, + #delta { start_seq_id = SeqIdHigh, count = CountHigh, end_seq_id = SeqIdEnd }) -> true = SeqIdLow =< SeqIdHigh, %% ASSERTION Count = CountLow + CountHigh, true = Count =< SeqIdEnd - SeqIdLow, %% ASSERTION - #gamma { start_seq_id = SeqIdLow, count = Count, end_seq_id = SeqIdEnd }. + #delta { start_seq_id = SeqIdLow, count = Count, end_seq_id = SeqIdEnd }. %%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -delete1(NextSeqId, Count, GammaSeqId, IndexState) - when GammaSeqId >= NextSeqId -> +delete1(NextSeqId, Count, DeltaSeqId, IndexState) + when DeltaSeqId >= NextSeqId -> {Count, IndexState}; -delete1(NextSeqId, Count, GammaSeqId, IndexState) -> - Gamma1SeqId = GammaSeqId + rabbit_queue_index:segment_size(), - case rabbit_queue_index:read_segment_entries(GammaSeqId, IndexState) of +delete1(NextSeqId, Count, DeltaSeqId, IndexState) -> + Delta1SeqId = DeltaSeqId + rabbit_queue_index:segment_size(), + case rabbit_queue_index:read_segment_entries(DeltaSeqId, IndexState) of {[], IndexState1} -> - delete1(NextSeqId, Count, Gamma1SeqId, IndexState1); + delete1(NextSeqId, Count, Delta1SeqId, IndexState1); {List, IndexState1} -> - Q = betas_from_segment_entries(List, Gamma1SeqId), + {QCount, Q} = betas_from_segment_entries(List, Delta1SeqId), {QCount, IndexState2} = remove_queue_entries(Q, IndexState1), - delete1(NextSeqId, Count + QCount, Gamma1SeqId, IndexState2) + delete1(NextSeqId, Count + QCount, Delta1SeqId, IndexState2) end. -purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> - case queue:is_empty(Q3) of +purge1(Count, State = #vqstate { q3 = {Q3Len, Q3}, index_state = IndexState }) -> + case 0 == Q3Len of true -> {Q1Count, IndexState1} = remove_queue_entries(State #vqstate.q1, IndexState), @@ -582,30 +624,31 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> false -> {Q3Count, IndexState1} = remove_queue_entries(Q3, IndexState), purge1(Count + Q3Count, - maybe_gammas_to_betas( + maybe_deltas_to_betas( State #vqstate { index_state = IndexState1, - q3 = queue:new() })) + q3 = {0, queue:new()} })) end. 
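combine_deltas above concatenates two such ranges, with the assertions guarding ordering and density; a worked case with invented values:

%% Older delta [100, 612) with 500 entries plus newer delta
%% [612, 1200) with 300 entries combine to [100, 1200) with 800;
%% both assertions hold: 100 =< 612 and 800 =< 1200 - 100.
combine_deltas_example() ->
    #delta { start_seq_id = 100, count = 800, end_seq_id = 1200 } =
        combine_deltas(#delta { start_seq_id = 100, count = 500,
                                end_seq_id = 612 },
                       #delta { start_seq_id = 612, count = 300,
                                end_seq_id = 1200 }).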
remove_queue_entries(Q, IndexState) -> {Count, MsgIds, SeqIds, IndexState1} = lists:foldl( - fun (Entry, {CountN, MsgIdsAcc, SeqIdsAcc, IndexStateN}) -> - {MsgId, SeqId, IsDelivered, MsgOnDisk, IndexOnDisk} = - entry_salient_details(Entry), + fun (#msg_status { msg_id = MsgId, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }, + {CountN, MsgIdsAcc, SeqIdsAcc, IndexStateN}) -> + MsgIdsAcc1 = case MsgOnDisk of + true -> [MsgId | MsgIdsAcc]; + false -> MsgIdsAcc + end, + SeqIdsAcc1 = case IndexOnDisk of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, IndexStateN1 = case IndexOnDisk andalso not IsDelivered of true -> rabbit_queue_index:write_delivered( SeqId, IndexStateN); false -> IndexStateN end, - SeqIdsAcc1 = case IndexOnDisk of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, - MsgIdsAcc1 = case MsgOnDisk of - true -> [MsgId | MsgIdsAcc]; - false -> MsgIdsAcc - end, {CountN + 1, MsgIdsAcc1, SeqIdsAcc1, IndexStateN1} %% we need to write the delivered records in order otherwise %% we upset the qi. So don't reverse. @@ -621,45 +664,56 @@ remove_queue_entries(Q, IndexState) -> end, {Count, IndexState2}. -fetch_from_q3_or_gamma(State = #vqstate { - q1 = Q1, q2 = Q2, gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, +fetch_from_q3_or_delta(State = #vqstate { + q1 = Q1, q2 = {Q2Len, _Q2}, delta = #delta { count = DeltaCount }, + q3 = {Q3Len, Q3}, q4 = Q4, ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, msg_store_read_state = MSCState }) -> case queue:out(Q3) of {empty, _Q3} -> - 0 = GammaCount, %% ASSERTION - true = queue:is_empty(Q2), %% ASSERTION + 0 = DeltaCount, %% ASSERTION + 0 = Q2Len, %% ASSERTION + 0 = Q3Len, %% ASSERTION true = queue:is_empty(Q1), %% ASSERTION {empty, State}; - {{value, - #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, - is_persistent = IsPersistent, index_on_disk = IndexOnDisk }}, - Q3a} -> + {{value, {IndexOnDisk, InnerQ}}, Q3a} -> + {{value, MsgStatus = #msg_status { + msg = undefined, msg_id = MsgId, + is_persistent = IsPersistent + }}, InnerQ1} = queue:out(InnerQ), + Q3LenB = Q3Len - 1, + Q3b = {Q3LenB, case queue:is_empty(InnerQ1) of + true -> Q3a; + false -> queue:in_r({IndexOnDisk, InnerQ1}, Q3a) + end}, {{ok, Msg = #basic_message { is_persistent = IsPersistent, - guid = MsgId }}, MSCState1} = + guid = MsgId }}, MSCState1} = rabbit_msg_store:read(MsgId, MSCState), - Q4a = queue:in( - #alpha { msg = Msg, seq_id = SeqId, - is_delivered = IsDelivered, msg_on_disk = true, - index_on_disk = IndexOnDisk }, Q4), - State1 = State #vqstate { q3 = Q3a, q4 = Q4a, + Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), + RamIndexCount1 = case IndexOnDisk of + true -> RamIndexCount; + false -> RamIndexCount - 1 + end, + true = RamIndexCount1 >= 0, %% ASSERTION + State1 = State #vqstate { q3 = Q3b, q4 = Q4a, ram_msg_count = RamMsgCount + 1, + ram_index_count = RamIndexCount1, msg_store_read_state = MSCState1 }, State2 = - case {queue:is_empty(Q3a), 0 == GammaCount} of + case {0 == Q3LenB, 0 == DeltaCount} of {true, true} -> - %% q3 is now empty, it wasn't before; gamma is + %% q3 is now empty, it wasn't before; delta is %% still empty. 
So q2 must be empty, and q1 %% can now be joined onto q4 - true = queue:is_empty(Q2), %% ASSERTION + 0 = Q2Len, %% ASSERTION State1 #vqstate { q1 = queue:new(), q4 = queue:join(Q4a, Q1) }; {true, false} -> - maybe_gammas_to_betas(State1); + maybe_deltas_to_betas(State1); {false, _} -> %% q3 still isn't empty, we've not touched - %% gamma, so the invariants between q1, q2, - %% gamma and q3 are maintained + %% delta, so the invariants between q1, q2, + %% delta and q3 are maintained State1 end, fetch(State2) @@ -673,7 +727,7 @@ reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), case TargetRamMsgCount of - 0 -> push_betas_to_gammas(State1); + 0 -> push_betas_to_deltas(State1); _ -> State1 end. @@ -683,7 +737,7 @@ reduce_memory_use(State = test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, - q1 = Q1, q3 = Q3 }) -> + q1 = Q1, q3 = {_Q3Len, Q3} }) -> case TargetRamMsgCount of undefined -> msg; @@ -691,17 +745,19 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, case queue:out(Q3) of {empty, _Q3} -> %% if TargetRamMsgCount == 0, we know we have no - %% alphas. If q3 is empty then gamma must be empty + %% alphas. If q3 is empty then delta must be empty %% too, so create a beta, which should end up in %% q3 index; - {{value, #beta { seq_id = OldSeqId }}, _Q3a} -> - %% Don't look at the current gamma as it may be + {{value, {_IndexOnDisk, InnerQ}}, _Q3a} -> + {{value, #msg_status { seq_id = OldSeqId }}, _InnerQ} = + queue:out(InnerQ), + %% Don't look at the current delta as it may be %% empty. If the SeqId is still within the current %% segment, it'll be a beta, else it'll go into - %% gamma + %% delta case SeqId >= rabbit_queue_index:next_segment_boundary(OldSeqId) of - true -> neither; + true -> neither; false -> index end end; @@ -716,178 +772,198 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, end end. -publish(Msg, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { next_seq_id = SeqId, len = Len, - in_counter = InCount }) -> - {SeqId, publish(test_keep_msg_in_ram(SeqId, State), Msg, SeqId, IsDelivered, - PersistentMsgsAlreadyOnDisk, +publish(Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, + IsDelivered, MsgOnDisk, State = + #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount }) -> + MsgStatus = #msg_status { + msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = false }, + {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, in_counter = InCount + 1 })}. 
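For orientation, the atom that publish/3 dispatches on comes from test_keep_msg_in_ram/2. The simplified sketch below (illustration only; the real function also inspects q1, q3 and the segment boundaries) captures the intent of the three outcomes:

%% msg:     keep the whole message in RAM (an alpha, destined for q1/q4)
%% index:   message body to disk, index may stay in RAM (a beta, q2/q3)
%% neither: message body and index both to disk (ends up in delta)
classify(undefined, _RamMsgCount) ->
    msg;   %% no limit set
classify(TargetRamMsgCount, RamMsgCount)
  when TargetRamMsgCount > RamMsgCount ->
    msg;   %% still below target
classify(_TargetRamMsgCount, _RamMsgCount) ->
    index. %% at or over target; the real code returns 'neither' instead
           %% when the seq_id lies beyond the current segment boundary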
-publish(msg, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { index_state = IndexState, - ram_msg_count = RamMsgCount }) -> - MsgOnDisk = - maybe_write_msg_to_disk(false, PersistentMsgsAlreadyOnDisk, Msg), - {IndexOnDisk, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - Entry = #alpha { msg = Msg, seq_id = SeqId, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }, +publish(msg, MsgStatus, State = #vqstate { index_state = IndexState, + ram_msg_count = RamMsgCount }) -> + MsgStatus1 = maybe_write_msg_to_disk(false, MsgStatus), + {MsgStatus2, IndexState1} = + maybe_write_index_to_disk(false, MsgStatus1, IndexState), State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, index_state = IndexState1 }, - store_alpha_entry(Entry, State1); - -publish(index, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { index_state = IndexState, q1 = Q1 }) -> - true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), - {IndexOnDisk, IndexState1} = - maybe_write_index_to_disk(false, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - Entry = #beta { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, - is_persistent = IsPersistent, index_on_disk = IndexOnDisk }, - State1 = State #vqstate { index_state = IndexState1 }, + store_alpha_entry(MsgStatus2, State1); + +publish(index, MsgStatus, State = + #vqstate { index_state = IndexState, q1 = Q1, + ram_index_count = RamIndexCount, + target_ram_msg_count = TargetRamMsgCount }) -> + MsgStatus1 = #msg_status { msg_on_disk = true } = + maybe_write_msg_to_disk(true, MsgStatus), + ForceIndex = case TargetRamMsgCount of + undefined -> + false; + _ -> + RamIndexCount >= (?RAM_INDEX_TARGET_RATIO * + TargetRamMsgCount) + end, + {MsgStatus2, IndexState1} = + maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), + RamIndexCount1 = case MsgStatus2 #msg_status.index_on_disk of + true -> RamIndexCount; + false -> RamIndexCount + 1 + end, + State1 = State #vqstate { index_state = IndexState1, + ram_index_count = RamIndexCount1 }, true = queue:is_empty(Q1), %% ASSERTION - store_beta_entry(Entry, State1); - -publish(neither, Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - SeqId, IsDelivered, PersistentMsgsAlreadyOnDisk, - State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, - gamma = Gamma }) -> - true = maybe_write_msg_to_disk(true, PersistentMsgsAlreadyOnDisk, Msg), - {true, IndexState1} = - maybe_write_index_to_disk(true, IsPersistent, MsgId, SeqId, - IsDelivered, IndexState), - true = queue:is_empty(Q1) andalso queue:is_empty(Q2), %% ASSERTION - %% gamma may be empty, seq_id > next_segment_boundary from q3 + store_beta_entry(MsgStatus2, State1); + +publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = + #vqstate { index_state = IndexState, q1 = Q1, q2 = {Q2Len, _Q2}, + delta = Delta }) -> + MsgStatus1 = #msg_status { msg_on_disk = true } = + maybe_write_msg_to_disk(true, MsgStatus), + {#msg_status { index_on_disk = true }, IndexState1} = + maybe_write_index_to_disk(true, MsgStatus1, IndexState), + true = queue:is_empty(Q1) andalso 0 == Q2Len, %% ASSERTION + %% delta may be empty, seq_id > next_segment_boundary from q3 %% head, so we need to find where the segment boundary is before %% or equal to seq_id - GammaSeqId 
= rabbit_queue_index:next_segment_boundary(SeqId) - + DeltaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - rabbit_queue_index:segment_size(), - Gamma1 = #gamma { start_seq_id = GammaSeqId, count = 1, + Delta1 = #delta { start_seq_id = DeltaSeqId, count = 1, end_seq_id = SeqId + 1 }, State #vqstate { index_state = IndexState1, - gamma = combine_gammas(Gamma, Gamma1) }. - -store_alpha_entry(Entry = #alpha {}, State = - #vqstate { q1 = Q1, q2 = Q2, - gamma = #gamma { count = GammaCount }, - q3 = Q3, q4 = Q4 }) -> - case queue:is_empty(Q2) andalso GammaCount == 0 andalso - queue:is_empty(Q3) of - true -> - State #vqstate { q4 = queue:in(Entry, Q4) }; - false -> - maybe_push_q1_to_betas(State #vqstate { q1 = queue:in(Entry, Q1) }) + delta = combine_deltas(Delta, Delta1) }. + +store_alpha_entry(MsgStatus, State = + #vqstate { q1 = Q1, q2 = {Q2Len, _Q2}, + delta = #delta { count = DeltaCount }, + q3 = {Q3Len, _Q3}, q4 = Q4 }) -> + case 0 == Q2Len andalso 0 == DeltaCount andalso 0 == Q3Len of + true -> true = queue:is_empty(Q1), %% ASSERTION + State #vqstate { q4 = queue:in(MsgStatus, Q4) }; + false -> maybe_push_q1_to_betas( + State #vqstate { q1 = queue:in(MsgStatus, Q1) }) end. -store_beta_entry(Entry = #beta {}, State = - #vqstate { q2 = Q2, gamma = #gamma { count = GammaCount }, - q3 = Q3 }) -> - case GammaCount == 0 of - true -> State #vqstate { q3 = queue:in(Entry, Q3) }; - false -> State #vqstate { q2 = queue:in(Entry, Q2) } +store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true }, + State = #vqstate { q2 = {Q2Len, Q2}, + delta = #delta { count = DeltaCount }, + q3 = {Q3Len, Q3} }) -> + MsgStatus1 = MsgStatus #msg_status { msg = undefined }, + case DeltaCount == 0 of + true -> State #vqstate { q3 = {Q3Len + 1, + store_beta_entry1( + fun queue:out_r/1, fun queue:in/2, + MsgStatus1, Q3)} }; + false -> State #vqstate { q2 = {Q2Len + 1, + store_beta_entry1( + fun queue:out_r/1, fun queue:in/2, + MsgStatus1, Q2)} } end. -%% Bool IsPersistent PersistentMsgsAlreadyOnDisk | WriteToDisk? -%% -----------------------------------------------+------------- -%% false false false | false 1 -%% false true false | true 2 -%% false false true | false 3 -%% false true true | false 4 -%% true false false | true 5 -%% true true false | true 6 -%% true false true | true 7 -%% true true true | false 8 - -%% (Bool and not (IsPersistent and PersistentMsgsAlreadyOnDisk)) or | 5 6 7 -%% (IsPersistent and (not PersistentMsgsAlreadyOnDisk)) | 2 6 -maybe_write_msg_to_disk(Bool, PersistentMsgsAlreadyOnDisk, - Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }) - when (Bool andalso not (IsPersistent andalso PersistentMsgsAlreadyOnDisk)) - orelse (IsPersistent andalso not PersistentMsgsAlreadyOnDisk) -> +store_beta_entry1(Gen, Cons, MsgStatus = + #msg_status { index_on_disk = IndexOnDisk }, Q) -> + case Gen(Q) of + {{value, {IndexOnDisk, InnerQ}}, QTail} -> + Cons({IndexOnDisk, Cons(MsgStatus, InnerQ)}, QTail); + {_EmptyOrNotIndexOnDisk, _QTail} -> + Cons({IndexOnDisk, Cons(MsgStatus, queue:new())}, Q) + end. 
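store_beta_entry1/4 above maintains what later in the series becomes the bpqueue invariant: betas are grouped into blocks keyed by their index_on_disk flag, and no two adjacent blocks share a flag. A rough sketch of the same idea over plain lists (function name invented, lists used instead of queues purely for readability):

append_block(Prefix, Value, Blocks) ->
    case lists:reverse(Blocks) of
        [{Prefix, Values} | Rest] ->
            %% last block has the same prefix: extend it
            lists:reverse([{Prefix, Values ++ [Value]} | Rest]);
        _ ->
            %% empty, or the last block differs: start a new block
            Blocks ++ [{Prefix, [Value]}]
    end.

%% append_block(true, a, [])                 => [{true, [a]}]
%% append_block(true, b, [{true, [a]}])      => [{true, [a, b]}]
%% append_block(false, c, [{true, [a, b]}])  => [{true, [a, b]}, {false, [c]}]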
+ +maybe_write_msg_to_disk(_Force, MsgStatus = + #msg_status { msg_on_disk = true }) -> + MsgStatus; +maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { + msg = Msg, msg_id = MsgId, + is_persistent = IsPersistent }) + when Force orelse IsPersistent -> ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)), - true; -maybe_write_msg_to_disk(_Bool, true, #basic_message { is_persistent = true }) -> - true; -maybe_write_msg_to_disk(_Bool, _PersistentMsgsAlreadyOnDisk, _Msg) -> - false. - -maybe_write_index_to_disk(Bool, IsPersistent, MsgId, SeqId, IsDelivered, - IndexState) when Bool orelse IsPersistent -> + MsgStatus #msg_status { msg_on_disk = true }; +maybe_write_msg_to_disk(_Force, MsgStatus) -> + MsgStatus. + +maybe_write_index_to_disk(_Force, MsgStatus = + #msg_status { index_on_disk = true }, IndexState) -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + {MsgStatus, IndexState}; +maybe_write_index_to_disk(Force, MsgStatus = #msg_status { + msg_id = MsgId, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered }, IndexState) + when Force orelse IsPersistent -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION IndexState1 = rabbit_queue_index:write_published( MsgId, SeqId, IsPersistent, IndexState), - {true, case IsDelivered of - true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); - false -> IndexState1 - end}; -maybe_write_index_to_disk(_Bool, _IsPersistent, _MsgId, _SeqId, _IsDelivered, - IndexState) -> - {false, IndexState}. + {MsgStatus #msg_status { index_on_disk = true }, + case IsDelivered of + true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); + false -> IndexState1 + end}; +maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> + {MsgStatus, IndexState}. %%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- -maybe_gammas_to_betas(State = #vqstate { gamma = #gamma { count = 0 } }) -> +maybe_deltas_to_betas(State = #vqstate { delta = #delta { count = 0 } }) -> State; -maybe_gammas_to_betas(State = - #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, - target_ram_msg_count = TargetRamMsgCount, - gamma = #gamma { start_seq_id = GammaSeqId, - count = GammaCount, - end_seq_id = GammaSeqIdEnd }} - ) -> - case (not queue:is_empty(Q3)) andalso 0 == TargetRamMsgCount of +maybe_deltas_to_betas( + State = #vqstate { index_state = IndexState, + q2 = Q2All, q3 = {Q3Len, _Q3} = Q3All, + target_ram_msg_count = TargetRamMsgCount, + delta = #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + end_seq_id = DeltaSeqIdEnd }}) -> + case (0 < Q3Len) andalso (0 == TargetRamMsgCount) of true -> State; false -> %% either q3 is empty, in which case we load at least one %% segment, or TargetRamMsgCount > 0, meaning we should %% really be holding all the betas in memory. - {List, IndexState1, Gamma1SeqId} = - read_index_segment(GammaSeqId, IndexState), + {List, IndexState1, Delta1SeqId} = + read_index_segment(DeltaSeqId, IndexState), State1 = State #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because of acks. 
But %% it can't be [] - Q3b = betas_from_segment_entries(List, GammaSeqIdEnd), - Q3a = queue:join(Q3, Q3b), - case GammaCount - queue:len(Q3b) of + Q3bAll = {Q3bLen, _Q3b} = + betas_from_segment_entries(List, DeltaSeqIdEnd), + Q3a = join_betas(Q3All, Q3bAll), + case DeltaCount - Q3bLen of 0 -> - %% gamma is now empty, but it wasn't before, so + %% delta is now empty, but it wasn't before, so %% can now join q2 onto q3 - State1 #vqstate { gamma = ?BLANK_GAMMA, - q2 = queue:new(), - q3 = queue:join(Q3a, Q2) }; + State1 #vqstate { delta = ?BLANK_DELTA, + q2 = {0, queue:new()}, + q3 = join_betas(Q3a, Q2All) }; N when N > 0 -> State1 #vqstate { q3 = Q3a, - gamma = #gamma { start_seq_id = Gamma1SeqId, + delta = #delta { start_seq_id = Delta1SeqId, count = N, - end_seq_id = GammaSeqIdEnd } } + end_seq_id = DeltaSeqIdEnd } } end end. maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> maybe_push_alphas_to_betas( fun queue:out/1, - fun (Beta, Q1a, State1) -> - %% these could legally go to q3 if gamma and q2 are empty - store_beta_entry(Beta, State1 #vqstate { q1 = Q1a }) + fun (MsgStatus, Q1a, State1) -> + %% these could legally go to q3 if delta and q2 are empty + store_beta_entry(MsgStatus, State1 #vqstate { q1 = Q1a }) end, Q1, State). maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> maybe_push_alphas_to_betas( fun queue:out_r/1, - fun (Beta, Q4a, State1 = #vqstate { q3 = Q3 }) -> + fun (MsgStatus, Q4a, State1 = #vqstate { q3 = {Q3Len, Q3} }) -> + MsgStatus1 = MsgStatus #msg_status { msg = undefined }, %% these must go to q3 - State1 #vqstate { q3 = queue:in_r(Beta, Q3), q4 = Q4a } + State1 #vqstate { q3 = {Q3Len + 1, + store_beta_entry1( + fun queue:out/1, fun queue:in_r/2, + MsgStatus1, Q3)}, q4 = Q4a } end, Q4, State). maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = @@ -895,112 +971,159 @@ maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; -maybe_push_alphas_to_betas(Generator, Consumer, Q, State = - #vqstate { ram_msg_count = RamMsgCount }) -> +maybe_push_alphas_to_betas( + Generator, Consumer, Q, State = + #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, + target_ram_msg_count = TargetRamMsgCount, + index_state = IndexState }) -> case Generator(Q) of {empty, _Q} -> State; - {{value, - #alpha { msg = Msg = #basic_message { guid = MsgId, - is_persistent = IsPersistent }, - seq_id = SeqId, is_delivered = IsDelivered, - index_on_disk = IndexOnDisk }}, - Qa} -> - true = maybe_write_msg_to_disk(true, true, Msg), - Beta = #beta { msg_id = MsgId, seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - index_on_disk = IndexOnDisk }, - State1 = State #vqstate { ram_msg_count = RamMsgCount - 1 }, + {{value, MsgStatus}, Qa} -> + MsgStatus1 = maybe_write_msg_to_disk(true, MsgStatus), + ForceIndex = case TargetRamMsgCount of + undefined -> + false; + _ -> + RamIndexCount >= (?RAM_INDEX_TARGET_RATIO * + TargetRamMsgCount) + end, + {MsgStatus2, IndexState1} = + maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), + RamIndexCount1 = case MsgStatus2 #msg_status.index_on_disk of + true -> RamIndexCount; + false -> RamIndexCount + 1 + end, + State1 = State #vqstate { ram_msg_count = RamMsgCount - 1, + ram_index_count = RamIndexCount1, + index_state = IndexState1 }, maybe_push_alphas_to_betas(Generator, Consumer, Qa, - Consumer(Beta, Qa, State1)) + Consumer(MsgStatus2, Qa, State1)) end. 
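The ForceIndex test above (it also appears in publish(index, ...)) reduces to a small predicate. In isolation, and using the 32768 that ?RAM_INDEX_TARGET_RATIO is defined as at this point in the series, it is roughly:

%% force the index entry to disk once the number of RAM-only indices
%% reaches the ratio times the target number of RAM messages
force_index(_RamIndexCount, undefined) ->
    false;  %% no target set: never force
force_index(RamIndexCount, TargetRamMsgCount) ->
    RamIndexCount >= 32768 * TargetRamMsgCount.

%% e.g. force_index(70000, 2) => true, force_index(60000, 2) => false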
-push_betas_to_gammas(State = #vqstate { q2 = Q2, gamma = Gamma, q3 = Q3, +push_betas_to_deltas(State = #vqstate { q2 = {Q2Len, Q2}, delta = Delta, + q3 = {Q3Len, Q3}, + ram_index_count = RamIndexCount, index_state = IndexState }) -> %% HighSeqId is high in the sense that it must be higher than the - %% seq_id in Gamma, but it's also the lowest of the betas that we - %% transfer from q2 to gamma. - {HighSeqId, Len1, Q2a, IndexState1} = - push_betas_to_gammas(fun queue:out/1, undefined, Q2, IndexState), + %% seq_id in Delta, but it's also the lowest of the betas that we + %% transfer from q2 to delta. + {HighSeqId, Q2Len, Q2a, RamIndexCount1, IndexState1} = + push_betas_to_deltas( + fun queue:out/1, + fun (IndexOnDisk, InnerQ, Q) -> + join_betas1(queue:from_list([{IndexOnDisk, InnerQ}]), Q) + end, undefined, Q2, RamIndexCount, IndexState), + true = queue:is_empty(Q2a), %% ASSERTION EndSeqId = case queue:out_r(Q2) of - {empty, _Q2} -> undefined; - {{value, #beta { seq_id = EndSeqId1 }}, _Q2} -> EndSeqId1 + 1 + {empty, _Q2} -> + undefined; + {{value, {_IndexOnDisk, InnerQ}}, _Q2} -> + {{value, #msg_status { seq_id = EndSeqId1 }}, _InnerQ} = + queue:out_r(InnerQ), + EndSeqId1 + 1 end, - Gamma1 = #gamma { start_seq_id = Gamma1SeqId } = - combine_gammas(Gamma, #gamma { start_seq_id = HighSeqId, - count = Len1, + Delta1 = #delta { start_seq_id = Delta1SeqId } = + combine_deltas(Delta, #delta { start_seq_id = HighSeqId, + count = Q2Len, end_seq_id = EndSeqId }), - State1 = State #vqstate { q2 = Q2a, gamma = Gamma1, - index_state = IndexState1 }, + State1 = State #vqstate { q2 = {0, Q2a}, delta = Delta1, + index_state = IndexState1, + ram_index_count = RamIndexCount1 }, case queue:out(Q3) of - {empty, _Q3} -> State1; - {{value, #beta { seq_id = SeqId }}, _Q3a} -> - {{value, #beta { seq_id = SeqIdMax }}, _Q3b} = queue:out_r(Q3), + {empty, _Q3} -> + State1; + {{value, {_IndexOnDisk1, InnerQ1}}, _Q3} -> + {{value, #msg_status { seq_id = SeqId }}, _InnerQ1} = + queue:out(InnerQ1), + #msg_status { seq_id = SeqIdMax } = + grab_beta(fun queue:out_r/1, Q3), Limit = rabbit_queue_index:next_segment_boundary(SeqId), %% ASSERTION - true = Gamma1SeqId == undefined orelse Gamma1SeqId > SeqIdMax, + true = Delta1SeqId == undefined orelse Delta1SeqId > SeqIdMax, case SeqIdMax < Limit of true -> %% already only holding LTE one segment indices in q3 State1; false -> - %% ASSERTION (sadly large!) - %% This says that if Gamma1SeqId /= undefined then - %% the gap from Limit to Gamma1SeqId is an integer + %% ASSERTION + %% This says that if Delta1SeqId /= undefined then + %% the gap from Limit to Delta1SeqId is an integer %% multiple of segment_size - 0 = case Gamma1SeqId of + 0 = case Delta1SeqId of undefined -> 0; - _ -> (Gamma1SeqId - Limit) rem + _ -> (Delta1SeqId - Limit) rem rabbit_queue_index:segment_size() end, %% SeqIdMax is low in the sense that it must be - %% lower than the seq_id in gamma1, in fact either - %% gamma1 has undefined as its seq_id or there + %% lower than the seq_id in delta1, in fact either + %% delta1 has undefined as its seq_id or there %% does not exist a seq_id X s.t. X > SeqIdMax and - %% X < gamma1's seq_id (would be +1 if it wasn't + %% X < delta1's seq_id (would be +1 if it wasn't %% for the possibility of gaps in the seq_ids). %% But because we use queue:out_r, SeqIdMax is %% actually also the highest seq_id of the betas we - %% transfer from q3 to gammas. 
- {SeqIdMax, Len2, Q3b, IndexState2} = - push_betas_to_gammas(fun queue:out_r/1, Limit, Q3, - IndexState1), - Gamma2 = combine_gammas(#gamma { start_seq_id = Limit, + %% transfer from q3 to deltas. + {SeqIdMax, Len2, Q3b, RamIndexCount2, IndexState2} = + push_betas_to_deltas( + fun queue:out_r/1, + fun (IndexOnDisk, InnerQ, Q) -> + join_betas1(Q, queue:from_list( + [{IndexOnDisk, InnerQ}])) + end, Limit, Q3, RamIndexCount1, IndexState1), + Delta2 = combine_deltas(#delta { start_seq_id = Limit, count = Len2, end_seq_id = SeqIdMax+1 }, - Gamma1), - State1 #vqstate { q3 = Q3b, gamma = Gamma2, - index_state = IndexState2 } + Delta1), + State1 #vqstate { q3 = {Q3Len - Len2, Q3b}, delta = Delta2, + index_state = IndexState2, + ram_index_count = RamIndexCount2 } end end. -push_betas_to_gammas(Generator, Limit, Q, IndexState) -> +push_betas_to_deltas( + Generator, Consumer, Limit, Q, RamIndexCount, IndexState) -> case Generator(Q) of - {empty, Qa} -> {undefined, 0, Qa, IndexState}; - {{value, #beta { seq_id = SeqId }}, _Qa} -> - {Count, Qb, IndexState1} = - push_betas_to_gammas(Generator, Limit, Q, 0, IndexState), - {SeqId, Count, Qb, IndexState1} + {empty, Qa} -> {undefined, 0, Qa, RamIndexCount, IndexState}; + {{value, {IndexOnDisk, InnerQ}}, Qa} -> + {{value, #msg_status { seq_id = SeqId }}, _Qb} = Generator(InnerQ), + {Count, Qb, RamIndexCount1, IndexState1} = + push_betas_to_deltas( + Generator, Consumer, Limit, IndexOnDisk, InnerQ, Qa, 0, + RamIndexCount, IndexState), + {SeqId, Count, Qb, RamIndexCount1, IndexState1} end. -push_betas_to_gammas(Generator, Limit, Q, Count, IndexState) -> +push_betas_to_deltas( + Generator, Consumer, Limit, Q, Count, RamIndexCount, IndexState) -> case Generator(Q) of - {empty, Qa} -> {Count, Qa, IndexState}; - {{value, #beta { seq_id = SeqId }}, _Qa} + {empty, Qa} -> + {Count, Qa, RamIndexCount, IndexState}; + {{value, {IndexOnDisk, InnerQ}}, Qa} -> + push_betas_to_deltas( + Generator, Consumer, Limit, IndexOnDisk, InnerQ, Qa, Count, + RamIndexCount, IndexState) + end. + +push_betas_to_deltas(Generator, Consumer, Limit, IndexOnDisk, InnerQ, Q, + Count, RamIndexCount, IndexState) -> + case Generator(InnerQ) of + {empty, _InnerQ} -> + push_betas_to_deltas(Generator, Consumer, Limit, Q, Count, + RamIndexCount, IndexState); + {{value, #msg_status { seq_id = SeqId }}, _InnerQ} when Limit /= undefined andalso SeqId < Limit -> - {Count, Q, IndexState}; - {{value, #beta { msg_id = MsgId, seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - index_on_disk = IndexOnDisk}}, Qa} -> - IndexState1 = + {Count, Consumer(IndexOnDisk, InnerQ, Q), RamIndexCount, + IndexState}; + {{value, MsgStatus}, InnerQa} -> + {RamIndexCount1, IndexState1} = case IndexOnDisk of - true -> IndexState; + true -> {RamIndexCount, IndexState}; false -> - {true, IndexState2} = - maybe_write_index_to_disk( - true, IsPersistent, MsgId, - SeqId, IsDelivered, IndexState), - IndexState2 + {#msg_status { index_on_disk = true }, IndexState2} = + maybe_write_index_to_disk(true, MsgStatus, + IndexState), + {RamIndexCount - 1, IndexState2} end, - push_betas_to_gammas(Generator, Limit, Qa, Count + 1, IndexState1) + push_betas_to_deltas( + Generator, Consumer, Limit, IndexOnDisk, InnerQa, Q, Count + 1, + RamIndexCount1, IndexState1) end. -- cgit v1.2.1 From b34e4c47a189d742abe4ec3d7e111f17e59aef14 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 13 Jan 2010 16:56:18 +0000 Subject: Refactoring of vq - pulled out the inlined block-prefix queue code and generally tidied profusely. 
Also efficiency fix in remove_queue_entries by avoiding an intermediate list (which could potentially be massive). --- src/bpqueue.erl | 185 +++++++++++++++++++++++ src/rabbit_misc.erl | 9 +- src/rabbit_variable_queue.erl | 330 +++++++++++++++++------------------------- 3 files changed, 324 insertions(+), 200 deletions(-) create mode 100644 src/bpqueue.erl diff --git a/src/bpqueue.erl b/src/bpqueue.erl new file mode 100644 index 00000000..5e7471f7 --- /dev/null +++ b/src/bpqueue.erl @@ -0,0 +1,185 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(bpqueue). + +%% Block-prefixed queue. This implements a queue of queues, but +%% supporting the normal queue interface. Each block has a prefix and +%% it is guaranteed that no two consecutive blocks have the same +%% prefix. len/1 returns the flattened length of the queue and is O(1) + +-export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, + fold/3, from_list/1, to_list/1]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(bpqueue() :: {non_neg_integer(), queue()}). +-type(prefix() :: any()). +-type(value() :: any()). +-type(result() :: {'empty', bpqueue()} | + {{'value', prefix(), value()}, bpqueue()}). + +-spec(new/0 :: () -> bpqueue()). +-spec(is_empty/1 :: (bpqueue()) -> boolean()). +-spec(len/1 :: (bpqueue()) -> non_neg_integer()). +-spec(in/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(in_r/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(out/1 :: (bpqueue()) -> result()). +-spec(out_r/1 :: (bpqueue()) -> result()). +-spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()). +-spec(fold/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()). +-spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]). + +-endif. + +%%---------------------------------------------------------------------------- + +new() -> + {0, queue:new()}. + +is_empty({0, _Q}) -> + true; +is_empty(_BPQ) -> + false. + +len({N, _Q}) -> + N. 
+ +in(Prefix, Value, {0, Q}) -> + {1, queue:in({Prefix, queue:in(Value, Q)}, Q)}; +in(Prefix, Value, {N, Q}) -> + {N+1, + case queue:out_r(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + queue:in({Prefix, queue:in(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + queue:in({Prefix, queue:in(Value, queue:new())}, Q) + end}. + +in_r(Prefix, Value, {0, Q}) -> + {1, queue:in({Prefix, queue:in(Value, Q)}, Q)}; +in_r(Prefix, Value, {N, Q}) -> + {N+1, + case queue:out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + queue:in_r({Prefix, queue:in_r(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + queue:in_r({Prefix, queue:in(Value, queue:new())}, Q) + end}. + +out({0, _Q} = BPQ) -> + {empty, BPQ}; +out({N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = queue:out(Q), + {{value, Value}, InnerQ1} = queue:out(InnerQ), + Q2 = case queue:is_empty(InnerQ1) of + true -> Q1; + false -> queue:in_r({Prefix, InnerQ1}, Q1) + end, + {{value, Prefix, Value}, {N-1, Q2}}. + +out_r({0, _Q} = BPQ) -> + {empty, BPQ}; +out_r({N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = queue:out_r(Q), + {{value, Value}, InnerQ1} = queue:out_r(InnerQ), + Q2 = case queue:is_empty(InnerQ1) of + true -> Q1; + false -> queue:in({Prefix, InnerQ1}, Q1) + end, + {{value, Prefix, Value}, {N-1, Q2}}. + +join({0, _Q}, BPQ) -> + BPQ; +join(BPQ, {0, _Q}) -> + BPQ; +join({NHead, QHead}, {NTail, QTail}) -> + {{value, {Prefix, InnerQHead}}, QHead1} = queue:out_r(QHead), + {NHead + NTail, + case queue:out(QTail) of + {{value, {Prefix, InnerQTail}}, QTail1} -> + queue:join( + queue:in({Prefix, queue:join(InnerQHead, InnerQTail)}, QHead1), + QTail1); + {{value, {_Prefix, _InnerQTail}}, _QTail1} -> + queue:join(QHead, QTail) + end}. + +fold(_Fun, Init, {0, _Q}) -> + Init; +fold(Fun, Init, {_N, Q}) -> + fold1(Fun, Init, Q). + +fold1(Fun, Init, Q) -> + case queue:out(Q) of + {empty, _Q} -> + Init; + {{value, {Prefix, InnerQ}}, Q1} -> + fold1(Fun, fold1(Fun, Prefix, Init, InnerQ), Q1) + end. + +fold1(Fun, Prefix, Init, InnerQ) -> + case queue:out(InnerQ) of + {empty, _Q} -> + Init; + {{value, Value}, InnerQ1} -> + fold1(Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) + end. + +from_list(List) -> + {FinalPrefix, FinalInnerQ, ListOfPQs1, Len} = + lists:foldl( + fun ({_Prefix, []}, Acc) -> + Acc; + ({Prefix, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix, queue:join(InnerQ, queue:from_list(InnerList)), + ListOfPQs, LenAcc + length(InnerList)}; + ({Prefix1, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix1, queue:from_list(InnerList), + [{Prefix, InnerQ} | ListOfPQs], LenAcc + length(InnerList)} + end, {undefined, queue:new(), [], 0}, List), + ListOfPQs2 = [{FinalPrefix, FinalInnerQ} | ListOfPQs1], + [{undefined, InnerQ1} | Rest] = All = lists:reverse(ListOfPQs2), + {Len, queue:from_list(case queue:is_empty(InnerQ1) of + true -> Rest; + false -> All + end)}. + +to_list({0, _Q}) -> + []; +to_list({_N, Q}) -> + lists:map(fun to_list1/1, queue:to_list(Q)). + +to_list1({Prefix, InnerQ}) -> + {Prefix, queue:to_list(InnerQ)}. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 23666a5f..2b5fe4c7 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -55,7 +55,7 @@ -export([append_file/2, ensure_parent_dirs_exist/1]). -export([format_stderr/2]). -export([start_applications/1, stop_applications/1]). --export([unfold/2, ceil/1]). +-export([unfold/2, ceil/1, queue_fold/3]). -import(mnesia). -import(lists). @@ -126,6 +126,7 @@ -spec(stop_applications/1 :: ([atom()]) -> 'ok'). 
-spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> number()). +-spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B). -endif. @@ -489,3 +490,9 @@ ceil(N) -> true -> T; false -> 1 + T end. + +queue_fold(Fun, Init, Q) -> + case queue:out(Q) of + {empty, _Q} -> Init; + {{value, V}, Q1} -> queue_fold(Fun, Fun(V, Init), Q1) + end. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6c7fad12..8205f79f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -114,15 +114,16 @@ -ifdef(use_specs). +-type(bpqueue() :: any()). -type(msg_id() :: binary()). -type(seq_id() :: non_neg_integer()). -type(ack() :: {'ack_index_and_store', msg_id(), seq_id()} | 'ack_not_on_disk'). -type(vqstate() :: #vqstate { q1 :: queue(), - q2 :: {non_neg_integer(), queue()}, + q2 :: bpqueue(), delta :: delta(), - q3 :: {non_neg_integer(), queue()}, + q3 :: bpqueue(), q4 :: queue(), duration_target :: non_neg_integer(), target_ram_msg_count :: non_neg_integer(), @@ -196,9 +197,9 @@ init(QueueName) -> end, Now = now(), State = - #vqstate { q1 = queue:new(), q2 = {0, queue:new()}, + #vqstate { q1 = queue:new(), q2 = bpqueue:new(), delta = Delta, - q3 = {0, queue:new()}, q4 = queue:new(), + q3 = bpqueue:new(), q4 = queue:new(), duration_target = undefined, target_ram_msg_count = undefined, ram_msg_count = 0, @@ -371,7 +372,8 @@ is_empty(State) -> 0 == len(State). purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> - {Q4Count, IndexState1} = remove_queue_entries(Q4, IndexState), + {Q4Count, IndexState1} = + remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), {Len, State1} = purge1(Q4Count, State #vqstate { index_state = IndexState1, q4 = queue:new() }), @@ -495,8 +497,7 @@ flush_journal(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush_journal(IndexState) }. -status(#vqstate { q1 = Q1, q2 = {Q2Len, _Q2}, - delta = Delta, q3 = {Q3Len, _Q3}, q4 = Q4, +status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, len = Len, on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, @@ -505,9 +506,9 @@ status(#vqstate { q1 = Q1, q2 = {Q2Len, _Q2}, avg_ingress_rate = AvgIngressRate, next_seq_id = NextSeqId }) -> [ {q1, queue:len(Q1)}, - {q2, Q2Len}, + {q2, bpqueue:len(Q2)}, {delta, Delta}, - {q3, Q3Len}, + {q3, bpqueue:len(Q3)}, {q4, queue:len(Q4)}, {len, Len}, {outstanding_txns, length(From)}, @@ -532,44 +533,17 @@ persistent_msg_ids(Pubs) -> Obj #basic_message.is_persistent]. betas_from_segment_entries(List, SeqIdLimit) -> - List1 = [#msg_status { msg = undefined, - msg_id = MsgId, - seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = true, - index_on_disk = true - } - || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, - SeqId < SeqIdLimit ], - {length(List1), queue:from_list([{true, queue:from_list(List1)}])}. - -join_betas({HeadLen, Head}, {TailLen, Tail}) -> - {HeadLen + TailLen, join_betas1(Head, Tail)}. - -join_betas1(Head, Tail) -> - case {queue:out_r(Head), queue:out(Tail)} of - {{empty, _Head}, _} -> - Tail; - {_, {empty, _Tail}} -> - Head; - {{{value, {IndexOnDisk, InnerQHead}}, Head1}, - {{value, {IndexOnDisk, InnerQTail}}, Tail1}} -> - queue:join( - queue:in({IndexOnDisk, - queue:join(InnerQHead, InnerQTail)}, Head1), - Tail1); - {_, _} -> queue:join(Head, Tail) - end. 
- -grab_beta(Gen, Q) -> - case Gen(Q) of - {empty, _Q} -> - empty; - {{value, {_IndexOnDisk, InnerQ}}, _Q} -> - {{value, MsgStatus}, _InnerQ} = Gen(InnerQ), - MsgStatus - end. + bpqueue:from_list([{true, + [#msg_status { msg = undefined, + msg_id = MsgId, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + } + || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, + SeqId < SeqIdLimit ]}]). read_index_segment(SeqId, IndexState) -> SeqId1 = SeqId + rabbit_queue_index:segment_size(), @@ -596,6 +570,11 @@ combine_deltas(#delta { start_seq_id = SeqIdLow, count = CountLow}, true = Count =< SeqIdEnd - SeqIdLow, %% ASSERTION #delta { start_seq_id = SeqIdLow, count = Count, end_seq_id = SeqIdEnd }. +beta_fold_no_index_on_disk(Fun, Init, Q) -> + bpqueue:fold(fun (_Prefix, Value, Acc) -> + Fun(Value, Acc) + end, Init, Q). + %%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- @@ -609,50 +588,34 @@ delete1(NextSeqId, Count, DeltaSeqId, IndexState) -> {[], IndexState1} -> delete1(NextSeqId, Count, Delta1SeqId, IndexState1); {List, IndexState1} -> - {QCount, Q} = betas_from_segment_entries(List, Delta1SeqId), - {QCount, IndexState2} = remove_queue_entries(Q, IndexState1), + Q = betas_from_segment_entries(List, Delta1SeqId), + {QCount, IndexState2} = + remove_queue_entries(fun beta_fold_no_index_on_disk/3, + Q, IndexState1), delete1(NextSeqId, Count + QCount, Delta1SeqId, IndexState2) end. -purge1(Count, State = #vqstate { q3 = {Q3Len, Q3}, index_state = IndexState }) -> - case 0 == Q3Len of +purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> + case bpqueue:is_empty(Q3) of true -> {Q1Count, IndexState1} = - remove_queue_entries(State #vqstate.q1, IndexState), + remove_queue_entries(fun rabbit_misc:queue_fold/3, + State #vqstate.q1, IndexState), {Count + Q1Count, State #vqstate { q1 = queue:new(), index_state = IndexState1 }}; false -> - {Q3Count, IndexState1} = remove_queue_entries(Q3, IndexState), + {Q3Count, IndexState1} = + remove_queue_entries(fun beta_fold_no_index_on_disk/3, + Q3, IndexState), purge1(Count + Q3Count, maybe_deltas_to_betas( State #vqstate { index_state = IndexState1, - q3 = {0, queue:new()} })) + q3 = bpqueue:new() })) end. -remove_queue_entries(Q, IndexState) -> +remove_queue_entries(Fold, Q, IndexState) -> {Count, MsgIds, SeqIds, IndexState1} = - lists:foldl( - fun (#msg_status { msg_id = MsgId, seq_id = SeqId, - is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, - index_on_disk = IndexOnDisk }, - {CountN, MsgIdsAcc, SeqIdsAcc, IndexStateN}) -> - MsgIdsAcc1 = case MsgOnDisk of - true -> [MsgId | MsgIdsAcc]; - false -> MsgIdsAcc - end, - SeqIdsAcc1 = case IndexOnDisk of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, - IndexStateN1 = case IndexOnDisk andalso not IsDelivered of - true -> rabbit_queue_index:write_delivered( - SeqId, IndexStateN); - false -> IndexStateN - end, - {CountN + 1, MsgIdsAcc1, SeqIdsAcc1, IndexStateN1} - %% we need to write the delivered records in order otherwise - %% we upset the qi. So don't reverse. - end, {0, [], [], IndexState}, queue:to_list(Q)), + Fold(fun remove_queue_entries1/2, {0, [], [], IndexState}, Q), ok = case MsgIds of [] -> ok; _ -> rabbit_msg_store:remove(MsgIds) @@ -664,28 +627,40 @@ remove_queue_entries(Q, IndexState) -> end, {Count, IndexState2}. 
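remove_queue_entries is now parameterised on a fold, so it can walk either a plain queue (via the new rabbit_misc:queue_fold/3) or a bpqueue. A usage sketch of queue_fold on its own, consistent with the implementation shown above:

%% folds front-to-back, like lists:foldl/3 but over an OTP queue
sum_queue(Q) ->
    rabbit_misc:queue_fold(fun (V, Acc) -> V + Acc end, 0, Q).

%% sum_queue(queue:from_list([1, 2, 3])) => 6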
+remove_queue_entries1( + #msg_status { msg_id = MsgId, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }, + {CountN, MsgIdsAcc, SeqIdsAcc, IndexStateN}) -> + MsgIdsAcc1 = case MsgOnDisk of + true -> [MsgId | MsgIdsAcc]; + false -> MsgIdsAcc + end, + SeqIdsAcc1 = case IndexOnDisk of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, + IndexStateN1 = case IndexOnDisk andalso not IsDelivered of + true -> rabbit_queue_index:write_delivered( + SeqId, IndexStateN); + false -> IndexStateN + end, + {CountN + 1, MsgIdsAcc1, SeqIdsAcc1, IndexStateN1}. + fetch_from_q3_or_delta(State = #vqstate { - q1 = Q1, q2 = {Q2Len, _Q2}, delta = #delta { count = DeltaCount }, - q3 = {Q3Len, Q3}, q4 = Q4, ram_msg_count = RamMsgCount, + q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, + q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, msg_store_read_state = MSCState }) -> - case queue:out(Q3) of + case bpqueue:out(Q3) of {empty, _Q3} -> 0 = DeltaCount, %% ASSERTION - 0 = Q2Len, %% ASSERTION - 0 = Q3Len, %% ASSERTION + true = bpqueue:is_empty(Q2), %% ASSERTION true = queue:is_empty(Q1), %% ASSERTION {empty, State}; - {{value, {IndexOnDisk, InnerQ}}, Q3a} -> - {{value, MsgStatus = #msg_status { - msg = undefined, msg_id = MsgId, - is_persistent = IsPersistent - }}, InnerQ1} = queue:out(InnerQ), - Q3LenB = Q3Len - 1, - Q3b = {Q3LenB, case queue:is_empty(InnerQ1) of - true -> Q3a; - false -> queue:in_r({IndexOnDisk, InnerQ1}, Q3a) - end}, + {{value, IndexOnDisk, MsgStatus = #msg_status { + msg = undefined, msg_id = MsgId, + is_persistent = IsPersistent }}, Q3a} -> {{ok, Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }}, MSCState1} = rabbit_msg_store:read(MsgId, MSCState), @@ -695,17 +670,17 @@ fetch_from_q3_or_delta(State = #vqstate { false -> RamIndexCount - 1 end, true = RamIndexCount1 >= 0, %% ASSERTION - State1 = State #vqstate { q3 = Q3b, q4 = Q4a, + State1 = State #vqstate { q3 = Q3a, q4 = Q4a, ram_msg_count = RamMsgCount + 1, ram_index_count = RamIndexCount1, msg_store_read_state = MSCState1 }, State2 = - case {0 == Q3LenB, 0 == DeltaCount} of + case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of {true, true} -> %% q3 is now empty, it wasn't before; delta is %% still empty. So q2 must be empty, and q1 %% can now be joined onto q4 - 0 = Q2Len, %% ASSERTION + true = bpqueue:is_empty(Q2), %% ASSERTION State1 #vqstate { q1 = queue:new(), q4 = queue:join(Q4a, Q1) }; {true, false} -> @@ -737,26 +712,26 @@ reduce_memory_use(State = test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, - q1 = Q1, q3 = {_Q3Len, Q3} }) -> + q1 = Q1, q3 = Q3 }) -> case TargetRamMsgCount of undefined -> msg; 0 -> - case queue:out(Q3) of + case bpqueue:out(Q3) of {empty, _Q3} -> %% if TargetRamMsgCount == 0, we know we have no %% alphas. If q3 is empty then delta must be empty %% too, so create a beta, which should end up in %% q3 index; - {{value, {_IndexOnDisk, InnerQ}}, _Q3a} -> - {{value, #msg_status { seq_id = OldSeqId }}, _InnerQ} = - queue:out(InnerQ), + {{value, _IndexOnDisk, #msg_status { seq_id = OldSeqId }}, + _Q3a} -> %% Don't look at the current delta as it may be %% empty. 
If the SeqId is still within the current %% segment, it'll be a beta, else it'll go into %% delta - case SeqId >= rabbit_queue_index:next_segment_boundary(OldSeqId) of + case SeqId >= rabbit_queue_index:next_segment_boundary( + OldSeqId) of true -> neither; false -> index end @@ -817,13 +792,13 @@ publish(index, MsgStatus, State = store_beta_entry(MsgStatus2, State1); publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = - #vqstate { index_state = IndexState, q1 = Q1, q2 = {Q2Len, _Q2}, + #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, delta = Delta }) -> MsgStatus1 = #msg_status { msg_on_disk = true } = maybe_write_msg_to_disk(true, MsgStatus), {#msg_status { index_on_disk = true }, IndexState1} = maybe_write_index_to_disk(true, MsgStatus1, IndexState), - true = queue:is_empty(Q1) andalso 0 == Q2Len, %% ASSERTION + true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION %% delta may be empty, seq_id > next_segment_boundary from q3 %% head, so we need to find where the segment boundary is before %% or equal to seq_id @@ -835,39 +810,28 @@ publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = delta = combine_deltas(Delta, Delta1) }. store_alpha_entry(MsgStatus, State = - #vqstate { q1 = Q1, q2 = {Q2Len, _Q2}, + #vqstate { q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, - q3 = {Q3Len, _Q3}, q4 = Q4 }) -> - case 0 == Q2Len andalso 0 == DeltaCount andalso 0 == Q3Len of + q3 = Q3, q4 = Q4 }) -> + case bpqueue:is_empty(Q2) andalso 0 == DeltaCount andalso + bpqueue:is_empty(Q3) of true -> true = queue:is_empty(Q1), %% ASSERTION State #vqstate { q4 = queue:in(MsgStatus, Q4) }; false -> maybe_push_q1_to_betas( State #vqstate { q1 = queue:in(MsgStatus, Q1) }) end. -store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true }, - State = #vqstate { q2 = {Q2Len, Q2}, +store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + State = #vqstate { q2 = Q2, delta = #delta { count = DeltaCount }, - q3 = {Q3Len, Q3} }) -> + q3 = Q3 }) -> MsgStatus1 = MsgStatus #msg_status { msg = undefined }, case DeltaCount == 0 of - true -> State #vqstate { q3 = {Q3Len + 1, - store_beta_entry1( - fun queue:out_r/1, fun queue:in/2, - MsgStatus1, Q3)} }; - false -> State #vqstate { q2 = {Q2Len + 1, - store_beta_entry1( - fun queue:out_r/1, fun queue:in/2, - MsgStatus1, Q2)} } - end. - -store_beta_entry1(Gen, Cons, MsgStatus = - #msg_status { index_on_disk = IndexOnDisk }, Q) -> - case Gen(Q) of - {{value, {IndexOnDisk, InnerQ}}, QTail} -> - Cons({IndexOnDisk, Cons(MsgStatus, InnerQ)}, QTail); - {_EmptyOrNotIndexOnDisk, _QTail} -> - Cons({IndexOnDisk, Cons(MsgStatus, queue:new())}, Q) + true -> + State #vqstate { q3 = bpqueue:in(IndexOnDisk, MsgStatus1, Q3) }; + false -> + State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) } end. 
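With bpqueue in place, store_beta_entry shrinks to a single bpqueue:in/3 call with the index_on_disk flag as the block prefix. In isolation the behaviour looks like this (example values invented; the results follow from the bpqueue code introduced in this patch):

demo() ->
    Q0 = bpqueue:new(),
    Q1 = bpqueue:in(true, msg_a, Q0),
    Q2 = bpqueue:in(true, msg_b, Q1),   %% same prefix: extends the block
    Q3 = bpqueue:in(false, msg_c, Q2),  %% new prefix: starts a new block
    [{true, [msg_a, msg_b]}, {false, [msg_c]}] = bpqueue:to_list(Q3),
    3 = bpqueue:len(Q3),                %% flattened length, O(1)
    {{value, true, msg_a}, _Q4} = bpqueue:out(Q3),
    ok.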
maybe_write_msg_to_disk(_Force, MsgStatus = @@ -909,13 +873,12 @@ maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> maybe_deltas_to_betas(State = #vqstate { delta = #delta { count = 0 } }) -> State; maybe_deltas_to_betas( - State = #vqstate { index_state = IndexState, - q2 = Q2All, q3 = {Q3Len, _Q3} = Q3All, + State = #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, target_ram_msg_count = TargetRamMsgCount, delta = #delta { start_seq_id = DeltaSeqId, count = DeltaCount, end_seq_id = DeltaSeqIdEnd }}) -> - case (0 < Q3Len) andalso (0 == TargetRamMsgCount) of + case (not bpqueue:is_empty(Q3)) andalso (0 == TargetRamMsgCount) of true -> State; false -> @@ -927,19 +890,18 @@ maybe_deltas_to_betas( State1 = State #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because of acks. But %% it can't be [] - Q3bAll = {Q3bLen, _Q3b} = - betas_from_segment_entries(List, DeltaSeqIdEnd), - Q3a = join_betas(Q3All, Q3bAll), - case DeltaCount - Q3bLen of + Q3a = betas_from_segment_entries(List, DeltaSeqIdEnd), + Q3b = bpqueue:join(Q3, Q3a), + case DeltaCount - bpqueue:len(Q3a) of 0 -> %% delta is now empty, but it wasn't before, so %% can now join q2 onto q3 State1 #vqstate { delta = ?BLANK_DELTA, - q2 = {0, queue:new()}, - q3 = join_betas(Q3a, Q2All) }; + q2 = bpqueue:new(), + q3 = bpqueue:join(Q3b, Q2) }; N when N > 0 -> State1 #vqstate { - q3 = Q3a, + q3 = Q3b, delta = #delta { start_seq_id = Delta1SeqId, count = N, end_seq_id = DeltaSeqIdEnd } } @@ -957,13 +919,12 @@ maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> maybe_push_alphas_to_betas( fun queue:out_r/1, - fun (MsgStatus, Q4a, State1 = #vqstate { q3 = {Q3Len, Q3} }) -> + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q4a, State1 = #vqstate { q3 = Q3 }) -> MsgStatus1 = MsgStatus #msg_status { msg = undefined }, %% these must go to q3 - State1 #vqstate { q3 = {Q3Len + 1, - store_beta_entry1( - fun queue:out/1, fun queue:in_r/2, - MsgStatus1, Q3)}, q4 = Q4a } + State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus1, Q3), + q4 = Q4a } end, Q4, State). maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = @@ -1000,43 +961,36 @@ maybe_push_alphas_to_betas( Consumer(MsgStatus2, Qa, State1)) end. -push_betas_to_deltas(State = #vqstate { q2 = {Q2Len, Q2}, delta = Delta, - q3 = {Q3Len, Q3}, +push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, ram_index_count = RamIndexCount, index_state = IndexState }) -> %% HighSeqId is high in the sense that it must be higher than the %% seq_id in Delta, but it's also the lowest of the betas that we %% transfer from q2 to delta. 
- {HighSeqId, Q2Len, Q2a, RamIndexCount1, IndexState1} = + {HighSeqId, Len1, Q2a, RamIndexCount1, IndexState1} = push_betas_to_deltas( - fun queue:out/1, - fun (IndexOnDisk, InnerQ, Q) -> - join_betas1(queue:from_list([{IndexOnDisk, InnerQ}]), Q) - end, undefined, Q2, RamIndexCount, IndexState), - true = queue:is_empty(Q2a), %% ASSERTION - EndSeqId = case queue:out_r(Q2) of - {empty, _Q2} -> - undefined; - {{value, {_IndexOnDisk, InnerQ}}, _Q2} -> - {{value, #msg_status { seq_id = EndSeqId1 }}, _InnerQ} = - queue:out_r(InnerQ), - EndSeqId1 + 1 - end, + fun bpqueue:out/1, undefined, Q2, RamIndexCount, IndexState), + true = bpqueue:is_empty(Q2a), %% ASSERTION + EndSeqId = + case bpqueue:out_r(Q2) of + {empty, _Q2} -> + undefined; + {{value, _IndexOnDisk, #msg_status { seq_id = EndSeqId1 }}, _Q2} -> + EndSeqId1 + 1 + end, Delta1 = #delta { start_seq_id = Delta1SeqId } = combine_deltas(Delta, #delta { start_seq_id = HighSeqId, - count = Q2Len, + count = Len1, end_seq_id = EndSeqId }), - State1 = State #vqstate { q2 = {0, Q2a}, delta = Delta1, + State1 = State #vqstate { q2 = bpqueue:new(), delta = Delta1, index_state = IndexState1, ram_index_count = RamIndexCount1 }, - case queue:out(Q3) of + case bpqueue:out(Q3) of {empty, _Q3} -> State1; - {{value, {_IndexOnDisk1, InnerQ1}}, _Q3} -> - {{value, #msg_status { seq_id = SeqId }}, _InnerQ1} = - queue:out(InnerQ1), - #msg_status { seq_id = SeqIdMax } = - grab_beta(fun queue:out_r/1, Q3), + {{value, _IndexOnDisk1, #msg_status { seq_id = SeqId }}, _Q3} -> + {{value, _IndexOnDisk2, #msg_status { seq_id = SeqIdMax }}, _Q3a} = + bpqueue:out_r(Q3), Limit = rabbit_queue_index:next_segment_boundary(SeqId), %% ASSERTION true = Delta1SeqId == undefined orelse Delta1SeqId > SeqIdMax, @@ -1062,58 +1016,37 @@ push_betas_to_deltas(State = #vqstate { q2 = {Q2Len, Q2}, delta = Delta, %% But because we use queue:out_r, SeqIdMax is %% actually also the highest seq_id of the betas we %% transfer from q3 to deltas. - {SeqIdMax, Len2, Q3b, RamIndexCount2, IndexState2} = - push_betas_to_deltas( - fun queue:out_r/1, - fun (IndexOnDisk, InnerQ, Q) -> - join_betas1(Q, queue:from_list( - [{IndexOnDisk, InnerQ}])) - end, Limit, Q3, RamIndexCount1, IndexState1), + {SeqIdMax, Len2, Q3a, RamIndexCount2, IndexState2} = + push_betas_to_deltas(fun bpqueue:out_r/1, Limit, Q3, + RamIndexCount1, IndexState1), Delta2 = combine_deltas(#delta { start_seq_id = Limit, count = Len2, end_seq_id = SeqIdMax+1 }, Delta1), - State1 #vqstate { q3 = {Q3Len - Len2, Q3b}, delta = Delta2, + State1 #vqstate { q3 = Q3a, delta = Delta2, index_state = IndexState2, ram_index_count = RamIndexCount2 } end end. -push_betas_to_deltas( - Generator, Consumer, Limit, Q, RamIndexCount, IndexState) -> +push_betas_to_deltas(Generator, Limit, Q, RamIndexCount, IndexState) -> case Generator(Q) of {empty, Qa} -> {undefined, 0, Qa, RamIndexCount, IndexState}; - {{value, {IndexOnDisk, InnerQ}}, Qa} -> - {{value, #msg_status { seq_id = SeqId }}, _Qb} = Generator(InnerQ), + {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} -> {Count, Qb, RamIndexCount1, IndexState1} = push_betas_to_deltas( - Generator, Consumer, Limit, IndexOnDisk, InnerQ, Qa, 0, - RamIndexCount, IndexState), + Generator, Limit, Q, 0, RamIndexCount, IndexState), {SeqId, Count, Qb, RamIndexCount1, IndexState1} end. 
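To illustrate the Limit used by push_betas_to_deltas above, assume, purely for illustration, a segment size of 4096; the real value comes from rabbit_queue_index:segment_size/0:

%% hypothetical stand-in for rabbit_queue_index:next_segment_boundary/1
next_segment_boundary(SeqId, SegSize) ->
    SegSize * (1 + (SeqId div SegSize)).

%% With the head of q3 at seq_id 5000: Limit = 8192, so only betas with
%% seq_id >= 8192 are pushed out to delta, while the partially-filled
%% segment [4096, 8192) stays behind in q3.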
-push_betas_to_deltas(
-  Generator, Consumer, Limit, Q, Count, RamIndexCount, IndexState) ->
+push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) ->
     case Generator(Q) of
         {empty, Qa} ->
             {Count, Qa, RamIndexCount, IndexState};
-        {{value, {IndexOnDisk, InnerQ}}, Qa} ->
-            push_betas_to_deltas(
-              Generator, Consumer, Limit, IndexOnDisk, InnerQ, Qa, Count,
-              RamIndexCount, IndexState)
-    end.
-
-push_betas_to_deltas(Generator, Consumer, Limit, IndexOnDisk, InnerQ, Q,
-                     Count, RamIndexCount, IndexState) ->
-    case Generator(InnerQ) of
-        {empty, _InnerQ} ->
-            push_betas_to_deltas(Generator, Consumer, Limit, Q, Count,
-                                 RamIndexCount, IndexState);
-        {{value, #msg_status { seq_id = SeqId }}, _InnerQ}
+        {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa}
           when Limit /= undefined andalso SeqId < Limit ->
-            {Count, Consumer(IndexOnDisk, InnerQ, Q), RamIndexCount,
-             IndexState};
-        {{value, MsgStatus}, InnerQa} ->
+            {Count, Q, RamIndexCount, IndexState};
+        {{value, IndexOnDisk, MsgStatus}, Qa} ->
             {RamIndexCount1, IndexState1} =
                 case IndexOnDisk of
                     true -> {RamIndexCount, IndexState};
@@ -1124,6 +1057,5 @@ push_betas_to_deltas(Generator, Consumer, Limit, IndexOnDisk, InnerQ, Q,
                          {RamIndexCount - 1, IndexState2}
                 end,
             push_betas_to_deltas(
-              Generator, Consumer, Limit, IndexOnDisk, InnerQa, Q, Count + 1,
-              RamIndexCount1, IndexState1)
+              Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1)
     end.
-- cgit v1.2.1


From 67308eebd754b785216520de97cb74666d867cfe Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 13 Jan 2010 19:32:51 +0000
Subject: Mechanism to limit the number of betas which don't have their index on disk is now in. Testing showed that if the queue is long, the change in target_ram_msg_count can be large, so driving the number of ram indices directly off that count still doesn't solve the problem. The work is therefore driven from publish, with a limit on the maximum amount of work that can be done per call. This should allow the queue to remain responsive as it works towards its goal. However, further testing, tuning and thinking are still needed.
---
 src/bpqueue.erl               | 70 +++++++++++++++++++++++++++++++++++-
 src/rabbit_tests.erl          | 83 +++++++++++++++++++++++++++++++++++++++++++
 src/rabbit_variable_queue.erl | 56 +++++++++++++++++++++++++++--
 3 files changed, 206 insertions(+), 3 deletions(-)

diff --git a/src/bpqueue.erl b/src/bpqueue.erl
index 5e7471f7..7237473f 100644
--- a/src/bpqueue.erl
+++ b/src/bpqueue.erl
@@ -37,7 +37,8 @@
 %% prefix. len/1 returns the flattened length of the queue and is O(1)
 
 -export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2,
-         fold/3, from_list/1, to_list/1]).
+         fold/3, from_list/1, to_list/1, map_fold_filter_l/4,
+         map_fold_filter_r/4]).
 
 %%----------------------------------------------------------------------------
 
@@ -60,6 +61,14 @@
 -spec(fold/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B).
 -spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()).
 -spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]).
+-spec(map_fold_filter_l/4 ::
+      (fun ((prefix()) -> boolean()),
+       fun ((value(), B) -> {prefix(), value(), B}), B, bpqueue()) ->
+          {bpqueue(), B}).
+-spec(map_fold_filter_r/4 ::
+      (fun ((prefix()) -> boolean()),
+       fun ((value(), B) -> {prefix(), value(), B}), B, bpqueue()) ->
+          {bpqueue(), B}).
 
 -endif.
 
@@ -183,3 +192,62 @@ to_list({_N, Q}) ->
 to_list1({Prefix, InnerQ}) ->
     {Prefix, queue:to_list(InnerQ)}.
+ +%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} +%% where FilterFun(Prefix) -> boolean() +%% Fun(Value, Init) -> {Prefix, Value, Init} +%% +%% The filter fun allows you to skip very quickly over blocks that +%% you're not interested in. Such blocks appear in the resulting bpq +%% without modification. The Fun is then used both to map the value, +%% which also allows you to change the prefix (and thus block) of the +%% value, and also to modify the Init/Acc (just like a fold). +map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_l(PFilter, Fun, Init, {_N, Q}) -> + map_fold_filter_l1(PFilter, Fun, Init, Q, new()). + +map_fold_filter_l1(PFilter, Fun, Init, Q, QNew) -> + case queue:out(Q) of + {empty, _Q} -> + {QNew, Init}; + {{value, {Prefix, InnerQ}}, Q1} -> + InnerList = queue:to_list(InnerQ), + {Init1, QNew1} = + case PFilter(Prefix) of + true -> + lists:foldl( + fun (Value, {Acc, QNew2}) -> + {Prefix1, Value1, Acc1} = Fun(Value, Acc), + {Acc1, in(Prefix1, Value1, QNew2)} + end, {Init, QNew}, InnerList); + false -> + {Init, join(QNew, from_list([{Prefix, InnerList}]))} + end, + map_fold_filter_l1(PFilter, Fun, Init1, Q1, QNew1) + end. + +map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_r(PFilter, Fun, Init, {_N, Q}) -> + map_fold_filter_r1(PFilter, Fun, Init, Q, new()). + +map_fold_filter_r1(PFilter, Fun, Init, Q, QNew) -> + case queue:out_r(Q) of + {empty, _Q} -> + {QNew, Init}; + {{value, {Prefix, InnerQ}}, Q1} -> + InnerList = queue:to_list(InnerQ), + {Init1, QNew1} = + case PFilter(Prefix) of + true -> + lists:foldr( + fun (Value, {Acc, QNew2}) -> + {Prefix1, Value1, Acc1} = Fun(Value, Acc), + {Acc1, in_r(Prefix1, Value1, QNew2)} + end, {Init, QNew}, InnerList); + false -> + {Init, join(from_list([{Prefix, InnerList}]), QNew)} + end, + map_fold_filter_r1(PFilter, Fun, Init1, Q1, QNew1) + end. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 16332f32..45b48017 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -55,6 +55,7 @@ all_tests() -> passed = test_queue_index(), passed = test_variable_queue(), passed = test_priority_queue(), + passed = test_bpqueue(), passed = test_unfold(), passed = test_parsing(), passed = test_topic_matching(), @@ -181,6 +182,88 @@ test_priority_queue(Q) -> priority_queue:to_list(Q), priority_queue_out_all(Q)}. 
+test_bpqueue() -> + Q = bpqueue:new(), + true = bpqueue:is_empty(Q), + 0 = bpqueue:len(Q), + + Q1 = bpqueue:in(bar, 3, bpqueue:in(foo, 2, bpqueue:in(foo, 1, Q))), + false = bpqueue:is_empty(Q1), + 3 = bpqueue:len(Q1), + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(Q1), + + Q2 = bpqueue:in_r(bar, 3, bpqueue:in_r(foo, 2, bpqueue:in_r(foo, 1, Q))), + false = bpqueue:is_empty(Q2), + 3 = bpqueue:len(Q2), + [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(Q2), + + {empty, _Q} = bpqueue:out(Q), + {{value, foo, 1}, Q3} = bpqueue:out(Q1), + {{value, foo, 2}, Q4} = bpqueue:out(Q3), + {{value, bar, 3}, _Q5} = bpqueue:out(Q4), + + {empty, _Q} = bpqueue:out_r(Q), + {{value, foo, 1}, Q6} = bpqueue:out_r(Q2), + {{value, foo, 2}, Q7} = bpqueue:out_r(Q6), + {{value, bar, 3}, _Q8} = bpqueue:out_r(Q7), + + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)), + [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)), + [{foo, [1, 2]}, {bar, [3, 3]}, {foo, [2,1]}] = + bpqueue:to_list(bpqueue:join(Q1, Q2)), + + [{foo, [1, 2]}, {bar, [3]}, {foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list(bpqueue:join(Q1, Q1)), + + [{foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list( + bpqueue:from_list( + [{x, []}, {foo, [1]}, {y, []}, {foo, [2]}, {bar, [3]}, {z, []}])), + + [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])), + + {4, [a,b,c,d]} = + bpqueue:fold( + fun (Prefix, Value, {Prefix, Acc}) -> + {Prefix + 1, [Value | Acc]} + end, + {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), + + ok = bpqueue:fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, + ok, Q), + + [] = bpqueue:to_list(Q), + + F1 = fun (Qn) -> + bpqueue:map_fold_filter_l( + fun (foo) -> true; + (_) -> false + end, + fun (V, Num) -> {bar, -V, V - Num} end, + 0, Qn) + end, + + F2 = fun (Qn) -> + bpqueue:map_fold_filter_r( + fun (foo) -> true; + (_) -> false + end, + fun (V, Num) -> {bar, -V, V - Num} end, + 0, Qn) + end, + + {Q9, 1} = F1(Q1), %% 2 - (1 - 0) == 1 + [{bar, [-1, -2, 3]}] = bpqueue:to_list(Q9), + {Q10, -1} = F2(Q1), %% 1 - (2 - 0) == -1 + [{bar, [-1, -2, 3]}] = bpqueue:to_list(Q10), + + {Q11, 0} = F1(Q), + [] = bpqueue:to_list(Q11), + {Q12, 0} = F2(Q), + [] = bpqueue:to_list(Q12), + + passed. + test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8205f79f..e821cf6b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -78,7 +78,8 @@ index_on_disk }). --define(RAM_INDEX_TARGET_RATIO, 32768). +-define(RAM_INDEX_TARGET_RATIO, 1024). +-define(RAM_INDEX_MAX_WORK, 32). %%---------------------------------------------------------------------------- @@ -226,7 +227,8 @@ terminate(State = #vqstate { index_state = IndexState, State #vqstate { index_state = rabbit_queue_index:terminate(IndexState) }. publish(Msg, State) -> - publish(Msg, false, false, State). + State1 = limit_ram_index(State), + publish(Msg, false, false, State1). 
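The index-flushing work done per publish is bounded. With the values defined above (?RAM_INDEX_TARGET_RATIO = 1024, ?RAM_INDEX_MAX_WORK = 32), the reduction computed by limit_ram_index (shown below) amounts to the following sketch, with invented figures in the example:

reduction(RamIndexCount, TargetRamMsgCount) ->
    lists:min([32, RamIndexCount - (1024 * TargetRamMsgCount)]).

%% e.g. a queue allowed 10 messages in RAM (allowance 10240 RAM-only
%% indices) currently holding 10300 of them is 60 over, but each
%% publish flushes at most min(32, 60) = 32 indices to disk.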
publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -870,6 +872,56 @@ maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> %% Phase changes %%---------------------------------------------------------------------------- +limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount, + target_ram_msg_count = TargetRamMsgCount }) + when RamIndexCount > ?RAM_INDEX_TARGET_RATIO * TargetRamMsgCount -> + Reduction = lists:min([?RAM_INDEX_MAX_WORK, + RamIndexCount - (?RAM_INDEX_TARGET_RATIO * + TargetRamMsgCount)]), + io:format("~p~n", [Reduction]), + {Reduction1, State1} = limit_q2_ram_index(Reduction, State), + {_Reduction2, State2} = limit_q3_ram_index(Reduction1, State1), + State2; +limit_ram_index(State) -> + State. + +limit_q2_ram_index(Reduction, State = #vqstate { q2 = Q2 }) + when Reduction > 0 -> + {Q2a, Reduction1, State1} = limit_ram_index(fun bpqueue:map_fold_filter_l/4, + Q2, Reduction, State), + {Reduction1, State1 #vqstate { q2 = Q2a }}; +limit_q2_ram_index(Reduction, State) -> + {Reduction, State}. + +limit_q3_ram_index(Reduction, State = #vqstate { q3 = Q3 }) + when Reduction > 0 -> + %% use the _r version so that we prioritise the msgs closest to + %% delta, and least soon to be delivered + {Q3a, Reduction1, State1} = limit_ram_index(fun bpqueue:map_fold_filter_r/4, + Q3, Reduction, State), + {Reduction1, State1 #vqstate { q3 = Q3a }}; +limit_q3_ram_index(Reduction, State) -> + {Reduction, State}. + +limit_ram_index(MapFoldFilterFun, Q, Reduction, State = + #vqstate { ram_index_count = RamIndexCount, + index_state = IndexState }) -> + {Qa, {Reduction1, IndexState1}} = + MapFoldFilterFun( + fun erlang:'not'/1, + fun (MsgStatus, {0, _IndexStateN} = Acc) -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + {false, MsgStatus, Acc}; + (MsgStatus, {N, IndexStateN}) when N > 0 -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + {MsgStatus1, IndexStateN1} = + maybe_write_index_to_disk(true, MsgStatus, IndexStateN), + {true, MsgStatus1, {N-1, IndexStateN1}} + end, {Reduction, IndexState}, Q), + RamIndexCount1 = RamIndexCount - (Reduction - Reduction1), + {Qa, Reduction1, State #vqstate { index_state = IndexState1, + ram_index_count = RamIndexCount1 }}. + maybe_deltas_to_betas(State = #vqstate { delta = #delta { count = 0 } }) -> State; maybe_deltas_to_betas( -- cgit v1.2.1 From 263192acc15329753dc539a2c5b57278a7df6cda Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 13 Jan 2010 19:40:46 +0000 Subject: Forgot to remove an io:format --- src/rabbit_variable_queue.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index e821cf6b..6c5efbd5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -78,7 +78,7 @@ index_on_disk }). --define(RAM_INDEX_TARGET_RATIO, 1024). +-define(RAM_INDEX_TARGET_RATIO, 256). -define(RAM_INDEX_MAX_WORK, 32). 
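%% Worked example of the limit arithmetic (a sketch): with
%% ?RAM_INDEX_TARGET_RATIO at 256 and ?RAM_INDEX_MAX_WORK at 32, a
%% target_ram_msg_count of 4 permits 256 * 4 = 1024 indices in RAM; if
%% ram_index_count is 1100 the excess is 76, so
%% lists:min([32, 76]) = 32 indices are written out on this publish,
%% the remainder being left for subsequent publishes.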
%%----------------------------------------------------------------------------

@@ -878,7 +878,6 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount,
     Reduction = lists:min([?RAM_INDEX_MAX_WORK,
                            RamIndexCount - (?RAM_INDEX_TARGET_RATIO *
                                             TargetRamMsgCount)]),
-    io:format("~p~n", [Reduction]),
     {Reduction1, State1} = limit_q2_ram_index(Reduction, State),
     {_Reduction2, State2} = limit_q3_ram_index(Reduction1, State1),
     State2;
-- cgit v1.2.1


From c81bbf53513c34ce8be4f6e8308102c46955f928 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 13 Jan 2010 23:46:08 +0000
Subject: Much better. Batching is important because if you're walking through
 the bpqueue and doing very little work before stopping, you don't really get
 the amortised constant-time behaviour. But the goal is achieved - throughput
 is maintained and diminishes only very slowly, with no major interruptions,
 as the queue gets fuller and the transition is made to betas and then deltas.

---
 src/bpqueue.erl               | 164 ++++++++++++++++++++++++++++++++----------
 src/rabbit_tests.erl          |  14 ++--
 src/rabbit_variable_queue.erl |  89 ++++++++++++++---------
 3 files changed, 192 insertions(+), 75 deletions(-)

diff --git a/src/bpqueue.erl b/src/bpqueue.erl
index 7237473f..a556ec23 100644
--- a/src/bpqueue.erl
+++ b/src/bpqueue.erl
@@ -63,12 +63,12 @@
 -spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]).
 -spec(map_fold_filter_l/4 ::
       (fun ((prefix()) -> boolean()),
-       fun ((value(), B) -> {prefix(), value(), B}), B, bpqueue()) ->
-           {bpqueue(), B}).
+       fun ((value(), B) -> ({prefix(), value(), B} | 'stop')), B,
+       bpqueue()) -> {bpqueue(), B}).
 -spec(map_fold_filter_r/4 ::
       (fun ((prefix()) -> boolean()),
-       fun ((value(), B) -> {prefix(), value(), B}), B, bpqueue()) ->
-           {bpqueue(), B}).
+       fun ((value(), B) -> ({prefix(), value(), B} | 'stop')), B,
+       bpqueue()) -> {bpqueue(), B}).

 -endif.

@@ -107,6 +107,40 @@ in_r(Prefix, Value, {N, Q}) ->
              queue:in_r({Prefix, queue:in(Value, queue:new())}, Q)
      end}.

+in_q(Prefix, Queue, BPQ = {0, Q}) ->
+    case queue:len(Queue) of
+        0 -> BPQ;
+        N -> {N, queue:in({Prefix, Queue}, Q)}
+    end;
+in_q(Prefix, Queue, BPQ = {N, Q}) ->
+    case queue:len(Queue) of
+        0 -> BPQ;
+        M -> {N + M,
+              case queue:out_r(Q) of
+                  {{value, {Prefix, InnerQ}}, Q1} ->
+                      queue:in({Prefix, queue:join(InnerQ, Queue)}, Q1);
+                  {{value, {_Prefix, _InnerQ}}, _Q1} ->
+                      queue:in({Prefix, Queue}, Q)
+              end}
+    end.
+
+in_q_r(Prefix, Queue, BPQ = {0, Q}) ->
+    case queue:len(Queue) of
+        0 -> BPQ;
+        N -> {N, queue:in({Prefix, Queue}, Q)}
+    end;
+in_q_r(Prefix, Queue, BPQ = {N, Q}) ->
+    case queue:len(Queue) of
+        0 -> BPQ;
+        M -> {N + M,
+              case queue:out(Q) of
+                  {{value, {Prefix, InnerQ}}, Q1} ->
+                      queue:in_r({Prefix, queue:join(Queue, InnerQ)}, Q1);
+                  {{value, {_Prefix, _InnerQ}}, _Q1} ->
+                      queue:in_r({Prefix, Queue}, Q)
+              end}
+    end.
+
 out({0, _Q} = BPQ) ->
     {empty, BPQ};
 out({N, Q}) ->
@@ -195,7 +229,7 @@ to_list1({Prefix, InnerQ}) ->

 %% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init}
 %% where FilterFun(Prefix) -> boolean()
-%%       Fun(Value, Init) -> {Prefix, Value, Init}
+%%       Fun(Value, Init) -> {Prefix, Value, Init} | stop
 %%
 %% The filter fun allows you to skip very quickly over blocks that
 %% you're not interested in. Such blocks appear in the resulting bpq
@@ -204,50 +238,106 @@ to_list1({Prefix, InnerQ}) ->
 %% value, and also to modify the Init/Acc (just like a fold).
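%% A sketch of the new 'stop' return, taken from the left variant: over
%% [{foo, [1, 2]}, {bar, [3]}], with a filter admitting only foo and a
%% Fun of fun (2, _) -> stop; (V, N) -> {bar, -V, V - N} end, only the
%% leading 1 is mapped; the stopping value and everything after it are
%% kept unchanged, giving [{bar, [-1]}, {foo, [2]}, {bar, [3]}] and an
%% accumulator of 1.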
map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; -map_fold_filter_l(PFilter, Fun, Init, {_N, Q}) -> - map_fold_filter_l1(PFilter, Fun, Init, Q, new()). +map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter_l1(N, PFilter, Fun, Init, Q, new()). -map_fold_filter_l1(PFilter, Fun, Init, Q, QNew) -> +map_fold_filter_l1(Len, PFilter, Fun, Init, Q, QNew) -> case queue:out(Q) of {empty, _Q} -> {QNew, Init}; {{value, {Prefix, InnerQ}}, Q1} -> - InnerList = queue:to_list(InnerQ), - {Init1, QNew1} = - case PFilter(Prefix) of - true -> - lists:foldl( - fun (Value, {Acc, QNew2}) -> - {Prefix1, Value1, Acc1} = Fun(Value, Acc), - {Acc1, in(Prefix1, Value1, QNew2)} - end, {Init, QNew}, InnerList); - false -> - {Init, join(QNew, from_list([{Prefix, InnerList}]))} - end, - map_fold_filter_l1(PFilter, Fun, Init1, Q1, QNew1) + case PFilter(Prefix) of + true -> + {Init1, QNew1, Cont} = + map_fold_filter_l2( + Fun, Prefix, Prefix, Init, InnerQ, QNew, queue:new()), + case Cont of + false -> + {join(QNew1, {Len - len(QNew1), Q1}), Init1}; + true -> + map_fold_filter_l1( + Len, PFilter, Fun, Init1, Q1, QNew1) + end; + false -> + map_fold_filter_l1( + Len, PFilter, Fun, Init, Q1, in_q(Prefix, InnerQ, QNew)) + end + end. + +map_fold_filter_l2(Fun, OrigPrefix, Prefix, Init, InnerQ, QNew, InnerQNew) -> + case queue:out(InnerQ) of + {empty, _Q} -> + {Init, in_q(OrigPrefix, InnerQ, + in_q(Prefix, InnerQNew, QNew)), true}; + {{value, Value}, InnerQ1} -> + case Fun(Value, Init) of + stop -> + {Init, in_q(OrigPrefix, InnerQ, + in_q(Prefix, InnerQNew, QNew)), false}; + {Prefix1, Value1, Init1} -> + case Prefix1 =:= Prefix of + true -> + map_fold_filter_l2( + Fun, OrigPrefix, Prefix, Init1, InnerQ1, QNew, + queue:in(Value1, InnerQNew)); + false -> + map_fold_filter_l2( + Fun, OrigPrefix, Prefix1, Init1, InnerQ1, + in_q(Prefix, InnerQNew, QNew), + queue:in(Value1, queue:new())) + end + end end. map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; -map_fold_filter_r(PFilter, Fun, Init, {_N, Q}) -> - map_fold_filter_r1(PFilter, Fun, Init, Q, new()). +map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter_r1(N, PFilter, Fun, Init, Q, new()). -map_fold_filter_r1(PFilter, Fun, Init, Q, QNew) -> +map_fold_filter_r1(Len, PFilter, Fun, Init, Q, QNew) -> case queue:out_r(Q) of {empty, _Q} -> {QNew, Init}; {{value, {Prefix, InnerQ}}, Q1} -> - InnerList = queue:to_list(InnerQ), - {Init1, QNew1} = - case PFilter(Prefix) of - true -> - lists:foldr( - fun (Value, {Acc, QNew2}) -> - {Prefix1, Value1, Acc1} = Fun(Value, Acc), - {Acc1, in_r(Prefix1, Value1, QNew2)} - end, {Init, QNew}, InnerList); - false -> - {Init, join(from_list([{Prefix, InnerList}]), QNew)} - end, - map_fold_filter_r1(PFilter, Fun, Init1, Q1, QNew1) + case PFilter(Prefix) of + true -> + {Init1, QNew1, Cont} = + map_fold_filter_r2( + Fun, Prefix, Prefix, Init, InnerQ, QNew, queue:new()), + case Cont of + false -> + {join({Len - len(QNew1), Q1}, QNew1), Init1}; + true -> + map_fold_filter_r1( + Len, PFilter, Fun, Init1, Q1, QNew1) + end; + false -> + map_fold_filter_r1( + Len, PFilter, Fun, Init, Q1, in_q_r(Prefix, InnerQ, QNew)) + end + end. 
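%% Length bookkeeping (sketch): when Fun returns stop, the flattened
%% length of the untouched remainder is not recomputed by walking Q1;
%% it is recovered arithmetically as Len - len(QNew1), which is why the
%% original Len is threaded through these loops.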
+ +map_fold_filter_r2(Fun, OrigPrefix, Prefix, Init, InnerQ, QNew, InnerQNew) -> + case queue:out_r(InnerQ) of + {empty, _Q} -> + {Init, in_q_r(OrigPrefix, InnerQ, + in_q_r(Prefix, InnerQNew, QNew)), true}; + {{value, Value}, InnerQ1} -> + case Fun(Value, Init) of + stop -> + {Init, in_q_r(OrigPrefix, InnerQ, + in_q_r(Prefix, InnerQNew, QNew)), false}; + {Prefix1, Value1, Init1} -> + case Prefix1 =:= Prefix of + true -> + map_fold_filter_r2( + Fun, OrigPrefix, Prefix, Init1, InnerQ1, QNew, + queue:in_r(Value1, InnerQNew)); + false -> + map_fold_filter_r2( + Fun, OrigPrefix, Prefix1, Init1, InnerQ1, + in_q_r(Prefix, InnerQNew, QNew), + queue:in(Value1, queue:new())) + end + end end. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 45b48017..291f4cb0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -239,7 +239,8 @@ test_bpqueue() -> fun (foo) -> true; (_) -> false end, - fun (V, Num) -> {bar, -V, V - Num} end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, 0, Qn) end, @@ -248,14 +249,15 @@ test_bpqueue() -> fun (foo) -> true; (_) -> false end, - fun (V, Num) -> {bar, -V, V - Num} end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, 0, Qn) end, - {Q9, 1} = F1(Q1), %% 2 - (1 - 0) == 1 - [{bar, [-1, -2, 3]}] = bpqueue:to_list(Q9), - {Q10, -1} = F2(Q1), %% 1 - (2 - 0) == -1 - [{bar, [-1, -2, 3]}] = bpqueue:to_list(Q10), + {Q9, 1} = F1(Q1), + [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = bpqueue:to_list(Q9), + {Q10, 0} = F2(Q1), + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(Q10), {Q11, 0} = F1(Q), [] = bpqueue:to_list(Q11), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6c5efbd5..a62a90ce 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -78,8 +78,20 @@ index_on_disk }). --define(RAM_INDEX_TARGET_RATIO, 256). --define(RAM_INDEX_MAX_WORK, 32). +%% If there are N msgs in the q, and M of them are betas, then it is +%% required that RAM_INDEX_BETA_RATIO * (M/N) * M of those have their +%% index on disk. Eg if RAM_INDEX_BETA_RATIO is 1.0, and there are 36 +%% msgs in the queue, of which 12 are betas, then 4 of those betas +%% must have their index on disk. +-define(RAM_INDEX_BETA_RATIO, 0.8). +%% When we discover, on publish, that we should write some indices to +%% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of +%% betas that we must be due to write indices for before we do any +%% work at all. This is both a minimum and a maximum - we don't write +%% fewer than RAM_INDEX_BATCH_SIZE indices out in one go, and we don't +%% write more - we can always come back on the next publish to do +%% more. +-define(RAM_INDEX_BATCH_SIZE, 1024). %%---------------------------------------------------------------------------- @@ -577,6 +589,26 @@ beta_fold_no_index_on_disk(Fun, Init, Q) -> Fun(Value, Acc) end, Init, Q). +permitted_ram_index_count(#vqstate { len = 0 }) -> + undefined; +permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3 }) -> + case bpqueue:len(Q2) + bpqueue:len(Q3) of + 0 -> + undefined; + BetaLength -> + %% the fraction of the queue that are betas + BetaFrac = BetaLength / Len, + BetaLength - trunc(BetaFrac * BetaLength * ?RAM_INDEX_BETA_RATIO) + end. + + +should_force_index_to_disk(State = + #vqstate { ram_index_count = RamIndexCount }) -> + case permitted_ram_index_count(State) of + undefined -> false; + Permitted -> RamIndexCount >= Permitted + end. 
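%% Worked check (a sketch): with Len = 36 and BetaLength = 12,
%% BetaFrac = 12/36, so at ?RAM_INDEX_BETA_RATIO = 0.8 the permitted
%% count is 12 - trunc(0.8 * (12/36) * 12) = 12 - 3 = 9; at a ratio of
%% 1.0 it would be 12 - 4 = 8, matching the example in the comment
%% above.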
+ %%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- @@ -771,17 +803,10 @@ publish(msg, MsgStatus, State = #vqstate { index_state = IndexState, publish(index, MsgStatus, State = #vqstate { index_state = IndexState, q1 = Q1, - ram_index_count = RamIndexCount, - target_ram_msg_count = TargetRamMsgCount }) -> + ram_index_count = RamIndexCount }) -> MsgStatus1 = #msg_status { msg_on_disk = true } = maybe_write_msg_to_disk(true, MsgStatus), - ForceIndex = case TargetRamMsgCount of - undefined -> - false; - _ -> - RamIndexCount >= (?RAM_INDEX_TARGET_RATIO * - TargetRamMsgCount) - end, + ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), RamIndexCount1 = case MsgStatus2 #msg_status.index_on_disk of @@ -872,17 +897,24 @@ maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> %% Phase changes %%---------------------------------------------------------------------------- -limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount, - target_ram_msg_count = TargetRamMsgCount }) - when RamIndexCount > ?RAM_INDEX_TARGET_RATIO * TargetRamMsgCount -> - Reduction = lists:min([?RAM_INDEX_MAX_WORK, - RamIndexCount - (?RAM_INDEX_TARGET_RATIO * - TargetRamMsgCount)]), - {Reduction1, State1} = limit_q2_ram_index(Reduction, State), - {_Reduction2, State2} = limit_q3_ram_index(Reduction1, State1), - State2; -limit_ram_index(State) -> - State. +limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> + case permitted_ram_index_count(State) of + undefined -> + State; + Permitted when RamIndexCount > Permitted -> + Reduction = lists:min([RamIndexCount - Permitted, + ?RAM_INDEX_BATCH_SIZE]), + case Reduction < ?RAM_INDEX_BATCH_SIZE of + true -> + State; + false -> + {Reduction1, State1} = limit_q2_ram_index(Reduction, State), + {_Red2, State2} = limit_q3_ram_index(Reduction1, State1), + State2 + end; + _ -> + State + end. 
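%% Gating sketch: Permitted = 3000 and RamIndexCount = 3500 gives
%% Reduction = lists:min([500, 1024]) = 500, which is below the batch
%% size, so nothing is written yet; a full batch of
%% ?RAM_INDEX_BATCH_SIZE indices is flushed only once the excess
%% reaches 1024.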
limit_q2_ram_index(Reduction, State = #vqstate { q2 = Q2 }) when Reduction > 0 -> @@ -908,9 +940,9 @@ limit_ram_index(MapFoldFilterFun, Q, Reduction, State = {Qa, {Reduction1, IndexState1}} = MapFoldFilterFun( fun erlang:'not'/1, - fun (MsgStatus, {0, _IndexStateN} = Acc) -> + fun (MsgStatus, {0, _IndexStateN}) -> false = MsgStatus #msg_status.index_on_disk, %% ASSERTION - {false, MsgStatus, Acc}; + stop; (MsgStatus, {N, IndexStateN}) when N > 0 -> false = MsgStatus #msg_status.index_on_disk, %% ASSERTION {MsgStatus1, IndexStateN1} = @@ -986,19 +1018,12 @@ maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = maybe_push_alphas_to_betas( Generator, Consumer, Q, State = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - target_ram_msg_count = TargetRamMsgCount, index_state = IndexState }) -> case Generator(Q) of {empty, _Q} -> State; {{value, MsgStatus}, Qa} -> MsgStatus1 = maybe_write_msg_to_disk(true, MsgStatus), - ForceIndex = case TargetRamMsgCount of - undefined -> - false; - _ -> - RamIndexCount >= (?RAM_INDEX_TARGET_RATIO * - TargetRamMsgCount) - end, + ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), RamIndexCount1 = case MsgStatus2 #msg_status.index_on_disk of -- cgit v1.2.1 From c07a8780cb5d99d1afef253f29d72f14f97b6164 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 13 Jan 2010 23:53:40 +0000 Subject: the ratio should always be 1. Hence removed. --- src/rabbit_variable_queue.erl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a62a90ce..6b458662 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -78,12 +78,6 @@ index_on_disk }). -%% If there are N msgs in the q, and M of them are betas, then it is -%% required that RAM_INDEX_BETA_RATIO * (M/N) * M of those have their -%% index on disk. Eg if RAM_INDEX_BETA_RATIO is 1.0, and there are 36 -%% msgs in the queue, of which 12 are betas, then 4 of those betas -%% must have their index on disk. --define(RAM_INDEX_BETA_RATIO, 0.8). %% When we discover, on publish, that we should write some indices to %% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of %% betas that we must be due to write indices for before we do any @@ -598,7 +592,7 @@ permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3 }) -> BetaLength -> %% the fraction of the queue that are betas BetaFrac = BetaLength / Len, - BetaLength - trunc(BetaFrac * BetaLength * ?RAM_INDEX_BETA_RATIO) + BetaLength - trunc(BetaFrac * BetaLength) end. -- cgit v1.2.1 From aae377fb35de4c95d038b2d8ecdc9a5bb07b00f5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 14 Jan 2010 00:47:22 +0000 Subject: Should actually be working on the ratio of betas to betas+alphas. 
Thus subtract out the delta count
---
 src/rabbit_variable_queue.erl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 6b458662..2c00c4da 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -585,13 +585,14 @@ beta_fold_no_index_on_disk(Fun, Init, Q) ->

 permitted_ram_index_count(#vqstate { len = 0 }) ->
     undefined;
-permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3 }) ->
+permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3,
+                                     delta = #delta { count = DeltaCount } }) ->
     case bpqueue:len(Q2) + bpqueue:len(Q3) of
         0 ->
             undefined;
         BetaLength ->
             %% the fraction of the queue that are betas
-            BetaFrac = BetaLength / Len,
+            BetaFrac = BetaLength / (Len - DeltaCount),
             BetaLength - trunc(BetaFrac * BetaLength)
     end.

-- cgit v1.2.1


From 0f1ac231aa3d91eca491f01f2e1fba0d23ee13c4 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 14 Jan 2010 13:46:13 +0000
Subject: Extensive testing suggests the batch size was previously set too high

---
 src/rabbit_variable_queue.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 2c00c4da..765d01a4 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -85,7 +85,7 @@
 %% fewer than RAM_INDEX_BATCH_SIZE indices out in one go, and we don't
 %% write more - we can always come back on the next publish to do
 %% more.
--define(RAM_INDEX_BATCH_SIZE, 1024).
+-define(RAM_INDEX_BATCH_SIZE, 64).

 %%----------------------------------------------------------------------------

-- cgit v1.2.1


From be931c4fa6c0f9961171d1d7890c40d0faf8530c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 15 Jan 2010 18:09:36 +0000
Subject: Refactoring of bpq

---
 src/bpqueue.erl      | 211 ++++++++++++++++++++------------------------
 src/rabbit_tests.erl |   6 +-
 2 files changed, 85 insertions(+), 132 deletions(-)

diff --git a/src/bpqueue.erl b/src/bpqueue.erl
index a556ec23..b33abdbb 100644
--- a/src/bpqueue.erl
+++ b/src/bpqueue.erl
@@ -37,7 +37,7 @@
 %% prefix. len/1 returns the flattened length of the queue and is O(1)

 -export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2,
-         fold/3, from_list/1, to_list/1, map_fold_filter_l/4,
+         foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4,
          map_fold_filter_r/4]).

 %%----------------------------------------------------------------------------

@@ -58,7 +58,8 @@
 -spec(out/1 :: (bpqueue()) -> result()).
 -spec(out_r/1 :: (bpqueue()) -> result()).
 -spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()).
--spec(fold/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B).
+-spec(foldl/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B).
+-spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B).
 -spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()).
 -spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]).
 -spec(map_fold_filter_l/4 ::
@@ -87,24 +88,21 @@ len({N, _Q}) ->

 in(Prefix, Value, {0, Q}) ->
     {1, queue:in({Prefix, queue:in(Value, Q)}, Q)};
-in(Prefix, Value, {N, Q}) ->
-    {N+1,
-     case queue:out_r(Q) of
-         {{value, {Prefix, InnerQ}}, Q1} ->
-             queue:in({Prefix, queue:in(Value, InnerQ)}, Q1);
-         {{value, {_Prefix, _InnerQ}}, _Q1} ->
-             queue:in({Prefix, queue:in(Value, queue:new())}, Q)
-     end}.
+in(Prefix, Value, BPQ) ->
+    in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ).
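%% Refactoring sketch: each left/right pair now shares one body,
%% parameterised by queue primitives. in/3 passes
%% {fun queue:in/2, fun queue:out_r/1} (append at the rear, inspect the
%% rearmost block), while in_r/3 passes the mirror image,
%% {fun queue:in_r/2, fun queue:out/1}.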
-in_r(Prefix, Value, {0, Q}) -> - {1, queue:in({Prefix, queue:in(Value, Q)}, Q)}; -in_r(Prefix, Value, {N, Q}) -> +in_r(Prefix, Value, BPQ = {0, _Q}) -> + in(Prefix, Value, BPQ); +in_r(Prefix, Value, BPQ) -> + in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ). + +in1({In, Out}, Prefix, Value, {N, Q}) -> {N+1, - case queue:out(Q) of + case Out(Q) of {{value, {Prefix, InnerQ}}, Q1} -> - queue:in_r({Prefix, queue:in_r(Value, InnerQ)}, Q1); + In({Prefix, In(Value, InnerQ)}, Q1); {{value, {_Prefix, _InnerQ}}, _Q1} -> - queue:in_r({Prefix, queue:in(Value, queue:new())}, Q) + In({Prefix, queue:in(Value, queue:new())}, Q) end}. in_q(Prefix, Queue, BPQ = {0, Q}) -> @@ -112,54 +110,45 @@ in_q(Prefix, Queue, BPQ = {0, Q}) -> 0 -> BPQ; N -> {N, queue:in({Prefix, Queue}, Q)} end; -in_q(Prefix, Queue, BPQ = {N, Q}) -> - case queue:len(Queue) of - 0 -> BPQ; - M -> {N + M, - case queue:out_r(Q) of - {{value, {Prefix, InnerQ}}, Q1} -> - queue:in({Prefix, queue:join(InnerQ, Queue)}, Q1); - {{value, {_Prefix, _InnerQ}}, _Q1} -> - queue:in({Prefix, Queue}, Q) - end} - end. +in_q(Prefix, Queue, BPQ) -> + in_q1({fun queue:in/2, fun queue:out_r/1, fun queue:join/2}, + Prefix, Queue, BPQ). -in_q_r(Prefix, Queue, BPQ = {0, Q}) -> - case queue:len(Queue) of - 0 -> BPQ; - N -> {N, queue:in({Prefix, Queue}, Q)} - end; -in_q_r(Prefix, Queue, BPQ = {N, Q}) -> +in_q_r(Prefix, Queue, BPQ = {0, _Q}) -> + in_q(Prefix, Queue, BPQ); +in_q_r(Prefix, Queue, BPQ) -> + in_q1({fun queue:in_r/2, fun queue:out/1, + fun (T, H) -> queue:join(H, T) end}, + Prefix, Queue, BPQ). + +in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> case queue:len(Queue) of 0 -> BPQ; M -> {N + M, - case queue:out(Q) of + case Out(Q) of {{value, {Prefix, InnerQ}}, Q1} -> - queue:in_r({Prefix, queue:join(Queue, InnerQ)}, Q1); + In({Prefix, Join(InnerQ, Queue)}, Q1); {{value, {_Prefix, _InnerQ}}, _Q1} -> - queue:in_r({Prefix, Queue}, Q) + In({Prefix, Queue}, Q) end} end. out({0, _Q} = BPQ) -> {empty, BPQ}; -out({N, Q}) -> - {{value, {Prefix, InnerQ}}, Q1} = queue:out(Q), - {{value, Value}, InnerQ1} = queue:out(InnerQ), - Q2 = case queue:is_empty(InnerQ1) of - true -> Q1; - false -> queue:in_r({Prefix, InnerQ1}, Q1) - end, - {{value, Prefix, Value}, {N-1, Q2}}. +out(BPQ) -> + out1({fun queue:in_r/2, fun queue:out/1}, BPQ). out_r({0, _Q} = BPQ) -> {empty, BPQ}; -out_r({N, Q}) -> - {{value, {Prefix, InnerQ}}, Q1} = queue:out_r(Q), - {{value, Value}, InnerQ1} = queue:out_r(InnerQ), +out_r(BPQ) -> + out1({fun queue:in/2, fun queue:out_r/1}, BPQ). + +out1({In, Out}, {N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = Out(Q), + {{value, Value}, InnerQ1} = Out(InnerQ), Q2 = case queue:is_empty(InnerQ1) of true -> Q1; - false -> queue:in({Prefix, InnerQ1}, Q1) + false -> In({Prefix, InnerQ1}, Q1) end, {{value, Prefix, Value}, {N-1, Q2}}. @@ -179,25 +168,30 @@ join({NHead, QHead}, {NTail, QTail}) -> queue:join(QHead, QTail) end}. -fold(_Fun, Init, {0, _Q}) -> +foldl(_Fun, Init, {0, _Q}) -> Init; -fold(Fun, Init, {_N, Q}) -> - fold1(Fun, Init, Q). +foldl(Fun, Init, {_N, Q}) -> + fold1(fun queue:out/1, Fun, Init, Q). -fold1(Fun, Init, Q) -> - case queue:out(Q) of +foldr(_Fun, Init, {0, _Q}) -> + Init; +foldr(Fun, Init, {_N, Q}) -> + fold1(fun queue:out_r/1, Fun, Init, Q). + +fold1(Out, Fun, Init, Q) -> + case Out(Q) of {empty, _Q} -> Init; {{value, {Prefix, InnerQ}}, Q1} -> - fold1(Fun, fold1(Fun, Prefix, Init, InnerQ), Q1) + fold1(Out, Fun, fold1(Out, Fun, Prefix, Init, InnerQ), Q1) end. 
-fold1(Fun, Prefix, Init, InnerQ) -> - case queue:out(InnerQ) of +fold1(Out, Fun, Prefix, Init, InnerQ) -> + case Out(InnerQ) of {empty, _Q} -> Init; {{value, Value}, InnerQ1} -> - fold1(Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) + fold1(Out, Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) end. from_list(List) -> @@ -239,105 +233,64 @@ to_list1({Prefix, InnerQ}) -> map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> - map_fold_filter_l1(N, PFilter, Fun, Init, Q, new()). - -map_fold_filter_l1(Len, PFilter, Fun, Init, Q, QNew) -> - case queue:out(Q) of - {empty, _Q} -> - {QNew, Init}; - {{value, {Prefix, InnerQ}}, Q1} -> - case PFilter(Prefix) of - true -> - {Init1, QNew1, Cont} = - map_fold_filter_l2( - Fun, Prefix, Prefix, Init, InnerQ, QNew, queue:new()), - case Cont of - false -> - {join(QNew1, {Len - len(QNew1), Q1}), Init1}; - true -> - map_fold_filter_l1( - Len, PFilter, Fun, Init1, Q1, QNew1) - end; - false -> - map_fold_filter_l1( - Len, PFilter, Fun, Init, Q1, in_q(Prefix, InnerQ, QNew)) - end - end. - -map_fold_filter_l2(Fun, OrigPrefix, Prefix, Init, InnerQ, QNew, InnerQNew) -> - case queue:out(InnerQ) of - {empty, _Q} -> - {Init, in_q(OrigPrefix, InnerQ, - in_q(Prefix, InnerQNew, QNew)), true}; - {{value, Value}, InnerQ1} -> - case Fun(Value, Init) of - stop -> - {Init, in_q(OrigPrefix, InnerQ, - in_q(Prefix, InnerQNew, QNew)), false}; - {Prefix1, Value1, Init1} -> - case Prefix1 =:= Prefix of - true -> - map_fold_filter_l2( - Fun, OrigPrefix, Prefix, Init1, InnerQ1, QNew, - queue:in(Value1, InnerQNew)); - false -> - map_fold_filter_l2( - Fun, OrigPrefix, Prefix1, Init1, InnerQ1, - in_q(Prefix, InnerQNew, QNew), - queue:in(Value1, queue:new())) - end - end - end. + map_fold_filter1( + {fun queue:out/1, fun queue:in/2, fun in_q/3, fun join/2}, + N, PFilter, Fun, Init, Q, new()). map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> - map_fold_filter_r1(N, PFilter, Fun, Init, Q, new()). + map_fold_filter1( + {fun queue:out_r/1, fun queue:in_r/2, fun in_q_r/3, + fun (T, H) -> join(H, T) end}, + N, PFilter, Fun, Init, Q, new()). -map_fold_filter_r1(Len, PFilter, Fun, Init, Q, QNew) -> - case queue:out_r(Q) of +map_fold_filter1( + Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, Init, Q, QNew) -> + case Out(Q) of {empty, _Q} -> {QNew, Init}; {{value, {Prefix, InnerQ}}, Q1} -> case PFilter(Prefix) of true -> {Init1, QNew1, Cont} = - map_fold_filter_r2( - Fun, Prefix, Prefix, Init, InnerQ, QNew, queue:new()), + map_fold_filter2( + Funs, Fun, Prefix, Prefix, Init, InnerQ, QNew, queue:new()), case Cont of false -> - {join({Len - len(QNew1), Q1}, QNew1), Init1}; + {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; true -> - map_fold_filter_r1( - Len, PFilter, Fun, Init1, Q1, QNew1) + map_fold_filter1( + Funs, Len, PFilter, Fun, Init1, Q1, QNew1) end; false -> - map_fold_filter_r1( - Len, PFilter, Fun, Init, Q1, in_q_r(Prefix, InnerQ, QNew)) + map_fold_filter1( + Funs, Len, PFilter, Fun, Init, Q1, InQ(Prefix, InnerQ, QNew)) end end. 
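%% foldl/foldr sketch: over Q = [{foo, [1, 2]}, {bar, [3]}],
%%   bpqueue:foldl(fun (P, V, Acc) -> [{P, V} | Acc] end, [], Q)
%% yields [{bar, 3}, {foo, 2}, {foo, 1}], whereas foldr with the same
%% fun yields [{foo, 1}, {foo, 2}, {bar, 3}].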
-map_fold_filter_r2(Fun, OrigPrefix, Prefix, Init, InnerQ, QNew, InnerQNew) -> - case queue:out_r(InnerQ) of +map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, Init, + InnerQ, QNew, InnerQNew) -> + case Out(InnerQ) of {empty, _Q} -> - {Init, in_q_r(OrigPrefix, InnerQ, - in_q_r(Prefix, InnerQNew, QNew)), true}; + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), true}; {{value, Value}, InnerQ1} -> case Fun(Value, Init) of stop -> - {Init, in_q_r(OrigPrefix, InnerQ, - in_q_r(Prefix, InnerQNew, QNew)), false}; + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), false}; {Prefix1, Value1, Init1} -> case Prefix1 =:= Prefix of true -> - map_fold_filter_r2( - Fun, OrigPrefix, Prefix, Init1, InnerQ1, QNew, - queue:in_r(Value1, InnerQNew)); + map_fold_filter2( + Funs, Fun, OrigPrefix, Prefix, Init1, InnerQ1, QNew, + In(Value1, InnerQNew)); false -> - map_fold_filter_r2( - Fun, OrigPrefix, Prefix1, Init1, InnerQ1, - in_q_r(Prefix, InnerQNew, QNew), - queue:in(Value1, queue:new())) + map_fold_filter2( + Funs, Fun, OrigPrefix, Prefix1, Init1, InnerQ1, + InQ(Prefix, InnerQNew, QNew), + In(Value1, queue:new())) end end end. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 291f4cb0..6ba62284 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -223,14 +223,14 @@ test_bpqueue() -> [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])), {4, [a,b,c,d]} = - bpqueue:fold( + bpqueue:foldl( fun (Prefix, Value, {Prefix, Acc}) -> {Prefix + 1, [Value | Acc]} end, {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), - ok = bpqueue:fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, - ok, Q), + ok = bpqueue:foldl(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, + ok, Q), [] = bpqueue:to_list(Q), -- cgit v1.2.1 From 194a9b96a63ca4c02139ea6eb09a4ed72a90ece8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 16 Jan 2010 18:24:04 +0000 Subject: Inability to do basic maths --- src/rabbit_variable_queue.erl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 765d01a4..58e3f5a3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -579,21 +579,23 @@ combine_deltas(#delta { start_seq_id = SeqIdLow, count = CountLow}, #delta { start_seq_id = SeqIdLow, count = Count, end_seq_id = SeqIdEnd }. beta_fold_no_index_on_disk(Fun, Init, Q) -> - bpqueue:fold(fun (_Prefix, Value, Acc) -> - Fun(Value, Acc) - end, Init, Q). + bpqueue:foldr(fun (_Prefix, Value, Acc) -> + Fun(Value, Acc) + end, Init, Q). permitted_ram_index_count(#vqstate { len = 0 }) -> undefined; permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3, delta = #delta { count = DeltaCount } }) -> - case bpqueue:len(Q2) + bpqueue:len(Q3) of - 0 -> + AlphaBetaLen = Len - DeltaCount, + case AlphaBetaLen == 0 of + true -> undefined; - BetaLength -> - %% the fraction of the queue that are betas - BetaFrac = BetaLength / (Len - DeltaCount), - BetaLength - trunc(BetaFrac * BetaLength) + false -> + BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + %% the fraction of the alphas+betas that are betas + BetaFrac = BetaLen / AlphaBetaLen, + BetaLen - trunc(BetaFrac * BetaLen) end. -- cgit v1.2.1 From 8cabb1109bf864c3ee401297f09d7a44b94e240e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 16 Jan 2010 19:35:05 +0000 Subject: When publishing delivered, the fact is, the message has already been delivered. 
Thus it should be marked as such, otherwise bad things happen when you eventually try to delete the queue (i.e. crash). --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 58e3f5a3..42a01577 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -246,7 +246,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, in_counter = InCount + 1 }, MsgStatus = #msg_status { msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, - is_delivered = false, msg_on_disk = false, index_on_disk = false }, + is_delivered = true, msg_on_disk = false, index_on_disk = false }, MsgStatus1 = maybe_write_msg_to_disk(false, MsgStatus), case MsgStatus1 #msg_status.msg_on_disk of true -> -- cgit v1.2.1 From b1b323254ae330bd956b029a94568d4001442321 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 16 Jan 2010 20:21:54 +0000 Subject: Correction of modes in reading --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 29e4972e..f362d15d 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -794,7 +794,7 @@ get_read_handle(FileNum, FHC, Dir) -> error -> {ok, Hdl} = rabbit_msg_store_misc:open_file( Dir, rabbit_msg_store_misc:filenum_to_name(FileNum), - [read | ?BINARY_MODE]), + ?READ_MODE), {Hdl, dict:store(FileNum, Hdl, FHC) } end. -- cgit v1.2.1 From 94104a79164e4bd1d15a155ec5d8f65ac4716d8e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 17 Jan 2010 20:48:11 +0000 Subject: Refactored and generally tidied the msg_store. Also added a write-back cache for the current file. This means that the clients don't need to go to the server when reading a msg from the current file. Managed to avoid using any further lines! --- include/rabbit_msg_store.hrl | 7 +- src/rabbit_msg_store.erl | 281 +++++++++++++++++++++---------------------- 2 files changed, 144 insertions(+), 144 deletions(-) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index a094454a..4dff4a01 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -50,6 +50,7 @@ -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB --define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). --define(CACHE_ETS_NAME, rabbit_msg_store_cache). --define(FILE_HANDLES_ETS_NAME, rabbit_msg_store_file_handles). +-define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). +-define(CACHE_ETS_NAME, rabbit_msg_store_cache). +-define(FILE_HANDLES_ETS_NAME, rabbit_msg_store_file_handles). +-define(CUR_FILE_CACHE_ETS_NAME, rabbit_msg_store_cur_file). 
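In outline, the write-back cache works as follows (a sketch of the idea only, reusing the table name defined above; cache_read is an illustrative helper, not a function from the patch). Clients probe the public ETS table before resorting to a server call:

    %% Fast path for messages still in the current file; Defer is a
    %% closure performing the usual gen_server2 round-trip.
    cache_read(MsgId, Defer) ->
        case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of
            [{MsgId, Msg}] -> {ok, Msg};
            []             -> Defer()
        end.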
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index f362d15d..272db825 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -247,101 +247,13 @@ read(MsgId, CState) -> case index_lookup(MsgId, CState) of not_found -> Defer(); - #msg_location { ref_count = RefCount, - file = File, - offset = Offset, - total_size = TotalSize } -> + MsgLocation -> case fetch_and_increment_cache(MsgId) of - not_found -> - [#file_summary { locked = Locked, right = Right }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), - case Right =:= undefined orelse Locked =:= true of - true -> - Defer(); - false -> - ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, 1}), - Release = fun() -> - ets:update_counter( - ?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, -1}) - end, - %% If a GC hasn't already started, it - %% won't start now. Need to check again to - %% see if we've been locked in the - %% meantime, between lookup and - %% update_counter (thus GC actually in - %% progress). - [#file_summary { locked = Locked2 }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), - case Locked2 of - true -> - Release(), - Defer(); - false -> - %% Ok, we're definitely safe to - %% continue - a GC can't start up - %% now, and isn't running, so - %% nothing will tell us from now - %% on to close the handle if it's - %% already open. (Well, a GC could - %% start, and could put close - %% entries into the ets table, but - %% the GC will wait until we're - %% done here before doing any real - %% work.) - - %% This is fine to fail (already - %% exists) - ets:insert_new(?FILE_HANDLES_ETS_NAME, - {{self(), File}, open}), - CState1 = close_all_indicated(CState), - {Hdl, CState3} = - get_read_handle(File, CState1), - {ok, Offset} = - file_handle_cache:position(Hdl, Offset), - {ok, {MsgId, Msg}} = - case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> Obj; - Rest -> - throw({error, - {misread, - [{old_cstate, CState1}, - {file_num, File}, - {offset, Offset}, - {read, Rest}, - {proc_dict, get()} - ]}}) - end, - Release(), - ok = case RefCount > 1 of - true -> - insert_into_cache(MsgId, Msg); - false -> - %% It's not in the - %% cache and we only - %% have one reference - %% to the message. So - %% don't bother - %% putting it in the - %% cache. - ok - end, - {{ok, Msg}, CState3} - end - end; - Msg -> - {{ok, Msg}, CState} + not_found -> client_read1(MsgLocation, Defer, CState); + Msg -> {{ok, Msg}, CState} end end. -close_all_indicated(CState) -> - Objs = ets:match_object(?FILE_HANDLES_ETS_NAME, {{self(), '_'}, close}), - lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> - true = ets:delete(?FILE_HANDLES_ETS_NAME, Key), - close_handle(File, CStateM) - end, CState, Objs). - contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). @@ -363,6 +275,69 @@ client_terminate(CState) -> close_all_handles(CState), ok. 
+%%---------------------------------------------------------------------------- +%% Client-side-only helpers +%%---------------------------------------------------------------------------- + +client_read1(#msg_location { msg_id = MsgId, ref_count = RefCount, file = File } + = MsgLocation, Defer, CState) -> + [#file_summary { locked = Locked, right = Right }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case {Right, Locked} of + {undefined, false} -> + case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + [] -> + Defer(); %% may have rolled over + [{MsgId, Msg}] -> + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + {{ok, Msg}, CState} + end; + {_, true} -> + Defer(); + _ -> + ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, 1}), + Release = fun() -> + ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, -1}) + end, + %% If a GC hasn't already started, it won't start + %% now. Need to check again to see if we've been locked in + %% the meantime, between lookup and update_counter (thus + %% GC actually in progress). + [#file_summary { locked = Locked2 }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case Locked2 of + true -> + Release(), + Defer(); + false -> + %% Ok, we're definitely safe to continue - a GC + %% can't start up now, and isn't running, so + %% nothing will tell us from now on to close the + %% handle if it's already open. (Well, a GC could + %% start, and could put close entries into the ets + %% table, but the GC will wait until we're done + %% here before doing any real work.) + + %% This is fine to fail (already exists) + ets:insert_new(?FILE_HANDLES_ETS_NAME, + {{self(), File}, open}), + CState1 = close_all_indicated(CState), + {Msg, CState2} = read_from_disk(MsgLocation, CState1), + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + Release(), + {{ok, Msg}, CState2} + end + end. + +close_all_indicated(CState) -> + Objs = ets:match_object(?FILE_HANDLES_ETS_NAME, {{self(), '_'}, close}), + lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> + true = ets:delete(?FILE_HANDLES_ETS_NAME, Key), + close_handle(File, CStateM) + end, CState, Objs). + %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- @@ -384,6 +359,8 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ?CACHE_ETS_NAME = ets:new(?CACHE_ETS_NAME, [set, public, named_table]), ?FILE_HANDLES_ETS_NAME = ets:new(?FILE_HANDLES_ETS_NAME, [ordered_set, public, named_table]), + ?CUR_FILE_CACHE_ETS_NAME = ets:new(?CUR_FILE_CACHE_ETS_NAME, + [set, public, named_table]), State = #msstate { dir = Dir, index_module = IndexModule, @@ -444,6 +421,7 @@ handle_cast({write, MsgId, Msg}, case index_lookup(MsgId, State) of not_found -> %% New message, lots to do + true = ets:insert_new(?CUR_FILE_CACHE_ETS_NAME, {MsgId, Msg}), {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { @@ -563,6 +541,7 @@ terminate(_Reason, State = #msstate { index_state = IndexState, ets:delete(?FILE_SUMMARY_ETS_NAME), ets:delete(?CACHE_ETS_NAME), ets:delete(?FILE_HANDLES_ETS_NAME), + ets:delete(?CUR_FILE_CACHE_ETS_NAME), IndexModule:terminate(IndexState), State3 #msstate { index_state = undefined, current_file_handle = undefined }. @@ -621,66 +600,80 @@ sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. 
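%% Read decision sketch for client_read1:
%%   {Right = undefined, Locked = false} -> the message is in the
%%       current file: try the cur file cache, deferring if the file
%%       has rolled over in the meantime;
%%   {_, Locked = true} -> GC owns the file: defer to the server;
%%   otherwise -> bump the readers count, re-check the lock, and read
%%       straight from disk without a server round-trip.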
-read_message(MsgId, From, State = - #msstate { current_file = CurFile, - current_file_handle = CurHdl }) -> +read_message(MsgId, From, State) -> case index_lookup(MsgId, State) of not_found -> gen_server2:reply(From, not_found), State; - #msg_location { ref_count = RefCount, - file = File, - offset = Offset, - total_size = TotalSize } -> + MsgLocation -> case fetch_and_increment_cache(MsgId) of not_found -> - [#file_summary { locked = Locked }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), - case Locked of - true -> - add_to_pending_gc_completion({read, MsgId, From}, - State); - false -> - ok = case CurFile =:= File andalso {ok, Offset} >= - file_handle_cache:current_raw_offset( - CurHdl) of - true -> file_handle_cache:flush(CurHdl); - false -> ok - end, - {Hdl, State1} = get_read_handle(File, State), - {ok, Offset} = - file_handle_cache:position(Hdl, Offset), - {ok, {MsgId, Msg}} = - case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> Obj; - Rest -> - throw({error, {misread, - [{old_state, State}, - {file_num, File}, - {offset, Offset}, - {read, Rest}, - {proc_dict, get()} - ]}}) - end, - ok = case RefCount > 1 of - true -> - insert_into_cache(MsgId, Msg); - false -> - %% it's not in the cache and - %% we only have one reference - %% to the message. So don't - %% bother putting it in the - %% cache. - ok - end, - gen_server2:reply(From, {ok, Msg}), - State1 - end; + read_message1(From, MsgLocation, State); Msg -> gen_server2:reply(From, {ok, Msg}), State end end. +read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset } = MsgLoc, + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl }) -> + case File =:= CurFile of + true -> + {Msg, State1} = + %% can return [] if msg in file existed on startup + case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + [] -> + ok = case {ok, Offset} >= + file_handle_cache:current_raw_offset(CurHdl) of + true -> file_handle_cache:flush(CurHdl); + false -> ok + end, + read_from_disk(MsgLoc, State); + [{MsgId, Msg1}] -> + {Msg1, State} + end, + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + gen_server2:reply(From, {ok, Msg}), + State1; + false -> + [#file_summary { locked = Locked }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case Locked of + true -> + add_to_pending_gc_completion({read, MsgId, From}, State); + false -> + {Msg, State1} = read_from_disk(MsgLoc, State), + gen_server2:reply(From, {ok, Msg}), + State1 + end + end. + +read_from_disk(#msg_location { msg_id = MsgId, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize }, State) -> + {Hdl, State1} = get_read_handle(File, State), + {ok, Offset} = file_handle_cache:position(Hdl, Offset), + {ok, {MsgId, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {MsgId, _}} = Obj -> + Obj; + Rest -> + throw({error, {misread, [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {read, Rest}, + {proc_dict, get()} + ]}}) + end, + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + {Msg, State1}. + +maybe_insert_into_cache(RefCount, MsgId, Msg) when RefCount > 1 -> + insert_into_cache(MsgId, Msg); +maybe_insert_into_cache(_RefCount, _MsgId, _Msg) -> + ok. + contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> case index_lookup(MsgId, State) of not_found -> @@ -697,10 +690,15 @@ contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> end end. 
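%% Caching note (sketch): maybe_insert_into_cache/3 caches only
%% messages with a ref_count above one, since a message referenced once
%% will be read at most once more. The flush test above relies on
%% Erlang term order: both sides are {ok, Integer}, so comparing
%% {ok, Offset} against current_raw_offset(CurHdl) compares the two
%% offsets.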
-remove_message(MsgId, State = #msstate { sum_valid_data = SumValid }) -> +remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, + current_file = CurFile }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), + true = case File =:= CurFile of + true -> ets:delete(?CUR_FILE_CACHE_ETS_NAME, MsgId); + false -> true + end, case RefCount of 1 -> ok = remove_cache_entry(MsgId), @@ -1104,6 +1102,7 @@ maybe_roll_to_new_file(Offset, locked = false, readers = 0 }), true = ets:update_element(?FILE_SUMMARY_ETS_NAME, CurFile, {#file_summary.right, NextFile}), + true = ets:delete_all_objects(?CUR_FILE_CACHE_ETS_NAME), State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }; maybe_roll_to_new_file(_, State) -> -- cgit v1.2.1 From 4ca426c2c331559a7da6863157b7880f383639ea Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 18 Jan 2010 11:01:44 +0000 Subject: Of course, we should only delete messages when they're, err, due to be deleted... --- src/rabbit_msg_store.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 272db825..82110c7d 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -695,12 +695,12 @@ remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), - true = case File =:= CurFile of - true -> ets:delete(?CUR_FILE_CACHE_ETS_NAME, MsgId); - false -> true - end, case RefCount of 1 -> + true = case File =:= CurFile of + true -> ets:delete(?CUR_FILE_CACHE_ETS_NAME, MsgId); + false -> true + end, ok = remove_cache_entry(MsgId), [FSEntry = #file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, -- cgit v1.2.1 From fed8234275c2c19830a678f176983c93d6faeab3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 18 Jan 2010 11:40:08 +0000 Subject: Of course, there's no reason not to add into the current file cache immediately, thus allowing a write followed by a read to have no delay at all --- src/rabbit_msg_store.erl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 82110c7d..ef31efad 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -238,7 +238,10 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). -write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). +write(MsgId, Msg) -> + %% could fail if msg already in there + ets:insert_new(?CUR_FILE_CACHE_ETS_NAME, {MsgId, undefined, Msg}), + gen_server2:cast(?SERVER, {write, MsgId, Msg}). 
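%% Cache protocol sketch: the writer inserts {MsgId, undefined, Msg}
%% before casting, the server stamps element 2 with CurFile when it
%% processes the write, and maybe_roll_to_new_file can then evict
%% exactly the rolled file's entries via
%% ets:match_delete(?CUR_FILE_CACHE_ETS_NAME, {'_', CurFile, '_'}).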
read(MsgId, CState) -> Defer = fun() -> @@ -288,7 +291,7 @@ client_read1(#msg_location { msg_id = MsgId, ref_count = RefCount, file = File } case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of [] -> Defer(); %% may have rolled over - [{MsgId, Msg}] -> + [{MsgId, _FileOrUndefined, Msg}] -> ok = maybe_insert_into_cache(RefCount, MsgId, Msg), {{ok, Msg}, CState} end; @@ -421,7 +424,8 @@ handle_cast({write, MsgId, Msg}, case index_lookup(MsgId, State) of not_found -> %% New message, lots to do - true = ets:insert_new(?CUR_FILE_CACHE_ETS_NAME, {MsgId, Msg}), + true = ets:update_element(?CUR_FILE_CACHE_ETS_NAME, MsgId, + {2, CurFile}), {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { @@ -630,7 +634,7 @@ read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, false -> ok end, read_from_disk(MsgLoc, State); - [{MsgId, Msg1}] -> + [{MsgId, File, Msg1}] -> {Msg1, State} end, ok = maybe_insert_into_cache(RefCount, MsgId, Msg), @@ -1102,7 +1106,7 @@ maybe_roll_to_new_file(Offset, locked = false, readers = 0 }), true = ets:update_element(?FILE_SUMMARY_ETS_NAME, CurFile, {#file_summary.right, NextFile}), - true = ets:delete_all_objects(?CUR_FILE_CACHE_ETS_NAME), + true = ets:match_delete(?CUR_FILE_CACHE_ETS_NAME, {'_', CurFile, '_'}), State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }; maybe_roll_to_new_file(_, State) -> -- cgit v1.2.1 From 155e3d4f88742bc9cdfeb5ea4fc6e257923255c3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 18 Jan 2010 17:52:29 +0000 Subject: Initial pass at solution. Seems to work. Interesting realisation that the fhc server was never previously running (post new boot sequence). --- src/file_handle_cache.erl | 51 ++++++++++++++++++++++++++++++++--------------- src/rabbit.erl | 5 +++++ src/rabbit_reader.erl | 5 ++--- src/tcp_acceptor.erl | 22 ++++++++++---------- 4 files changed, 54 insertions(+), 29 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index e8d7cf6e..ac6de519 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -124,7 +124,7 @@ -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([decrement/0, increment/0]). +-export([release/0, obtain/0]). -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 50). @@ -159,7 +159,8 @@ -record(fhc_state, { elders, limit, - count + count, + obtains }). %%---------------------------------------------------------------------------- @@ -431,11 +432,11 @@ set_maximum_since_use(MaximumAge) -> false -> ok end. -decrement() -> - gen_server:cast(?SERVER, decrement). +release() -> + gen_server:cast(?SERVER, release). -increment() -> - gen_server:cast(?SERVER, increment). +obtain() -> + gen_server:call(?SERVER, obtain, infinity). %%---------------------------------------------------------------------------- %% Internal functions @@ -662,10 +663,17 @@ init([]) -> ulimit() end, error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), - {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0}}. - -handle_call(_Msg, _From, State) -> - {reply, message_not_understood, State}. + {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, + obtains = [] }}. 
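%% Blocking obtain sketch: when the limit is reached the caller's From
%% is parked in #fhc_state.obtains rather than replied to, so
%% gen_server:call(?SERVER, obtain, infinity) simply blocks the caller
%% (e.g. the tcp acceptor) until capacity frees up; process_obtains/1
%% then wakes the oldest lists:min([length(Obtains), Limit - Count])
%% waiters.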
+ +handle_call(obtain, From, State = #fhc_state { count = Count }) -> + State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = + maybe_reduce(State #fhc_state { count = Count + 1 }), + case Limit /= infinity andalso Count1 >= Limit of + true -> {noreply, State1 #fhc_state { obtains = [From | Obtains], + count = Count1 - 1 }}; + false -> {reply, ok, State1} + end. handle_cast({open, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> @@ -686,13 +694,11 @@ handle_cast({close, Pid, EldestUnusedSince}, State = undefined -> dict:erase(Pid, Elders); _ -> dict:store(Pid, EldestUnusedSince, Elders) end, - {noreply, State #fhc_state { elders = Elders1, count = Count - 1 }}; + {noreply, process_obtains(State #fhc_state { elders = Elders1, + count = Count - 1 })}; -handle_cast(increment, State = #fhc_state { count = Count }) -> - {noreply, maybe_reduce(State #fhc_state { count = Count + 1 })}; - -handle_cast(decrement, State = #fhc_state { count = Count }) -> - {noreply, State #fhc_state { count = Count - 1 }}; +handle_cast(release, State = #fhc_state { count = Count }) -> + {noreply, process_obtains(State #fhc_state { count = Count - 1 })}; handle_cast(check_counts, State) -> {noreply, maybe_reduce(State)}. @@ -710,6 +716,19 @@ code_change(_OldVsn, State, _Extra) -> %% server helpers %%---------------------------------------------------------------------------- +process_obtains(State = #fhc_state { obtains = [] }) -> + State; +process_obtains(State = #fhc_state { limit = Limit, count = Count }) + when Limit /= infinity andalso Count >= Limit -> + State; +process_obtains(State = #fhc_state { limit = Limit, count = Count, + obtains = Obtains }) -> + Take = lists:min([length(Obtains), Limit - Count]), + {Obtainable, ObtainsNewRev} = lists:split(Take, lists:reverse(Obtains)), + [gen_server:reply(From, ok) || From <- Obtainable], + State #fhc_state { count = Count + Take, + obtains = lists:reverse(ObtainsNewRev) }. + maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders }) when Limit /= infinity andalso Count >= Limit -> diff --git a/src/rabbit.erl b/src/rabbit.erl index ac7ad046..a0cc1436 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -63,6 +63,11 @@ {mfa, {rabbit_hooks, start, []}}, {pre, kernel_ready}]}). +-rabbit_boot_step({file_handle_cache, + [{description, "file handle cache server"}, + {mfa, {rabbit_sup, start_child, [file_handle_cache]}}, + {pre, kernel_ready}]}). + -rabbit_boot_step({kernel_ready, [{description, "kernel ready"}]}). diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index 49e66e32..c2818be4 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -213,8 +213,7 @@ start_connection(Parent, Deb, Sock, SockTransform) -> erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), ProfilingValue = setup_profiling(), - try - file_handle_cache:increment(), + try mainloop(Parent, Deb, switch_callback( #v1{sock = ClientSock, connection = #connection{ @@ -235,7 +234,7 @@ start_connection(Parent, Deb, Sock, SockTransform) -> end)("exception on TCP connection ~p from ~s:~p~n~p~n", [self(), PeerAddressS, PeerPort, Ex]) after - file_handle_cache:decrement(), + file_handle_cache:release(), rabbit_log:info("closing TCP connection ~p from ~s:~p~n", [self(), PeerAddressS, PeerPort]), %% We don't close the socket explicitly. 
The reader is the
diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl
index bc742561..9e796233 100644
--- a/src/tcp_acceptor.erl
+++ b/src/tcp_acceptor.erl
@@ -48,16 +48,14 @@ start_link(Callback, LSock) ->
 %%--------------------------------------------------------------------

 init({Callback, LSock}) ->
-    case prim_inet:async_accept(LSock, -1) of
-        {ok, Ref} -> {ok, #state{callback=Callback, sock=LSock, ref=Ref}};
-        Error -> {stop, {cannot_accept, Error}}
-    end.
+    gen_server:cast(self(), accept),
+    {ok, #state{callback=Callback, sock=LSock, ref=undefined}}.

 handle_call(_Request, _From, State) ->
     {noreply, State}.

-handle_cast(_Msg, State) ->
-    {noreply, State}.
+handle_cast(accept, State) ->
+    accept(State).

 handle_info({inet_async, LSock, Ref, {ok, Sock}},
             State = #state{callback={M,F,A}, sock=LSock, ref=Ref}) ->
@@ -83,10 +81,7 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}},
     end,

     %% accept more
-    case prim_inet:async_accept(LSock, -1) of
-        {ok, NRef} -> {noreply, State#state{ref=NRef}};
-        Error -> {stop, {cannot_accept, Error}, none}
-    end;
+    accept(State);
 handle_info({inet_async, LSock, Ref, {error, closed}},
             State=#state{sock=LSock, ref=Ref}) ->
     %% It would be wrong to attempt to restart the acceptor when we
@@ -104,3 +99,10 @@ code_change(_OldVsn, State, _Extra) ->
 %%--------------------------------------------------------------------

 inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F).
+
+accept(State = #state{sock=LSock}) ->
+    ok = file_handle_cache:obtain(),
+    case prim_inet:async_accept(LSock, -1) of
+        {ok, Ref} -> {noreply, State#state{ref=Ref}};
+        Error -> {stop, {cannot_accept, Error}}
+    end.
-- cgit v1.2.1


From 82982eca2d9b8214fe14aadbb8c3f12ef270175c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 19 Jan 2010 12:13:30 +0000
Subject: The motivation for the cast in init is that if the first obtain
 blocks, then without the cast rabbit won't start up: start_link blocks on
 init, so the boot process would never finish.

Switched to monitoring the spawned process, which is much nicer than passing
through a fun to be run at the end of the process, and is a much more generic
solution anyway.
---
 src/file_handle_cache.erl | 18 ++++++++++--------
 src/rabbit_reader.erl     |  1 -
 src/tcp_acceptor.erl      |  4 ++--
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl
index ac6de519..2519382c 100644
--- a/src/file_handle_cache.erl
+++ b/src/file_handle_cache.erl
@@ -124,7 +124,7 @@
 -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).

--export([release/0, obtain/0]).
+-export([release_on_death/1, obtain/0]).

 -define(SERVER, ?MODULE).
 -define(RESERVED_FOR_OTHERS, 50).
@@ -432,8 +432,8 @@ set_maximum_since_use(MaximumAge) ->
         false -> ok
     end.

-release() ->
-    gen_server:cast(?SERVER, release).
+release_on_death(Pid) when is_pid(Pid) ->
+    gen_server:cast(?SERVER, {release_on_death, Pid}).

 obtain() ->
     gen_server:call(?SERVER, obtain, infinity).
@@ -697,15 +697,17 @@ handle_cast({close, Pid, EldestUnusedSince}, State =
     {noreply, process_obtains(State #fhc_state { elders = Elders1,
                                                  count = Count - 1 })};

-handle_cast(release, State = #fhc_state { count = Count }) ->
-    {noreply, process_obtains(State #fhc_state { count = Count - 1 })};
-
 handle_cast(check_counts, State) ->
-    {noreply, maybe_reduce(State)}.
+ {noreply, maybe_reduce(State)}; -handle_info(_Msg, State) -> +handle_cast({release_on_death, Pid}, State) -> + _MRef = erlang:monitor(process, Pid), {noreply, State}. +handle_info({'DOWN', _MRef, process, _Pid, _Reason}, + State = #fhc_state { count = Count }) -> + {noreply, process_obtains(State #fhc_state { count = Count - 1 })}. + terminate(_Reason, State) -> State. diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index c2818be4..adfd412f 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -234,7 +234,6 @@ start_connection(Parent, Deb, Sock, SockTransform) -> end)("exception on TCP connection ~p from ~s:~p~n~p~n", [self(), PeerAddressS, PeerPort, Ex]) after - file_handle_cache:release(), rabbit_log:info("closing TCP connection ~p from ~s:~p~n", [self(), PeerAddressS, PeerPort]), %% We don't close the socket explicitly. The reader is the diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl index 9e796233..6de6ac3e 100644 --- a/src/tcp_acceptor.erl +++ b/src/tcp_acceptor.erl @@ -61,7 +61,7 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, State = #state{callback={M,F,A}, sock=LSock, ref=Ref}) -> %% patch up the socket so it looks like one we got from - %% gen_tcp:accept/1 + %% gen_tcp:accept/1 {ok, Mod} = inet_db:lookup_socket(LSock), inet_db:register_socket(Sock, Mod), @@ -73,7 +73,7 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, [inet_parse:ntoa(Address), Port, inet_parse:ntoa(PeerAddress), PeerPort]), %% handle - apply(M, F, A ++ [Sock]) + file_handle_cache:release_on_death(apply(M, F, A ++ [Sock])) catch {inet_error, Reason} -> gen_tcp:close(Sock), error_logger:error_msg("unable to accept TCP connection: ~p~n", -- cgit v1.2.1 From cbe1ed36ee453b8e4fc351ed3a6e25c4c80b2037 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 Jan 2010 14:51:23 +0000 Subject: Make sure the fhc server is started early on in the boot process --- src/rabbit.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rabbit.erl b/src/rabbit.erl index ac7ad046..a0cc1436 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -63,6 +63,11 @@ {mfa, {rabbit_hooks, start, []}}, {pre, kernel_ready}]}). +-rabbit_boot_step({file_handle_cache, + [{description, "file handle cache server"}, + {mfa, {rabbit_sup, start_child, [file_handle_cache]}}, + {pre, kernel_ready}]}). + -rabbit_boot_step({kernel_ready, [{description, "kernel ready"}]}). -- cgit v1.2.1 From e787774a8d8ceb671f623b25d7991ce8b3786e97 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 Jan 2010 15:59:28 +0000 Subject: documentation tweaks --- src/file_handle_cache.erl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index e8d7cf6e..aa01a508 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -88,19 +88,20 @@ %% message and pass it into set_maximum_since_use/1. However, it's %% highly possible this age will be greater than the ages of all the %% handles the client knows of because the client has used its file -%% handles in the mean time. Thus at this point it reports to the -%% server the current timestamp at which its least recently used file -%% handle was last used. The server will check two seconds later that -%% either it's back under the limit, in which case all is well again, -%% or if not, it will calculate a new average age. Its data will be -%% much more recent now, and so it's very likely that when this is -%% communicated to the clients, the clients will close file handles. 
+%% handles in the mean time. Thus at this point the client reports to +%% the server the current timestamp at which its least recently used +%% file handle was last used. The server will check two seconds later +%% that either it's back under the limit, in which case all is well +%% again, or if not, it will calculate a new average age. Its data +%% will be much more recent now, and so it's very likely that when +%% this is communicated to the clients, the clients will close file +%% handles. %% %% The advantage of this scheme is that there is only communication %% from the client to the server on open, close, and when in the %% process of trying to reduce file handle usage. There is no %% communication from the client to the server on normal file handle -%% operations. This scheme forms a feed back loop - the server doesn't +%% operations. This scheme forms a feed-back loop - the server doesn't %% care which file handles are closed, just that some are, and it %% checks this repeatedly when over the limit. Given the guarantees of %% now(), even if there is just one file handle open, a limit of 1, -- cgit v1.2.1 From 7719de648fa67641a41600682a16a56ae147a2fe Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 Jan 2010 16:02:40 +0000 Subject: documentation --- src/file_handle_cache.erl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index b289e842..723f05ae 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -115,6 +115,14 @@ %% fully reopened again as soon as needed, thus users of this library %% do not need to worry about their handles being closed by the server %% - reopening them when necessary is handled transparently. +%% +%% The server also supports obtain and release_on_death. obtain/0 +%% blocks until a file descriptor is available. release_on_death/1 +%% takes a pid and monitors the pid, reducing the count by 1 when the +%% pid dies. Thus the assumption is that obtain/0 is called first, and +%% when that returns, release_on_death/1 is called with the pid who +%% "owns" the file descriptor. This is, for example, used to track the +%% use of file descriptors through network sockets. -behaviour(gen_server). -- cgit v1.2.1 From 5276f77950a68804a2fc45c543d02a3157cc8240 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 Jan 2010 16:53:42 +0000 Subject: Added behaviour for msg_store_index and tidied includes, and updated makefile appropriately. Good lord it'd be nice if erlang did proper build dependencies... 
--- Makefile | 5 +++- include/rabbit_msg_store_index.hrl | 58 ++++++++++++++++++++++++++++++++++++++ src/rabbit_msg_store_ets_index.erl | 5 +++- src/rabbit_msg_store_index.erl | 46 ++++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 include/rabbit_msg_store_index.hrl create mode 100644 src/rabbit_msg_store_index.erl diff --git a/Makefile b/Makefile index db8f7001..2dd7ff3c 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,10 @@ $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app $(EBIN_DIR)/gen_server2.beam: $(SOURCE_DIR)/gen_server2.erl erlc $(ERLC_OPTS) $< -$(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit.hrl $(EBIN_DIR)/gen_server2.beam +$(EBIN_DIR)/rabbit_msg_store_index.beam: $(SOURCE_DIR)/rabbit_msg_store_index.erl + erlc $(ERLC_OPTS) $< + +$(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit.hrl $(EBIN_DIR)/gen_server2.beam $(EBIN_DIR)/rabbit_msg_store_index.beam erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< # ERLC_EMULATOR="erl -smp" erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl new file mode 100644 index 00000000..92c6f51a --- /dev/null +++ b/include/rabbit_msg_store_index.hrl @@ -0,0 +1,58 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(msg_id() :: binary()). +-type(dir() :: any()). +-type(index_state() :: any()). +-type(keyvalue() :: any()). +-type(fieldpos() :: non_neg_integer()). +-type(fieldvalue() :: any()). + +-spec(init/1 :: (dir()) -> index_state()). +-spec(lookup/2 :: (msg_id(), index_state()) -> ('not_found' | keyvalue())). +-spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update_fields/3 :: (msg_id(), ({fieldpos(), fieldvalue()} | + [{fieldpos(), fieldvalue()}]), + index_state()) -> 'ok'). +-spec(delete/2 :: (msg_id(), index_state()) -> 'ok'). +-spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok'). +-spec(terminate/1 :: (index_state()) -> any()). + +-endif. 
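+
+%% Purely as an illustration (the module name below is invented), a
+%% hypothetical alternative index implementation satisfying the specs
+%% above would take the shape:
+%%
+%%   -module(rabbit_msg_store_gb_trees_index).
+%%   -behaviour(rabbit_msg_store_index).
+%%   -export([init/1, lookup/2, insert/2, update/2, update_fields/3,
+%%            delete/2, delete_by_file/2, terminate/1]).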
+ +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index e8d596f9..f4814cd1 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -30,12 +30,15 @@ %% -module(rabbit_msg_store_ets_index). + +-behaviour(rabbit_msg_store_index). + -export([init/1, lookup/2, insert/2, update/2, update_fields/3, delete/2, delete_by_file/2, terminate/1]). -define(MSG_LOC_NAME, rabbit_msg_store_ets_index). --include("rabbit_msg_store.hrl"). +-include("rabbit_msg_store_index.hrl"). init(_Dir) -> ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]). diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl new file mode 100644 index 00000000..9d899d8a --- /dev/null +++ b/src/rabbit_msg_store_index.erl @@ -0,0 +1,46 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_index). + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{init, 1}, + {lookup, 2}, + {insert, 2}, + {update, 2}, + {update_fields, 3}, + {delete, 2}, + {delete_by_file, 2}, + {terminate, 1}]; +behaviour_info(_Other) -> + undefined. -- cgit v1.2.1 From 0548df88766fc147872021d91f6b0e0a89a873ef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 19 Jan 2010 17:45:47 +0000 Subject: documentation --- src/rabbit_msg_store.erl | 64 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index ef31efad..1b356370 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -113,8 +113,9 @@ %% The components: %% -%% MsgLocation: this is an ets table which contains: +%% MsgLocation: this is a mapping from MsgId to #msg_location{}: %% {MsgId, RefCount, File, Offset, TotalSize} +%% By default, it's in ets, but it's also pluggable. %% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} %% @@ -126,7 +127,7 @@ %% eldest file. 
 %%
 %% We need to keep track of which messages are in which files (this is
-%% the MsgLocation table); how much useful data is in each file and
+%% the MsgLocation mapping); how much useful data is in each file and
 %% which files are on the left and right of each other. This is the
 %% purpose of the FileSummary table.
 %%
@@ -136,26 +137,31 @@
 %% valid data right at the start of each file. These are needed for
 %% garbage collection.
 %%
-%% When we discover that either a file is now empty or that it can be
-%% combined with the useful data in either its left or right file, we
-%% compact the two files together. This keeps disk utilisation high
-%% and aids performance.
+%% When we discover that a file is now empty, we delete it. When we
+%% discover that it can be combined with the useful data in either its
+%% left or right neighbour, and overall, across all the files, we have
+%% ((the amount of garbage) / (the sum of all file sizes)) >
+%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently,
+%% which will compact the two files together. This keeps disk
+%% utilisation high and aids performance. We deliberately do this
+%% lazily in order to prevent doing GC on files which are soon to be
+%% emptied (and hence deleted).
 %%
-%% Given the compaction between two files, the left file is considered
-%% the ultimate destination for the good data in the right file. If
-%% necessary, the good data in the left file which is fragmented
-%% throughout the file is written out to a temporary file, then read
-%% back in to form a contiguous chunk of good data at the start of the
-%% left file. Thus the left file is garbage collected and
-%% compacted. Then the good data from the right file is copied onto
-%% the end of the left file. MsgLocation and FileSummary tables are
-%% updated.
+%% Given the compaction between two files, the left file (i.e. elder
+%% file) is considered the ultimate destination for the good data in
+%% the right file. If necessary, the good data in the left file which
+%% is fragmented throughout the file is written out to a temporary
+%% file, then read back in to form a contiguous chunk of good data at
+%% the start of the left file. Thus the left file is garbage collected
+%% and compacted. Then the good data from the right file is copied
+%% onto the end of the left file. MsgLocation and FileSummary tables
+%% are updated.
 %%
 %% On startup, we scan the files we discover, dealing with the
 %% possibility of a crash having occurred during a compaction (this
 %% consists of tidyup - the compaction is deliberately designed such
 %% that data is duplicated on disk rather than risking it being lost),
-%% and rebuild the ets tables (MsgLocation, FileSummary).
+%% and rebuild the FileSummary ets table and MsgLocation mapping.
 %%
 %% So, with this design, messages move to the left. Eventually, they
 %% should end up in a contiguous block on the left and are then never
@@ -215,7 +221,8 @@
 %%
 %% Messages are reference-counted. When a message with the same id is
 %% written several times we only store it once, and only remove it
-%% from the store when it has been removed the same number of times.
+%% from the store when it has been removed the same number of
+%% times.
 %%
 %% The reference counts do not persist. Therefore the initialisation
 %% function must be provided with a generator that produces ref count
 %% deltas for all recovered messages.
 %%
 %% Read messages with a reference count greater than one are entered
 %% into a message cache. The purpose of the cache is not especially
 %% performance, though it can help there too, but prevention of memory
 %% explosion. It ensures that as messages with a high reference count
 %% are read from several processes they are read back as the same
 %% binary object rather than multiples of identical binary
 %% objects.
+%%
+%% Reads can be performed directly by clients without calling to the
+%% server. This is safe because multiple file handles can be used to
+%% read files. However, locking is used by the concurrent GC to make
+%% sure that reads are not attempted from files which are in the
+%% process of being garbage collected.
+%%
+%% The server automatically defers reads, removes and contains calls
+%% which refer to files that are currently being GC'd. Contains calls
+%% are only deferred in order to ensure they do not overtake removes.
+%%
+%% The current file to which messages are being written has a
+%% write-back cache. This is written to immediately by the client and
+%% can be read from by the client too. This means that there are only
+%% ever writes made to the current file, thus eliminating delays due
+%% to flushing write buffers in order to be able to safely read from
+%% the current file. The one exception to this is that on start up,
+%% the cache is not populated with msgs found in the current file, and
+%% thus in this case only, reads may have to come from the file
+%% itself. The effect of this is that even if the msg_store process is
+%% heavily overloaded, clients can still write and read messages with
+%% very low latency and not block at all.
 
 %%----------------------------------------------------------------------------
 %% public API
-- cgit v1.2.1 

From 19f58b0d23873d786136f9fab22fc6901cbb8c68 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 20 Jan 2010 17:24:01 +0000
Subject: A mistake in the error case in accept, and elimination of one reverse

---
 src/file_handle_cache.erl | 10 +++++-----
 src/tcp_acceptor.erl      |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl
index 723f05ae..9fb18c39 100644
--- a/src/file_handle_cache.erl
+++ b/src/file_handle_cache.erl
@@ -734,11 +734,11 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count })
     State;
 process_obtains(State = #fhc_state { limit = Limit, count = Count,
                                      obtains = Obtains }) ->
-    Take = lists:min([length(Obtains), Limit - Count]),
-    {Obtainable, ObtainsNewRev} = lists:split(Take, lists:reverse(Obtains)),
-    [gen_server:reply(From, ok) || From <- Obtainable],
-    State #fhc_state { count = Count + Take,
-                       obtains = lists:reverse(ObtainsNewRev) }.
+    ObtainsLen = length(Obtains),
+    Take = ObtainsLen - lists:min([ObtainsLen, Limit - Count]),
+    {ObtainsNew, ObtainableRev} = lists:split(Take, Obtains),
+    [gen_server:reply(From, ok) || From <- lists:reverse(ObtainableRev)],
+    State #fhc_state { count = Count + Take, obtains = ObtainsNew }.
 
 maybe_reduce(State = #fhc_state { limit = Limit, count = Count,
                                   elders = Elders })
diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl
index 6de6ac3e..f38f8191 100644
--- a/src/tcp_acceptor.erl
+++ b/src/tcp_acceptor.erl
@@ -104,5 +104,5 @@ accept(State = #state{sock=LSock}) ->
     ok = file_handle_cache:obtain(),
     case prim_inet:async_accept(LSock, -1) of
         {ok, Ref} -> {noreply, State#state{ref=Ref}};
-        Error -> {stop, {cannot_accept, Error}}
+        Error -> {stop, {cannot_accept, Error}, State}
     end.
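+
+%% A sketch of the assumed overall lifecycle, tying the above pieces
+%% together (illustrative, not a verbatim description of the code):
+%%
+%%   1. the acceptor calls file_handle_cache:obtain/0, which blocks
+%%      (queueing the caller in 'obtains') while the server is at its
+%%      limit;
+%%   2. once obtain/0 returns, async_accept is primed and the accepted
+%%      socket is handed to the callback, whose pid is registered via
+%%      file_handle_cache:release_on_death/1;
+%%   3. when that pid dies, the server's 'DOWN' handler decrements the
+%%      count and process_obtains/1 replies to queued obtainers.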
-- cgit v1.2.1 From 18eed376962b3de124b09e07269d88009763141d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 20 Jan 2010 17:31:30 +0000 Subject: Given we know all of those processes are about to be enabled, the order in which we do that is actually irrelevant --- src/file_handle_cache.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 9fb18c39..144d3831 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -737,7 +737,7 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, ObtainsLen = length(Obtains), Take = ObtainsLen - lists:min([ObtainsLen, Limit - Count]), {ObtainsNew, ObtainableRev} = lists:split(Take, Obtains), - [gen_server:reply(From, ok) || From <- lists:reverse(ObtainableRev)], + [gen_server:reply(From, ok) || From <- ObtainableRev], State #fhc_state { count = Count + Take, obtains = ObtainsNew }. maybe_reduce(State = #fhc_state { limit = Limit, count = Count, -- cgit v1.2.1 From 4733399267b441fe7ae906d3778d0e1d70e2d2bd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 Jan 2010 10:51:34 +0000 Subject: fixed --- src/file_handle_cache.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 144d3831..1b39d296 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -738,7 +738,8 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, Take = ObtainsLen - lists:min([ObtainsLen, Limit - Count]), {ObtainsNew, ObtainableRev} = lists:split(Take, Obtains), [gen_server:reply(From, ok) || From <- ObtainableRev], - State #fhc_state { count = Count + Take, obtains = ObtainsNew }. + State #fhc_state { count = Count + length(ObtainableRev), + obtains = ObtainsNew }. maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders }) -- cgit v1.2.1 From 042aeed7aa7c9310c15027a09cbadff30764f3d3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 Jan 2010 11:16:07 +0000 Subject: fixed? --- src/file_handle_cache.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 1b39d296..45c0eff3 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -735,11 +735,11 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count }) process_obtains(State = #fhc_state { limit = Limit, count = Count, obtains = Obtains }) -> ObtainsLen = length(Obtains), - Take = ObtainsLen - lists:min([ObtainsLen, Limit - Count]), + ObtainableLen = lists:min([ObtainsLen, Limit - Count]), + Take = ObtainsLen - ObtainableLen, {ObtainsNew, ObtainableRev} = lists:split(Take, Obtains), [gen_server:reply(From, ok) || From <- ObtainableRev], - State #fhc_state { count = Count + length(ObtainableRev), - obtains = ObtainsNew }. + State #fhc_state { count = Count + ObtainableLen, obtains = ObtainsNew }. 
 maybe_reduce(State = #fhc_state { limit = Limit, count = Count,
                                   elders = Elders })
-- cgit v1.2.1 

From aa8a7407247ef6059842b7a50b5291e94c56787c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 21 Jan 2010 14:21:43 +0000
Subject: Added documentation for qi

---
 src/rabbit_queue_index.erl | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 46a6e008..cc868598 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -40,6 +40,76 @@
 -define(CLEAN_FILENAME, "clean.dot").
 
 %%----------------------------------------------------------------------------
+
+%% The queue index is responsible for recording the order of messages
+%% within a queue on disk.
+
+%% Because the queue can decide at any point to send a queue entry to
+%% disk, you cannot rely on publishes appearing in order. The only
+%% thing you can rely on is a message being published, then delivered,
+%% then ack'd.
+
+%% In order to be able to clean up ack'd messages, we write to segment
+%% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT
+%% publishes, delivers and acknowledgements. They are numbered, and so
+%% it is known that the 0th segment contains messages 0 ->
+%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages
+%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As
+%% such, in the segment files, we only refer to message sequence ids
+%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a
+%% fixed size.
+
+%% However, transient messages which are not sent to disk at any point
+%% will cause gaps to appear in segment files. Therefore, we delete a
+%% segment file whenever the number of publishes == number of acks
+%% (note that although it is not fully enforced, it is assumed that a
+%% message will never be ack'd before it is delivered, thus this test
+%% also implies == number of delivers). In practice, this does not
+%% cause disk churn in the pathological case because of the journal
+%% and caching (see below).
+
+%% Because publishes, delivers and acks can occur all over the place,
+%% we wish to avoid lots of seeking. Therefore we have a fixed-size
+%% journal to which all actions are appended. When the number of
+%% entries in this journal reaches ?MAX_JOURNAL_ENTRY_COUNT, the
+%% journal entries are scattered out to their relevant files, and the
+%% journal is truncated to zero size. Note that entries in the journal
+%% must carry the full sequence id, thus the format of entries in the
+%% journal is different to that in the segments.
+
+%% The journal is also kept fully in memory, pre-segmented: the state
+%% contains a dict from segment numbers to state-per-segment. Actions
+%% are stored directly in this state. Thus at the point of flushing
+%% the journal, firstly no reading from disk is necessary, but
+%% secondly if the known number of acks and publishes are equal, given
+%% the known state of the segment file, combined with the journal, no
+%% writing needs to be done to the segment file either (in fact it is
+%% deleted if it exists at all). This is safe given that the set of
+%% acks is a subset of the set of publishes. When it's necessary to
+%% sync messages because of transactions, it's only necessary to fsync
+%% on the journal: when entries are distributed from the journal to
+%% segment files, those segments appended to are fsync'd prior to the
+%% journal being truncated.
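+
+%% As a sketch (not necessarily the module's actual API), the mapping
+%% between a full sequence id and its segment is plain integer
+%% arithmetic:
+%%
+%%   seq_id_to_seg_and_rel_seq_id(SeqId) ->
+%%       {SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT}.
+%%
+%%   reconstruct_seq_id(Seg, RelSeq) ->
+%%       (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq.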
+
+%% It is very common to need to access two particular segments very
+%% frequently: one for publishes, and one for deliveries and acks. As
+%% such, and given the poor performance of the erlang dict module, we
+%% cache the per-segment-state for the two most recently used segments
+%% in the state; this provides a substantial performance improvement.
+
+%% This module is also responsible for scanning the queue index files
+%% and seeding the message store on start up.
+
+%% Note that in general, the representation of a message as the tuple:
+%% {('no_pub'|{MsgId, IsPersistent}), ('del'|'no_del'),
+%% ('ack'|'no_ack')} is richer than strictly necessary for most
+%% operations. However, for startup, and to ensure the safe and
+%% correct combination of journal entries with entries read from the
+%% segment on disk, this richer representation vastly simplifies and
+%% clarifies the code.
+
+%%----------------------------------------------------------------------------
+
 %% ---- Journal details ----
 
 -define(MAX_JOURNAL_ENTRY_COUNT, 262144).
-- cgit v1.2.1 

From d100102773c304ada4fc0e87f5d40b98fbac0c16 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 21 Jan 2010 14:24:23 +0000
Subject: doc improvement

---
 src/rabbit_queue_index.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index cc868598..98c86cff 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -100,8 +100,8 @@
 %% This module is also responsible for scanning the queue index files
 %% and seeding the message store on start up.
 
-%% Note that in general, the representation of a message as the tuple:
-%% {('no_pub'|{MsgId, IsPersistent}), ('del'|'no_del'),
+%% Note that in general, the representation of a message's state as
+%% the tuple: {('no_pub'|{MsgId, IsPersistent}), ('del'|'no_del'),
 %% ('ack'|'no_ack')} is richer than strictly necessary for most
 %% operations. However, for startup, and to ensure the safe and
 %% correct combination of journal entries with entries read from the
-- cgit v1.2.1 

From 0e658317542ee13bf10d9857e22cd8fabbc6430f Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 21 Jan 2010 15:07:06 +0000
Subject: Wrote new documentation for VQ

---
 src/rabbit_variable_queue.erl | 122 +++++++++++++++++++++++++++++++-----------
 1 file changed, 92 insertions(+), 30 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 42a01577..c11489d1 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -38,6 +38,98 @@
          tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1,
          flush_journal/1, status/1]).
 
+%%----------------------------------------------------------------------------
+%% Definitions:
+
+%% alpha: this is a message where both the message itself, and its
+%% position within the queue are held in RAM
+%%
+%% beta: this is a message where the message itself is only held on
+%% disk, but its position within the queue is held in RAM.
+%%
+%% gamma: this is a message where the message itself is only held on
+%% disk, but its position is both in RAM and on disk.
+%%
+%% delta: this is a collection of messages, represented by a single
+%% term, where the messages and their position are only held on
+%% disk.
+%%
+%% Note that for persistent messages, the message and its position
+%% within the queue are always held on disk, *in addition* to being in
+%% one of the above classifications.
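+
+%% As a rough summary of the above (persistent messages are
+%% additionally always on disk, as noted):
+%%
+%%          msg in RAM | pos in RAM | msg on disk | pos on disk
+%%   alpha      yes    |    yes     |      -      |      -
+%%   beta        -     |    yes     |     yes     |      -
+%%   gamma       -     |    yes     |     yes     |     yes
+%%   delta       -     |     -      |     yes     |     yes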
+ +%% Also note that within this code, the term gamma never +%% appears. Instead, gammas are defined by betas who have had their +%% queue position recorded on disk. + +%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though +%% many of these steps are frequently skipped. q1 and q4 only hold +%% alphas, q2 and q3 hold both betas and gammas (as queues of queues, +%% using the bpqueue module where the block prefix determines whether +%% they're betas or gammas). When a message arrives, its +%% classification is determined. It is then added to the rightmost +%% appropriate queue. + +%% If a new message is determined to be a beta or gamma, q1 is +%% empty. If a new message is determined to be a delta, q1 and q2 are +%% empty (and actually q4 too). + +%% When removing messages from a queue, if q4 is empty then q3 is read +%% directly. If q3 becomes empty then the next segment's worth of +%% messages from delta are read into q3, reducing the size of +%% delta. If the queue is non empty, either q4 or q3 contain +%% entries. It is never permitted for delta to hold all the messages +%% in the queue. + +%% The duration indicated to us by the memory_monitor is used to +%% calculate, given our current ingress and egress rates, how many +%% messages we should hold in RAM. When we need to push alphas to +%% betas or betas to gammas, we favour writing on messages that are +%% further from the head of the queue. This minimises writes to disk, +%% as the messages closer to the tail of the queue stay in the queue +%% for longer, thus do not need to be replaced as quickly by sending +%% other messages to disk. + +%% Whilst messages are pushed to disk and forgotten from RAM as soon +%% as requested by a new setting of the queue RAM duration, the +%% inverse is not true: we only load messages back into RAM as +%% demanded as the queue is read from. Thus only publishes to the +%% queue will take up available spare capacity. + +%% If a queue is full of transient messages, then the transition from +%% betas to deltas will be potentially very expensive as millions of +%% entries must be written to disk by the queue_index module. This can +%% badly stall the queue. In order to avoid this, the proportion of +%% gammas / (betas+gammas) must not be lower than (betas+gammas) / +%% (alphas+betas+gammas). Thus as the queue grows, and the proportion +%% of alphas shrink, the proportion of gammas will grow, thus at the +%% point at which betas and gammas must be converted to deltas, there +%% should be very few betas remaining, thus the transition is fast (no +%% work needs to be done for the gamma -> delta transition). + +%% The conversion of betas to gammas is done on publish, in batches of +%% exactly ?RAM_INDEX_BATCH_SIZE. This value should not be too small, +%% otherwise the frequent operations on the queues of q2 and q3 will +%% not be effectively amortised, nor should it be too big, otherwise a +%% publish will take too long as it attempts to do too much work and +%% thus stalls the queue. Therefore, it must be just right. This +%% approach is preferable to doing work on a new queue-duration +%% because converting all the indicated betas to gammas at that point +%% can be far too expensive, thus requiring batching and segmented +%% work anyway, and furthermore, if we're not getting any publishes +%% anyway then the queue is either being drained or has no +%% consumers. 
In the latter case, an expensive beta to delta +%% transition doesn't matter, and in the former case the queue's +%% shrinking length makes it unlikely (though not impossible) that the +%% duration will become 0. + +%% In the queue we only keep track of messages that are pending +%% delivery. This is fine for queue purging, but can be expensive for +%% queue deletion: for queue deletion we must scan all the way through +%% all remaining segments in the queue index (we start by doing a +%% purge) and delete messages from the msg_store that we find in the +%% queue index. + %%---------------------------------------------------------------------------- -record(vqstate, @@ -89,36 +181,6 @@ %%---------------------------------------------------------------------------- -%% WRONG - UPDATE ME! - -%% Basic premise is that msgs move from q1 -> q2 -> delta -> q3 -> q4 -%% but they can only do so in the right form. q1 and q4 only hold -%% alphas (msgs in ram), q2 and q3 only hold betas (msg on disk, index -%% in ram), and delta is just a count of the number of index entries -%% on disk at that stage (msg on disk, index on disk). -%% -%% When a msg arrives, we decide in which form it should be. It is -%% then added to the right-most appropriate queue, maintaining -%% order. Thus if the msg is to be an alpha, it will be added to q1, -%% unless all of q2, delta and q3 are empty, in which case it will go -%% to q4. If it is to be a beta, it will be added to q2 unless delta -%% is empty, in which case it will go to q3. -%% -%% The major invariant is that if the msg is to be a beta, q1 will be -%% empty, and if it is to be a delta then both q1 and q2 will be empty. -%% -%% When taking msgs out of the queue, if q4 is empty then we read -%% directly from q3, or delta, if q3 is empty. If q3 and delta are -%% empty then we have an invariant that q2 must be empty because q2 -%% can only grow if delta is non empty. -%% -%% A further invariant is that if the queue is non empty, either q4 or -%% q3 contains at least one entry. I.e. we never allow delta to -%% contain all msgs in the queue. Also, if q4 is non empty and delta -%% is non empty then q3 must be non empty. - -%%---------------------------------------------------------------------------- - -ifdef(use_specs). -type(bpqueue() :: any()). -- cgit v1.2.1 From f223db9e504a35568054da85f07ccd075fbe9037 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 21 Jan 2010 15:21:05 +0000 Subject: typeo --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c11489d1..89493b5e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -84,7 +84,7 @@ %% The duration indicated to us by the memory_monitor is used to %% calculate, given our current ingress and egress rates, how many %% messages we should hold in RAM. When we need to push alphas to -%% betas or betas to gammas, we favour writing on messages that are +%% betas or betas to gammas, we favour writing out messages that are %% further from the head of the queue. 
This minimises writes to disk, %% as the messages closer to the tail of the queue stay in the queue %% for longer, thus do not need to be replaced as quickly by sending -- cgit v1.2.1 From 6aee80e840768347895e7c55cff48986c1555e08 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 24 Jan 2010 00:44:45 +0000 Subject: Added supervisor2 with simple_one_for_one_terminate strategy and wired queues to use it --- Makefile | 5 +- src/rabbit_amqqueue.erl | 37 +- src/rabbit_amqqueue_sup.erl | 9 +- src/supervisor2.erl | 928 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 941 insertions(+), 38 deletions(-) create mode 100644 src/supervisor2.erl diff --git a/Makefile b/Makefile index 2dd7ff3c..0e8bd0d0 100644 --- a/Makefile +++ b/Makefile @@ -64,10 +64,13 @@ $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app $(EBIN_DIR)/gen_server2.beam: $(SOURCE_DIR)/gen_server2.erl erlc $(ERLC_OPTS) $< +$(EBIN_DIR)/supervisor2.beam: $(SOURCE_DIR)/supervisor2.erl + erlc $(ERLC_OPTS) $< + $(EBIN_DIR)/rabbit_msg_store_index.beam: $(SOURCE_DIR)/rabbit_msg_store_index.erl erlc $(ERLC_OPTS) $< -$(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit.hrl $(EBIN_DIR)/gen_server2.beam $(EBIN_DIR)/rabbit_msg_store_index.beam +$(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit.hrl $(EBIN_DIR)/gen_server2.beam $(EBIN_DIR)/supervisor2.beam $(EBIN_DIR)/rabbit_msg_store_index.beam erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< # ERLC_EMULATOR="erl -smp" erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 77fed28d..84d00add 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -171,7 +171,6 @@ find_durable_queues() -> end). declare(QueueName, Durable, AutoDelete, Args) -> - prune_queue_childspecs(), Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, auto_delete = AutoDelete, @@ -217,30 +216,9 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -start_queue_process(Q = #amqqueue{name = QueueName}) -> - case supervisor:start_child( - rabbit_amqqueue_sup, - {QueueName, {rabbit_amqqueue_process, start_link, [Q]}, - %% 16#ffffffff is the biggest value allowed - temporary, 16#ffffffff, worker, [rabbit_amqqueue_process]}) of - {ok, Pid} -> - Q#amqqueue{pid = Pid}; - {error, already_present} -> - supervisor:delete_child(rabbit_amqqueue_sup, QueueName), - start_queue_process(Q); - {error, {already_started, _QPid}} -> - case rabbit_misc:execute_mnesia_transaction( - fun () -> - case mnesia:wread({rabbit_queue, QueueName}) of - %% it's vanished in the mean time, try again - [] -> try_again; - [ExistingQ] -> ExistingQ - end - end) of - try_again -> start_queue_process(Q); - ExistingQ -> ExistingQ - end - end. +start_queue_process(Q) -> + {ok, Pid} = supervisor2:start_child(rabbit_amqqueue_sup, [Q]), + Q#amqqueue{pid = Pid}. add_default_binding(#amqqueue{name = QueueName}) -> Exchange = rabbit_misc:r(QueueName, exchange, <<>>), @@ -288,7 +266,6 @@ stat_all() -> lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)). delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> - prune_queue_childspecs(), gen_server2:call(QPid, {delete, IfUnused, IfEmpty}, infinity). purge(#amqqueue{ pid = QPid }) -> gen_server2:call(QPid, purge, infinity). @@ -388,14 +365,6 @@ remeasure_rates(QPid) -> set_queue_duration(QPid, Duration) -> gen_server2:pcast(QPid, 9, {set_queue_duration, Duration}). 
-prune_queue_childspecs() ->
-    lists:foreach(
-      fun ({Name, undefined, _Type, _Mods}) ->
-              supervisor:delete_child(rabbit_amqqueue_sup, Name);
-          (_) -> ok
-      end, supervisor:which_children(rabbit_amqqueue_sup)),
-    ok.
-
 on_node_down(Node) ->
     rabbit_misc:execute_mnesia_transaction(
       fun () ->
diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl
index f06e4c53..160770d9 100644
--- a/src/rabbit_amqqueue_sup.erl
+++ b/src/rabbit_amqqueue_sup.erl
@@ -31,7 +31,7 @@
 
 -module(rabbit_amqqueue_sup).
 
--behaviour(supervisor).
+-behaviour(supervisor2).
 
 -export([start_link/0]).
 
@@ -40,7 +40,10 @@
 -define(SERVER, ?MODULE).
 
 start_link() ->
-    supervisor:start_link({local, ?SERVER}, ?MODULE, []).
+    supervisor2:start_link({local, ?SERVER}, ?MODULE, []).
 
 init([]) ->
-    {ok, {{one_for_one, 10, 10}, []}}.
+    {ok, {{simple_one_for_one_terminate, 10, 10},
+          [{amqqueue, {rabbit_amqqueue_process, start_link, []},
+            %% 16#ffffffff is the biggest value allowed
+            temporary, 16#ffffffff, worker, [rabbit_amqqueue_process]}]}}.
diff --git a/src/supervisor2.erl b/src/supervisor2.erl
new file mode 100644
index 00000000..6ff374de
--- /dev/null
+++ b/src/supervisor2.erl
@@ -0,0 +1,928 @@
+%% This file is a copy of supervisor.erl from the R13B-3 Erlang/OTP
+%% distribution, with the following modifications:
+%%
+%% 1) the module name is supervisor2
+%%
+%% 2) there is a new strategy called
+%% simple_one_for_one_terminate. This is exactly the same as for
+%% simple_one_for_one, except that children *are* explicitly killed as
+%% per the shutdown component of the child_spec.
+%%
+%% All modifications are (C) 2010 LShift Ltd.
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(supervisor2).
+
+-behaviour(gen_server).
+
+%% External exports
+-export([start_link/2,start_link/3,
+         start_child/2, restart_child/2,
+         delete_child/2, terminate_child/2,
+         which_children/1,
+         check_childspecs/1]).
+
+-export([behaviour_info/1]).
+
+%% Internal exports
+-export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]).
+-export([handle_cast/2]).
+
+-define(DICT, dict).
+
+-record(state, {name,
+                strategy,
+                children = [],
+                dynamics = ?DICT:new(),
+                intensity,
+                period,
+                restarts = [],
+                module,
+                args}).
+
+-record(child, {pid = undefined, % pid is undefined when child is not running
+                name,
+                mfa,
+                restart_type,
+                shutdown,
+                child_type,
+                modules = []}).
+
+-define(is_simple(State), State#state.strategy =:= simple_one_for_one orelse
+        State#state.strategy =:= simple_one_for_one_terminate).
+-define(is_exactly_simple(State), State#state.strategy =:= simple_one_for_one).
+-define(is_terminate_simple(State),
+        State#state.strategy =:= simple_one_for_one_terminate).
+
+behaviour_info(callbacks) ->
+    [{init,1}];
+behaviour_info(_Other) ->
+    undefined.
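+
+%%% For illustration, a callback module adopts the new strategy by
+%%% returning it from init/1 with a single child template, e.g.
+%%% (module and child names here are invented):
+%%%
+%%%   init([]) ->
+%%%       {ok, {{simple_one_for_one_terminate, 10, 10},
+%%%             [{worker, {my_worker, start_link, []},
+%%%               temporary, 16#ffffffff, worker, [my_worker]}]}}.
+%%%
+%%% Children are then added with supervisor2:start_child(Sup, Args),
+%%% and, unlike with plain simple_one_for_one, are explicitly shut
+%%% down by the supervisor when it terminates.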
+ +%%% --------------------------------------------------- +%%% This is a general process supervisor built upon gen_server.erl. +%%% Servers/processes should/could also be built using gen_server.erl. +%%% SupName = {local, atom()} | {global, atom()}. +%%% --------------------------------------------------- +start_link(Mod, Args) -> + gen_server:start_link(supervisor2, {self, Mod, Args}, []). + +start_link(SupName, Mod, Args) -> + gen_server:start_link(SupName, supervisor2, {SupName, Mod, Args}, []). + +%%% --------------------------------------------------- +%%% Interface functions. +%%% --------------------------------------------------- +start_child(Supervisor, ChildSpec) -> + call(Supervisor, {start_child, ChildSpec}). + +restart_child(Supervisor, Name) -> + call(Supervisor, {restart_child, Name}). + +delete_child(Supervisor, Name) -> + call(Supervisor, {delete_child, Name}). + +%%----------------------------------------------------------------- +%% Func: terminate_child/2 +%% Returns: ok | {error, Reason} +%% Note that the child is *always* terminated in some +%% way (maybe killed). +%%----------------------------------------------------------------- +terminate_child(Supervisor, Name) -> + call(Supervisor, {terminate_child, Name}). + +which_children(Supervisor) -> + call(Supervisor, which_children). + +call(Supervisor, Req) -> + gen_server:call(Supervisor, Req, infinity). + +check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> + case check_startspec(ChildSpecs) of + {ok, _} -> ok; + Error -> {error, Error} + end; +check_childspecs(X) -> {error, {badarg, X}}. + +%%% --------------------------------------------------- +%%% +%%% Initialize the supervisor. +%%% +%%% --------------------------------------------------- +init({SupName, Mod, Args}) -> + process_flag(trap_exit, true), + case Mod:init(Args) of + {ok, {SupFlags, StartSpec}} -> + case init_state(SupName, SupFlags, Mod, Args) of + {ok, State} when ?is_simple(State) -> + init_dynamic(State, StartSpec); + {ok, State} -> + init_children(State, StartSpec); + Error -> + {stop, {supervisor_data, Error}} + end; + ignore -> + ignore; + Error -> + {stop, {bad_return, {Mod, init, Error}}} + end. + +init_children(State, StartSpec) -> + SupName = State#state.name, + case check_startspec(StartSpec) of + {ok, Children} -> + case start_children(Children, SupName) of + {ok, NChildren} -> + {ok, State#state{children = NChildren}}; + {error, NChildren} -> + terminate_children(NChildren, SupName), + {stop, shutdown} + end; + Error -> + {stop, {start_spec, Error}} + end. + +init_dynamic(State, [StartSpec]) -> + case check_startspec([StartSpec]) of + {ok, Children} -> + {ok, State#state{children = Children}}; + Error -> + {stop, {start_spec, Error}} + end; +init_dynamic(_State, StartSpec) -> + {stop, {bad_start_spec, StartSpec}}. + +%%----------------------------------------------------------------- +%% Func: start_children/2 +%% Args: Children = [#child] in start order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Purpose: Start all children. The new list contains #child's +%% with pids. +%% Returns: {ok, NChildren} | {error, NChildren} +%% NChildren = [#child] in termination order (reversed +%% start order) +%%----------------------------------------------------------------- +start_children(Children, SupName) -> start_children(Children, [], SupName). 
+ +start_children([Child|Chs], NChildren, SupName) -> + case do_start_child(SupName, Child) of + {ok, Pid} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {ok, Pid, _Extra} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {error, Reason} -> + report_error(start_error, Reason, Child, SupName), + {error, lists:reverse(Chs) ++ [Child | NChildren]} + end; +start_children([], NChildren, _SupName) -> + {ok, NChildren}. + +do_start_child(SupName, Child) -> + #child{mfa = {M, F, A}} = Child, + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, What} -> {error, What}; + What -> {error, What} + end. + +do_start_child_i(M, F, A) -> + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, Error} -> + {error, Error}; + What -> + {error, What} + end. + + +%%% --------------------------------------------------- +%%% +%%% Callback functions. +%%% +%%% --------------------------------------------------- +handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> + #child{mfa = {M, F, A}} = hd(State#state.children), + Args = A ++ EArgs, + case do_start_child_i(M, F, Args) of + {ok, Pid} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid, Extra}, NState}; + What -> + {reply, What, State} + end; + +%%% The requests terminate_child, delete_child and restart_child are +%%% invalid for simple_one_for_one supervisors. +handle_call({_Req, _Data}, _From, State) when ?is_exactly_simple(State) -> + {reply, {error, simple_one_for_one}, State}; + +%%% The requests terminate_child, delete_child and restart_child are +%%% invalid for simple_one_for_one_terminate supervisors. 
+handle_call({_Req, _Data}, _From, State) when ?is_terminate_simple(State) -> + {reply, {error, simple_one_for_one_terminate}, State}; + +handle_call({start_child, ChildSpec}, _From, State) -> + case check_childspec(ChildSpec) of + {ok, Child} -> + {Resp, NState} = handle_start_child(Child, State), + {reply, Resp, NState}; + What -> + {reply, {error, What}, State} + end; + +handle_call({restart_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid, Extra}, NState}; + Error -> + {reply, Error, State} + end; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({delete_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + NState = remove_child(Child, State), + {reply, ok, NState}; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({terminate_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} -> + NChild = do_terminate(Child, State#state.name), + {reply, ok, replace_child(NChild, State)}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call(which_children, _From, State) when ?is_simple(State) -> + [#child{child_type = CT, modules = Mods}] = State#state.children, + Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end, + ?DICT:to_list(State#state.dynamics)), + {reply, Reply, State}; + +handle_call(which_children, _From, State) -> + Resp = + lists:map(fun(#child{pid = Pid, name = Name, + child_type = ChildType, modules = Mods}) -> + {Name, Pid, ChildType, Mods} + end, + State#state.children), + {reply, Resp, State}. + + +%%% Hopefully cause a function-clause as there is no API function +%%% that utilizes cast. +handle_cast(null, State) -> + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + []), + + {noreply, State}. + +%% +%% Take care of terminated children. +%% +handle_info({'EXIT', Pid, Reason}, State) -> + case restart_child(Pid, Reason, State) of + {ok, State1} -> + {noreply, State1}; + {shutdown, State1} -> + {stop, shutdown, State1} + end; + +handle_info(Msg, State) -> + error_logger:error_msg("Supervisor received unexpected message: ~p~n", + [Msg]), + {noreply, State}. +%% +%% Terminate this server. +%% +terminate(_Reason, State) when ?is_terminate_simple(State) -> + ok = terminate_simple_children( + hd(State#state.children), State#state.dynamics, State#state.name); +terminate(_Reason, State) -> + terminate_children(State#state.children, State#state.name), + ok. + +%% +%% Change code for the supervisor. +%% Call the new call-back module and fetch the new start specification. +%% Combine the new spec. with the old. If the new start spec. is +%% not valid the code change will not succeed. +%% Use the old Args as argument to Module:init/1. +%% NOTE: This requires that the init function of the call-back module +%% does not have any side effects. 
+%%
+code_change(_, State, _) ->
+    case (State#state.module):init(State#state.args) of
+        {ok, {SupFlags, StartSpec}} ->
+            case catch check_flags(SupFlags) of
+                ok ->
+                    {Strategy, MaxIntensity, Period} = SupFlags,
+                    update_childspec(State#state{strategy = Strategy,
+                                                 intensity = MaxIntensity,
+                                                 period = Period},
+                                     StartSpec);
+                Error ->
+                    {error, Error}
+            end;
+        ignore ->
+            {ok, State};
+        Error ->
+            Error
+    end.
+
+check_flags({Strategy, MaxIntensity, Period}) ->
+    validStrategy(Strategy),
+    validIntensity(MaxIntensity),
+    validPeriod(Period),
+    ok;
+check_flags(What) ->
+    {bad_flags, What}.
+
+update_childspec(State, StartSpec) when ?is_simple(State) ->
+    case check_startspec(StartSpec) of
+        {ok, [Child]} ->
+            {ok, State#state{children = [Child]}};
+        Error ->
+            {error, Error}
+    end;
+
+update_childspec(State, StartSpec) ->
+    case check_startspec(StartSpec) of
+        {ok, Children} ->
+            OldC = State#state.children, % In reverse start order !
+            NewC = update_childspec1(OldC, Children, []),
+            {ok, State#state{children = NewC}};
+        Error ->
+            {error, Error}
+    end.
+
+update_childspec1([Child|OldC], Children, KeepOld) ->
+    case update_chsp(Child, Children) of
+        {ok,NewChildren} ->
+            update_childspec1(OldC, NewChildren, KeepOld);
+        false ->
+            update_childspec1(OldC, Children, [Child|KeepOld])
+    end;
+update_childspec1([], Children, KeepOld) ->
+    % Return them in (kept) reverse start order.
+    lists:reverse(Children ++ KeepOld).
+
+update_chsp(OldCh, Children) ->
+    case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name ->
+                           Ch#child{pid = OldCh#child.pid};
+                      (Ch) ->
+                           Ch
+                   end,
+                   Children) of
+        Children ->
+            false;  % OldCh not found in new spec.
+        NewC ->
+            {ok, NewC}
+    end.
+
+%%% ---------------------------------------------------
+%%% Start a new child.
+%%% ---------------------------------------------------
+
+handle_start_child(Child, State) ->
+    case get_child(Child#child.name, State) of
+        false ->
+            case do_start_child(State#state.name, Child) of
+                {ok, Pid} ->
+                    Children = State#state.children,
+                    {{ok, Pid},
+                     State#state{children =
+                                 [Child#child{pid = Pid}|Children]}};
+                {ok, Pid, Extra} ->
+                    Children = State#state.children,
+                    {{ok, Pid, Extra},
+                     State#state{children =
+                                 [Child#child{pid = Pid}|Children]}};
+                {error, What} ->
+                    {{error, {What, Child}}, State}
+            end;
+        {value, OldChild} when OldChild#child.pid =/= undefined ->
+            {{error, {already_started, OldChild#child.pid}}, State};
+        {value, _OldChild} ->
+            {{error, already_present}, State}
+    end.
+
+%%% ---------------------------------------------------
+%%% Restart. A process has terminated.
+%%% Returns: {ok, #state} | {shutdown, #state}
+%%% ---------------------------------------------------
+
+restart_child(Pid, Reason, State) when ?is_simple(State) ->
+    case ?DICT:find(Pid, State#state.dynamics) of
+        {ok, Args} ->
+            [Child] = State#state.children,
+            RestartType = Child#child.restart_type,
+            {M, F, _} = Child#child.mfa,
+            NChild = Child#child{pid = Pid, mfa = {M, F, Args}},
+            do_restart(RestartType, Reason, NChild, State);
+        error ->
+            {ok, State}
+    end;
+restart_child(Pid, Reason, State) ->
+    Children = State#state.children,
+    case lists:keysearch(Pid, #child.pid, Children) of
+        {value, Child} ->
+            RestartType = Child#child.restart_type,
+            do_restart(RestartType, Reason, Child, State);
+        _ ->
+            {ok, State}
+    end.
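+
+%% Restart behaviour by restart_type, as implemented by the do_restart
+%% clauses below: permanent children are always restarted; transient
+%% children are restarted only after an abnormal exit (neither normal
+%% nor shutdown); temporary children are never restarted and are
+%% simply removed from the state.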
+ +do_restart(permanent, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(_, normal, Child, State) -> + NState = state_del_child(Child, State), + {ok, NState}; +do_restart(_, shutdown, Child, State) -> + NState = state_del_child(Child, State), + {ok, NState}; +do_restart(transient, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(temporary, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + NState = state_del_child(Child, State), + {ok, NState}. + +restart(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState); + {terminate, NState} -> + report_error(shutdown, reached_max_restart_intensity, + Child, State#state.name), + {shutdown, remove_child(Child, NState)} + end. + +restart(Strategy, Child, State) + when Strategy =:= simple_one_for_one orelse + Strategy =:= simple_one_for_one_terminate -> + #child{mfa = {M, F, A}} = Child, + Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), + case do_start_child_i(M, F, A) of + {ok, Pid} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {ok, Pid, _Extra} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {error, Error} -> + report_error(start_error, Error, Child, State#state.name), + restart(Child, State) + end; +restart(one_for_one, Child, State) -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {ok, Pid, _Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {error, Reason} -> + report_error(start_error, Reason, Child, State#state.name), + restart(Child, State) + end; +restart(rest_for_one, Child, State) -> + {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), + ChAfter2 = terminate_children(ChAfter, State#state.name), + case start_children(ChAfter2, State#state.name) of + {ok, ChAfter3} -> + {ok, State#state{children = ChAfter3 ++ ChBefore}}; + {error, ChAfter3} -> + restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + end; +restart(one_for_all, Child, State) -> + Children1 = del_child(Child#child.pid, State#state.children), + Children2 = terminate_children(Children1, State#state.name), + case start_children(Children2, State#state.name) of + {ok, NChs} -> + {ok, State#state{children = NChs}}; + {error, NChs} -> + restart(Child, State#state{children = NChs}) + end. + +%%----------------------------------------------------------------- +%% Func: terminate_children/2 +%% Args: Children = [#child] in termination order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: NChildren = [#child] in +%% startup order (reversed termination order) +%%----------------------------------------------------------------- +terminate_children(Children, SupName) -> + terminate_children(Children, SupName, []). + +terminate_children([Child | Children], SupName, Res) -> + NChild = do_terminate(Child, SupName), + terminate_children(Children, SupName, [NChild | Res]); +terminate_children([], _SupName, Res) -> + Res. 
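+
+%%-----------------------------------------------------------------
+%% Func: terminate_simple_children/3
+%% Terminates every dynamic child of a simple_one_for_one_terminate
+%% supervisor, applying the shutdown spec from the single child
+%% template; shutdown errors are reported but do not abort the fold.
+%%-----------------------------------------------------------------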
+
+terminate_simple_children(Child, Dynamics, SupName) ->
+    ok = dict:fold(
+           fun (Pid, _Args, ok) ->
+                   case shutdown(Pid, Child#child.shutdown) of
+                       ok ->
+                           ok;
+                       {error, OtherReason} ->
+                           report_error(shutdown_error, OtherReason, Child,
+                                        SupName),
+                           ok
+                   end
+           end, ok, Dynamics).
+
+do_terminate(Child, SupName) when Child#child.pid =/= undefined ->
+    case shutdown(Child#child.pid,
+                  Child#child.shutdown) of
+        ok ->
+            Child#child{pid = undefined};
+        {error, OtherReason} ->
+            report_error(shutdown_error, OtherReason, Child, SupName),
+            Child#child{pid = undefined}
+    end;
+do_terminate(Child, _SupName) ->
+    Child.
+
+%%-----------------------------------------------------------------
+%% Shuts down a child. We must check the EXIT value
+%% of the child, because it might have died with another reason than
+%% the wanted. In that case we want to report the error. We put a
+%% monitor on the child and check for the 'DOWN' message instead of
+%% checking for the 'EXIT' message, because if we check the 'EXIT'
+%% message a "naughty" child, who does unlink(Sup), could hang the
+%% supervisor.
+%% Returns: ok | {error, OtherReason}    (this should be reported)
+%%-----------------------------------------------------------------
+shutdown(Pid, brutal_kill) ->
+
+    case monitor_child(Pid) of
+        ok ->
+            exit(Pid, kill),
+            receive
+                {'DOWN', _MRef, process, Pid, killed} ->
+                    ok;
+                {'DOWN', _MRef, process, Pid, OtherReason} ->
+                    {error, OtherReason}
+            end;
+        {error, Reason} ->
+            {error, Reason}
+    end;
+
+shutdown(Pid, Time) ->
+
+    case monitor_child(Pid) of
+        ok ->
+            exit(Pid, shutdown), %% Try to shutdown gracefully
+            receive
+                {'DOWN', _MRef, process, Pid, shutdown} ->
+                    ok;
+                {'DOWN', _MRef, process, Pid, OtherReason} ->
+                    {error, OtherReason}
+            after Time ->
+                    exit(Pid, kill),  %% Force termination.
+                    receive
+                        {'DOWN', _MRef, process, Pid, OtherReason} ->
+                            {error, OtherReason}
+                    end
+            end;
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+%% Helper function for shutdown/2: switches from the link to the monitor approach
+monitor_child(Pid) ->
+
+    %% Do the monitor operation first so that if the child dies
+    %% before the monitoring is done causing a 'DOWN'-message with
+    %% reason noproc, we will get the real reason in the 'EXIT'-message
+    %% unless a naughty child has already done unlink...
+    erlang:monitor(process, Pid),
+    unlink(Pid),
+
+    receive
+        %% If the child dies before the unlink we must empty
+        %% the mail-box of the 'EXIT'-message and the 'DOWN'-message.
+        {'EXIT', Pid, Reason} ->
+            receive
+                {'DOWN', _, process, Pid, _} ->
+                    {error, Reason}
+            end
+    after 0 ->
+            %% If a naughty child did unlink and the child dies before
+            %% monitor the result will be that shutdown/2 receives a
+            %% 'DOWN'-message with reason noproc.
+            %% If the child should die after the unlink there
+            %% will be a 'DOWN'-message with a correct reason
+            %% that will be handled in shutdown/2.
+            ok
+    end.
+
+
+%%-----------------------------------------------------------------
+%% Child/State manipulating functions.
+%%-----------------------------------------------------------------
+state_del_child(#child{pid = Pid}, State) when ?is_simple(State) ->
+    NDynamics = ?DICT:erase(Pid, State#state.dynamics),
+    State#state{dynamics = NDynamics};
+state_del_child(Child, State) ->
+    NChildren = del_child(Child#child.name, State#state.children),
+    State#state{children = NChildren}.
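
terminate_simple_children/3 above is what gives the new simple_one_for_one_terminate strategy its meaning: when the supervisor itself terminates, each dynamic child is shut down via shutdown/2, honouring the spec's Shutdown value, instead of being abandoned as plain simple_one_for_one does. A minimal sketch of a supervisor using it (module names hypothetical):

    -module(my_dynamic_sup).
    -behaviour(supervisor2).
    -export([start_link/0, init/1]).

    start_link() ->
        supervisor2:start_link({local, ?MODULE}, ?MODULE, []).

    init([]) ->
        %% The 2000 ms Shutdown below is honoured at supervisor
        %% termination thanks to terminate_simple_children/3.
        {ok, {{simple_one_for_one_terminate, 10, 10},
              [{my_worker, {my_worker, start_link, []},
                temporary, 2000, worker, [my_worker]}]}}.

Children are then started dynamically with supervisor2:start_child(my_dynamic_sup, [Arg]).
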
+
+del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name ->
+    [Ch#child{pid = undefined} | Chs];
+del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid ->
+    [Ch#child{pid = undefined} | Chs];
+del_child(Name, [Ch|Chs]) ->
+    [Ch|del_child(Name, Chs)];
+del_child(_, []) ->
+    [].
+
+%% Chs = [S4, S3, Ch, S1, S0]
+%% Ret: {[S4, S3, Ch], [S1, S0]}
+split_child(Name, Chs) ->
+    split_child(Name, Chs, []).
+
+split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name ->
+    {lists:reverse([Ch#child{pid = undefined} | After]), Chs};
+split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid ->
+    {lists:reverse([Ch#child{pid = undefined} | After]), Chs};
+split_child(Name, [Ch|Chs], After) ->
+    split_child(Name, Chs, [Ch | After]);
+split_child(_, [], After) ->
+    {lists:reverse(After), []}.
+
+get_child(Name, State) ->
+    lists:keysearch(Name, #child.name, State#state.children).
+replace_child(Child, State) ->
+    Chs = do_replace_child(Child, State#state.children),
+    State#state{children = Chs}.
+
+do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name ->
+    [Child | Chs];
+do_replace_child(Child, [Ch|Chs]) ->
+    [Ch|do_replace_child(Child, Chs)].
+
+remove_child(Child, State) ->
+    Chs = lists:keydelete(Child#child.name, #child.name, State#state.children),
+    State#state{children = Chs}.
+
+%%-----------------------------------------------------------------
+%% Func: init_state/4
+%% Args: SupName = {local, atom()} | {global, atom()} | self
+%%       Type = {Strategy, MaxIntensity, Period}
+%%         Strategy = one_for_one | one_for_all | simple_one_for_one |
+%%                    rest_for_one
+%%         MaxIntensity = integer()
+%%         Period = integer()
+%%       Mod :== atom()
+%%       Args :== term()
+%% Purpose: Check that Type is of correct type (!)
+%% Returns: {ok, #state} | Error
+%%-----------------------------------------------------------------
+init_state(SupName, Type, Mod, Args) ->
+    case catch init_state1(SupName, Type, Mod, Args) of
+        {ok, State} ->
+            {ok, State};
+        Error ->
+            Error
+    end.
+
+init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) ->
+    validStrategy(Strategy),
+    validIntensity(MaxIntensity),
+    validPeriod(Period),
+    {ok, #state{name = supname(SupName,Mod),
+                strategy = Strategy,
+                intensity = MaxIntensity,
+                period = Period,
+                module = Mod,
+                args = Args}};
+init_state1(_SupName, Type, _, _) ->
+    {invalid_type, Type}.
+
+validStrategy(simple_one_for_one_terminate) -> true;
+validStrategy(simple_one_for_one) -> true;
+validStrategy(one_for_one) -> true;
+validStrategy(one_for_all) -> true;
+validStrategy(rest_for_one) -> true;
+validStrategy(What) -> throw({invalid_strategy, What}).
+
+validIntensity(Max) when is_integer(Max),
+                         Max >= 0 -> true;
+validIntensity(What) -> throw({invalid_intensity, What}).
+
+validPeriod(Period) when is_integer(Period),
+                         Period > 0 -> true;
+validPeriod(What) -> throw({invalid_period, What}).
+
+supname(self,Mod) -> {self(),Mod};
+supname(N,_) -> N.
+
+%%% ------------------------------------------------------
+%%% Check that the children start specification is valid.
+%%% Shall be a six (6) tuple
+%%%    {Name, Func, RestartType, Shutdown, ChildType, Modules}
+%%% where Name is an atom
+%%%       Func is {Mod, Fun, Args} == {atom, atom, list}
+%%%       RestartType is permanent | temporary | transient
+%%%       Shutdown = integer() | infinity | brutal_kill
+%%%       ChildType = supervisor | worker
+%%%       Modules = [atom()] | dynamic
+%%% Returns: {ok, [#child]} | Error
+%%% ------------------------------------------------------
+
+check_startspec(Children) -> check_startspec(Children, []).
+
+check_startspec([ChildSpec|T], Res) ->
+    case check_childspec(ChildSpec) of
+        {ok, Child} ->
+            case lists:keymember(Child#child.name, #child.name, Res) of
+                true -> {duplicate_child_name, Child#child.name};
+                false -> check_startspec(T, [Child | Res])
+            end;
+        Error -> Error
+    end;
+check_startspec([], Res) ->
+    {ok, lists:reverse(Res)}.
+
+check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) ->
+    catch check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods);
+check_childspec(X) -> {invalid_child_spec, X}.
+
+check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) ->
+    validName(Name),
+    validFunc(Func),
+    validRestartType(RestartType),
+    validChildType(ChildType),
+    validShutdown(Shutdown, ChildType),
+    validMods(Mods),
+    {ok, #child{name = Name, mfa = Func, restart_type = RestartType,
+                shutdown = Shutdown, child_type = ChildType, modules = Mods}}.
+
+validChildType(supervisor) -> true;
+validChildType(worker) -> true;
+validChildType(What) -> throw({invalid_child_type, What}).
+
+validName(_Name) -> true.
+
+validFunc({M, F, A}) when is_atom(M),
+                          is_atom(F),
+                          is_list(A) -> true;
+validFunc(Func) -> throw({invalid_mfa, Func}).
+
+validRestartType(permanent) -> true;
+validRestartType(temporary) -> true;
+validRestartType(transient) -> true;
+validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}).
+
+validShutdown(Shutdown, _)
+  when is_integer(Shutdown), Shutdown > 0 -> true;
+validShutdown(infinity, supervisor) -> true;
+validShutdown(brutal_kill, _) -> true;
+validShutdown(Shutdown, _) -> throw({invalid_shutdown, Shutdown}).
+
+validMods(dynamic) -> true;
+validMods(Mods) when is_list(Mods) ->
+    lists:foreach(fun(Mod) ->
+                          if
+                              is_atom(Mod) -> ok;
+                              true -> throw({invalid_module, Mod})
+                          end
+                  end,
+                  Mods);
+validMods(Mods) -> throw({invalid_modules, Mods}).
+
+%%% ------------------------------------------------------
+%%% Add a new restart and calculate if the max restart
+%%% intensity has been reached (in that case the supervisor
+%%% shall terminate).
+%%% All restarts that occurred within the period amount of seconds
+%%% are kept in the #state.restarts list.
+%%% Returns: {ok, State'} | {terminate, State'}
+%%% ------------------------------------------------------
+
+add_restart(State) ->
+    I = State#state.intensity,
+    P = State#state.period,
+    R = State#state.restarts,
+    Now = erlang:now(),
+    R1 = add_restart([Now|R], Now, P),
+    State1 = State#state{restarts = R1},
+    case length(R1) of
+        CurI when CurI =< I ->
+            {ok, State1};
+        _ ->
+            {terminate, State1}
+    end.
+
+add_restart([R|Restarts], Now, Period) ->
+    case inPeriod(R, Now, Period) of
+        true ->
+            [R|add_restart(Restarts, Now, Period)];
+        _ ->
+            []
+    end;
+add_restart([], _, _) ->
+    [].
+
+inPeriod(Time, Now, Period) ->
+    case difference(Time, Now) of
+        T when T > Period ->
+            false;
+        _ ->
+            true
+    end.
+
+%%
+%% Time = {MegaSecs, Secs, MicroSecs} (NOTE: MicroSecs is ignored)
+%% Calculate the time elapsed in seconds between two timestamps.
+%% If MegaSecs is equal just subtract Secs. +%% Else calculate the Mega difference and add the Secs difference, +%% note that Secs difference can be negative, e.g. +%% {827, 999999, 676} diff {828, 1, 653753} == > 2 secs. +%% +difference({TimeM, TimeS, _}, {CurM, CurS, _}) when CurM > TimeM -> + ((CurM - TimeM) * 1000000) + (CurS - TimeS); +difference({_, TimeS, _}, {_, CurS, _}) -> + CurS - TimeS. + +%%% ------------------------------------------------------ +%%% Error and progress reporting. +%%% ------------------------------------------------------ + +report_error(Error, Reason, Child, SupName) -> + ErrorMsg = [{supervisor, SupName}, + {errorContext, Error}, + {reason, Reason}, + {offender, extract_child(Child)}], + error_logger:error_report(supervisor_report, ErrorMsg). + + +extract_child(Child) -> + [{pid, Child#child.pid}, + {name, Child#child.name}, + {mfa, Child#child.mfa}, + {restart_type, Child#child.restart_type}, + {shutdown, Child#child.shutdown}, + {child_type, Child#child.child_type}]. + +report_progress(Child, SupName) -> + Progress = [{supervisor, SupName}, + {started, extract_child(Child)}], + error_logger:info_report(progress, Progress). -- cgit v1.2.1 From a428575950aaae1c02a14d2e5d33eedf39fbbbcc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 00:57:09 +0000 Subject: Added dependency analysis so that we know to compile behaviours first, and we track deps on includes (+transitive) correctly --- .hgignore | 1 + Makefile | 27 +++++++++++++---------- generate_deps | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 12 deletions(-) create mode 100644 generate_deps diff --git a/.hgignore b/.hgignore index ccd0b09f..73dc61cd 100644 --- a/.hgignore +++ b/.hgignore @@ -4,6 +4,7 @@ syntax: glob *.swp *.patch erl_crash.dump +deps.mk syntax: regexp ^cover/ diff --git a/Makefile b/Makefile index 0e8bd0d0..dc975bb1 100644 --- a/Makefile +++ b/Makefile @@ -9,9 +9,10 @@ RABBITMQ_LOG_BASE ?= $(TMPDIR) SOURCE_DIR=src EBIN_DIR=ebin INCLUDE_DIR=include +DEPS_FILE=deps.mk SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) BEAM_TARGETS=$(EBIN_DIR)/rabbit_framing.beam $(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) -TARGETS=$(EBIN_DIR)/rabbit.app $(BEAM_TARGETS) +TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) WEB_URL=http://stage.rabbitmq.com/ MANPAGES=$(patsubst %.pod, %.gz, $(wildcard docs/*.[0-9].pod)) @@ -56,30 +57,29 @@ ERL_CALL=erl_call -sname $(RABBITMQ_NODENAME) -e ERL_EBIN=erl -noinput -pa $(EBIN_DIR) -all: $(TARGETS) +all: $(DEPS_FILE) $(TARGETS) + +$(DEPS_FILE): $(SOURCES) + escript generate_deps $(INCLUDE_DIR) $(SOURCE_DIR) $(DEPS_FILE) $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app escript generate_app $(EBIN_DIR) $@ < $< -$(EBIN_DIR)/gen_server2.beam: $(SOURCE_DIR)/gen_server2.erl - erlc $(ERLC_OPTS) $< - -$(EBIN_DIR)/supervisor2.beam: $(SOURCE_DIR)/supervisor2.erl - erlc $(ERLC_OPTS) $< - -$(EBIN_DIR)/rabbit_msg_store_index.beam: $(SOURCE_DIR)/rabbit_msg_store_index.erl - erlc $(ERLC_OPTS) $< - -$(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit.hrl $(EBIN_DIR)/gen_server2.beam $(EBIN_DIR)/supervisor2.beam $(EBIN_DIR)/rabbit_msg_store_index.beam +$(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< # ERLC_EMULATOR="erl -smp" erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< +$(INCLUDE_DIR)/%.hrl: + @touch $@ + $(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py 
$(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH) $(PYTHON) codegen.py header $(AMQP_SPEC_JSON_PATH) $@ $(SOURCE_DIR)/rabbit_framing.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH) $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_PATH) $@ +$(EBIN_DIR)/rabbit_framing.beam: $(INCLUDE_DIR)/rabbit_framing.hrl + dialyze: $(BEAM_TARGETS) $(BASIC_PLT) $(ERL_EBIN) -eval \ "rabbit_dialyzer:halt_with_code(rabbit_dialyzer:dialyze_files(\"$(BASIC_PLT)\", \"$(BEAM_TARGETS)\"))." @@ -106,6 +106,7 @@ clean: rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing.erl codegen.pyc rm -f docs/*.[0-9].gz rm -f $(RABBIT_PLT) + rm -f $(DEPS_FILE) cleandb: rm -rf $(RABBITMQ_MNESIA_DIR)/* @@ -226,3 +227,5 @@ install: all docs_all install_dirs install_dirs: mkdir -p $(SBIN_DIR) mkdir -p $(TARGET_DIR)/sbin + +-include $(DEPS_FILE) diff --git a/generate_deps b/generate_deps new file mode 100644 index 00000000..8b17f499 --- /dev/null +++ b/generate_deps @@ -0,0 +1,71 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +main([IncludeDir, ErlDir, TargetFile]) -> + ErlDirContents = filelib:wildcard("*.erl", ErlDir), + ErlFiles = [filename:join(ErlDir, FileName) || FileName <- ErlDirContents], + Modules = sets:from_list( + [list_to_atom(filename:basename(FileName, ".erl")) || + FileName <- ErlDirContents]), + IncludeDirContents = filelib:wildcard("*.hrl", IncludeDir), + HrlFiles = [filename:join(IncludeDir, FileName) || + FileName <- IncludeDirContents], + Headers = sets:from_list(IncludeDirContents), + Deps = lists:foldl(fun (Path, Acc) -> make_deps(Path, Acc) end, + dict:new(), ErlFiles), + Deps1 = lists:foldl(fun (Path, Acc) -> make_deps(Path, Acc) end, + Deps, HrlFiles), + Deps2 = dict:map( + fun (_Module, Dep) -> + lists:filter( + fun ({module, Behaviour}) -> + sets:is_element(Behaviour, Modules); + ({include, Include}) -> + sets:is_element(Include, Headers) + end, Dep) + end, Deps1), + {ok, Hdl} = file:open(TargetFile, [write, delayed_write]), + dict:fold( + fun (_Module, [], ok) -> + ok; + (Module, Dep, ok) -> + case lists:suffix(".hrl", Module) of + false -> + ok = file:write(Hdl, ["$(EBIN_DIR)/", Module, ".beam:"]), + lists:foreach( + fun (E) -> + write_deps(Hdl, IncludeDir, E) + end, Dep), + file:write(Hdl, [" ", ErlDir, "/", Module, ".erl\n"]); + true -> + ok = file:write(Hdl, [IncludeDir, "/", Module, ":"]), + lists:foreach( + fun (E) -> + write_deps(Hdl, IncludeDir, E) + end, Dep), + file:write(Hdl, "\n") + end + end, ok, Deps2), + ok = file:write(Hdl, [TargetFile, ": ", escript:script_name(), "\n"]), + ok = file:sync(Hdl), + ok = file:close(Hdl). + +write_deps(Hdl, _IncludeDir, {module, Behaviour}) -> + ok = file:write(Hdl, [" $(EBIN_DIR)/", atom_to_list(Behaviour), ".beam"]); +write_deps(Hdl, IncludeDir, {include, Include}) -> + ok = file:write(Hdl, [" ", IncludeDir, "/", Include]). + + +make_deps(Path, Deps) -> + {ok, Forms} = epp:parse_file(Path, [], []), + Behaviours = + lists:foldl(fun (Form, Acc) -> detect_deps(Form, Acc) end, + [], Forms), + dict:store(filename:basename(Path, ".erl"), Behaviours, Deps). + +detect_deps({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> + [{module, Behaviour} | Deps]; +detect_deps({error, {_LineNumber, epp, {include, file, Include}}}, Deps) -> + [{include, Include} | Deps]; +detect_deps(_Form, Deps) -> + Deps. 
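
To illustrate the output format: for a source tree where, say, src/rabbit_amqqueue_process.erl declares -behaviour(gen_server2) and includes rabbit.hrl, and rabbit.hrl in turn includes another header, the generated deps.mk would contain rules along these lines (file names illustrative):

    $(EBIN_DIR)/rabbit_amqqueue_process.beam: $(EBIN_DIR)/gen_server2.beam include/rabbit.hrl src/rabbit_amqqueue_process.erl
    include/rabbit.hrl: include/rabbit_framing.hrl
    deps.mk: generate_deps

The script leans on the fact that epp:parse_file/3 is given no include path: every -include therefore surfaces as an {error, {_, epp, {include, file, Include}}} form, which is exactly what detect_deps/2 pattern-matches to record header dependencies.
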
-- cgit v1.2.1 From d792e120ea42efc7e4f1078cc36cd12d7a606cff Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 01:04:23 +0000 Subject: Minor tidying --- generate_deps | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/generate_deps b/generate_deps index 8b17f499..dbac2e42 100644 --- a/generate_deps +++ b/generate_deps @@ -16,7 +16,7 @@ main([IncludeDir, ErlDir, TargetFile]) -> Deps1 = lists:foldl(fun (Path, Acc) -> make_deps(Path, Acc) end, Deps, HrlFiles), Deps2 = dict:map( - fun (_Module, Dep) -> + fun (_Path, Dep) -> lists:filter( fun ({module, Behaviour}) -> sets:is_element(Behaviour, Modules); @@ -26,23 +26,20 @@ main([IncludeDir, ErlDir, TargetFile]) -> end, Deps1), {ok, Hdl} = file:open(TargetFile, [write, delayed_write]), dict:fold( - fun (_Module, [], ok) -> + fun (_Path, [], ok) -> ok; - (Module, Dep, ok) -> - case lists:suffix(".hrl", Module) of + (Path, Dep, ok) -> + case lists:suffix(".hrl", Path) of false -> + Module = filename:basename(Path, ".erl"), ok = file:write(Hdl, ["$(EBIN_DIR)/", Module, ".beam:"]), lists:foreach( - fun (E) -> - write_deps(Hdl, IncludeDir, E) - end, Dep), + fun (E) -> write_deps(Hdl, IncludeDir, E) end, Dep), file:write(Hdl, [" ", ErlDir, "/", Module, ".erl\n"]); true -> - ok = file:write(Hdl, [IncludeDir, "/", Module, ":"]), + ok = file:write(Hdl, [Path, ":"]), lists:foreach( - fun (E) -> - write_deps(Hdl, IncludeDir, E) - end, Dep), + fun (E) -> write_deps(Hdl, IncludeDir, E) end, Dep), file:write(Hdl, "\n") end end, ok, Deps2), @@ -61,7 +58,7 @@ make_deps(Path, Deps) -> Behaviours = lists:foldl(fun (Form, Acc) -> detect_deps(Form, Acc) end, [], Forms), - dict:store(filename:basename(Path, ".erl"), Behaviours, Deps). + dict:store(Path, Behaviours, Deps). detect_deps({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> [{module, Behaviour} | Deps]; -- cgit v1.2.1 From 347d6ee999febb074593cae80d2ba10fce192cda Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 12:49:40 +0000 Subject: Made the deps file depend on the includes too, and also force on use_specs when generating the deps otherwise we miss out some transitive includes --- Makefile | 5 +++-- generate_deps | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index dc975bb1..2c376069 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,11 @@ RABBITMQ_SERVER_START_ARGS ?= RABBITMQ_MNESIA_DIR ?= $(TMPDIR)/rabbitmq-$(RABBITMQ_NODENAME)-mnesia RABBITMQ_LOG_BASE ?= $(TMPDIR) +DEPS_FILE=deps.mk SOURCE_DIR=src EBIN_DIR=ebin INCLUDE_DIR=include -DEPS_FILE=deps.mk +INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) BEAM_TARGETS=$(EBIN_DIR)/rabbit_framing.beam $(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) @@ -59,7 +60,7 @@ ERL_EBIN=erl -noinput -pa $(EBIN_DIR) all: $(DEPS_FILE) $(TARGETS) -$(DEPS_FILE): $(SOURCES) +$(DEPS_FILE): $(SOURCES) $(INCLUDES) escript generate_deps $(INCLUDE_DIR) $(SOURCE_DIR) $(DEPS_FILE) $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app diff --git a/generate_deps b/generate_deps index dbac2e42..d438660b 100644 --- a/generate_deps +++ b/generate_deps @@ -51,10 +51,9 @@ write_deps(Hdl, _IncludeDir, {module, Behaviour}) -> ok = file:write(Hdl, [" $(EBIN_DIR)/", atom_to_list(Behaviour), ".beam"]); write_deps(Hdl, IncludeDir, {include, Include}) -> ok = file:write(Hdl, [" ", IncludeDir, "/", Include]). 
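
The {use_specs, true} forced on in the next hunk matters because some headers are only referenced from inside conditional preprocessor sections; with the macro undefined, epp never sees the -include, so the transitive dependency is missed. A sketch of the pattern (header name illustrative):

    %% Without {use_specs, true} in the macro list handed to
    %% epp:parse_file/3, this branch is skipped and the dependency
    %% on rabbit_framing_spec.hrl goes unrecorded.
    -ifdef(use_specs).
    -include("rabbit_framing_spec.hrl").
    -endif.
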
- make_deps(Path, Deps) -> - {ok, Forms} = epp:parse_file(Path, [], []), + {ok, Forms} = epp:parse_file(Path, [], [{use_specs, true}]), Behaviours = lists:foldl(fun (Form, Acc) -> detect_deps(Form, Acc) end, [], Forms), -- cgit v1.2.1 From 6c7ecbaa9a8200db35d4333456b2febf71c23eff Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 15:42:21 +0000 Subject: fix --- src/supervisor2.erl | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 6ff374de..3b41c376 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -1,7 +1,7 @@ %% This file is a copy of supervisor.erl from the R13B-3 Erlang/OTP %% distribution, with the following modifications: %% -%% 1) the module name is gen_server2 +%% 1) the module name is supervisor2 %% %% 2) there is a new strategy called %% simple_one_for_one_terminate. This is exactly the same as for @@ -347,8 +347,9 @@ handle_info(Msg, State) -> %% Terminate this server. %% terminate(_Reason, State) when ?is_terminate_simple(State) -> - ok = terminate_simple_children( - hd(State#state.children), State#state.dynamics, State#state.name); + terminate_simple_children( + hd(State#state.children), State#state.dynamics, State#state.name), + ok; terminate(_Reason, State) -> terminate_children(State#state.children, State#state.name), ok. @@ -575,17 +576,10 @@ terminate_children([], _SupName, Res) -> Res. terminate_simple_children(Child, Dynamics, SupName) -> - ok = dict:fold( - fun (Pid, _Args, ok) -> - case shutdown(Pid, Child#child.shutdown) of - ok -> - ok; - {error, OtherReason} -> - report_error(shutdown_error, OtherReason, Child, - SupName), - ok - end - end, ok, Dynamics). + dict:fold(fun (Pid, _Args, _Any) -> + do_terminate(Child#child{pid = Pid}, SupName) + end, ok, Dynamics), + ok. do_terminate(Child, SupName) when Child#child.pid =/= undefined -> case shutdown(Child#child.pid, -- cgit v1.2.1 From 1f7afd29202395215bc8506e7a1a65d9015c5a78 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 15:42:46 +0000 Subject: Removed supervisor2.erl --- src/supervisor2.erl | 922 ---------------------------------------------------- 1 file changed, 922 deletions(-) delete mode 100644 src/supervisor2.erl diff --git a/src/supervisor2.erl b/src/supervisor2.erl deleted file mode 100644 index 3b41c376..00000000 --- a/src/supervisor2.erl +++ /dev/null @@ -1,922 +0,0 @@ -%% This file is a copy of supervisor.erl from the R13B-3 Erlang/OTP -%% distribution, with the following modifications: -%% -%% 1) the module name is supervisor2 -%% -%% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly killed as -%% per the shutdown component of the child_spec. -%% -%% All modifications are (C) 2010 LShift Ltd. -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 1996-2009. All Rights Reserved. -%% -%% The contents of this file are subject to the Erlang Public License, -%% Version 1.1, (the "License"); you may not use this file except in -%% compliance with the License. You should have received a copy of the -%% Erlang Public License along with this software. If not, it can be -%% retrieved online at http://www.erlang.org/. -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -%% the License for the specific language governing rights and limitations -%% under the License. 
-%% -%% %CopyrightEnd% -%% --module(supervisor2). - --behaviour(gen_server). - -%% External exports --export([start_link/2,start_link/3, - start_child/2, restart_child/2, - delete_child/2, terminate_child/2, - which_children/1, - check_childspecs/1]). - --export([behaviour_info/1]). - -%% Internal exports --export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). --export([handle_cast/2]). - --define(DICT, dict). - --record(state, {name, - strategy, - children = [], - dynamics = ?DICT:new(), - intensity, - period, - restarts = [], - module, - args}). - --record(child, {pid = undefined, % pid is undefined when child is not running - name, - mfa, - restart_type, - shutdown, - child_type, - modules = []}). - --define(is_simple(State), State#state.strategy =:= simple_one_for_one orelse - State#state.strategy =:= simple_one_for_one_terminate). --define(is_exactly_simple(State), State#state.strategy =:= simple_one_for_one). --define(is_terminate_simple(State), - State#state.strategy =:= simple_one_for_one_terminate). - -behaviour_info(callbacks) -> - [{init,1}]; -behaviour_info(_Other) -> - undefined. - -%%% --------------------------------------------------- -%%% This is a general process supervisor built upon gen_server.erl. -%%% Servers/processes should/could also be built using gen_server.erl. -%%% SupName = {local, atom()} | {global, atom()}. -%%% --------------------------------------------------- -start_link(Mod, Args) -> - gen_server:start_link(supervisor2, {self, Mod, Args}, []). - -start_link(SupName, Mod, Args) -> - gen_server:start_link(SupName, supervisor2, {SupName, Mod, Args}, []). - -%%% --------------------------------------------------- -%%% Interface functions. -%%% --------------------------------------------------- -start_child(Supervisor, ChildSpec) -> - call(Supervisor, {start_child, ChildSpec}). - -restart_child(Supervisor, Name) -> - call(Supervisor, {restart_child, Name}). - -delete_child(Supervisor, Name) -> - call(Supervisor, {delete_child, Name}). - -%%----------------------------------------------------------------- -%% Func: terminate_child/2 -%% Returns: ok | {error, Reason} -%% Note that the child is *always* terminated in some -%% way (maybe killed). -%%----------------------------------------------------------------- -terminate_child(Supervisor, Name) -> - call(Supervisor, {terminate_child, Name}). - -which_children(Supervisor) -> - call(Supervisor, which_children). - -call(Supervisor, Req) -> - gen_server:call(Supervisor, Req, infinity). - -check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> - case check_startspec(ChildSpecs) of - {ok, _} -> ok; - Error -> {error, Error} - end; -check_childspecs(X) -> {error, {badarg, X}}. - -%%% --------------------------------------------------- -%%% -%%% Initialize the supervisor. -%%% -%%% --------------------------------------------------- -init({SupName, Mod, Args}) -> - process_flag(trap_exit, true), - case Mod:init(Args) of - {ok, {SupFlags, StartSpec}} -> - case init_state(SupName, SupFlags, Mod, Args) of - {ok, State} when ?is_simple(State) -> - init_dynamic(State, StartSpec); - {ok, State} -> - init_children(State, StartSpec); - Error -> - {stop, {supervisor_data, Error}} - end; - ignore -> - ignore; - Error -> - {stop, {bad_return, {Mod, init, Error}}} - end. 
- -init_children(State, StartSpec) -> - SupName = State#state.name, - case check_startspec(StartSpec) of - {ok, Children} -> - case start_children(Children, SupName) of - {ok, NChildren} -> - {ok, State#state{children = NChildren}}; - {error, NChildren} -> - terminate_children(NChildren, SupName), - {stop, shutdown} - end; - Error -> - {stop, {start_spec, Error}} - end. - -init_dynamic(State, [StartSpec]) -> - case check_startspec([StartSpec]) of - {ok, Children} -> - {ok, State#state{children = Children}}; - Error -> - {stop, {start_spec, Error}} - end; -init_dynamic(_State, StartSpec) -> - {stop, {bad_start_spec, StartSpec}}. - -%%----------------------------------------------------------------- -%% Func: start_children/2 -%% Args: Children = [#child] in start order -%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Purpose: Start all children. The new list contains #child's -%% with pids. -%% Returns: {ok, NChildren} | {error, NChildren} -%% NChildren = [#child] in termination order (reversed -%% start order) -%%----------------------------------------------------------------- -start_children(Children, SupName) -> start_children(Children, [], SupName). - -start_children([Child|Chs], NChildren, SupName) -> - case do_start_child(SupName, Child) of - {ok, Pid} -> - start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); - {ok, Pid, _Extra} -> - start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); - {error, Reason} -> - report_error(start_error, Reason, Child, SupName), - {error, lists:reverse(Chs) ++ [Child | NChildren]} - end; -start_children([], NChildren, _SupName) -> - {ok, NChildren}. - -do_start_child(SupName, Child) -> - #child{mfa = {M, F, A}} = Child, - case catch apply(M, F, A) of - {ok, Pid} when is_pid(Pid) -> - NChild = Child#child{pid = Pid}, - report_progress(NChild, SupName), - {ok, Pid}; - {ok, Pid, Extra} when is_pid(Pid) -> - NChild = Child#child{pid = Pid}, - report_progress(NChild, SupName), - {ok, Pid, Extra}; - ignore -> - {ok, undefined}; - {error, What} -> {error, What}; - What -> {error, What} - end. - -do_start_child_i(M, F, A) -> - case catch apply(M, F, A) of - {ok, Pid} when is_pid(Pid) -> - {ok, Pid}; - {ok, Pid, Extra} when is_pid(Pid) -> - {ok, Pid, Extra}; - ignore -> - {ok, undefined}; - {error, Error} -> - {error, Error}; - What -> - {error, What} - end. - - -%%% --------------------------------------------------- -%%% -%%% Callback functions. -%%% -%%% --------------------------------------------------- -handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> - #child{mfa = {M, F, A}} = hd(State#state.children), - Args = A ++ EArgs, - case do_start_child_i(M, F, Args) of - {ok, Pid} -> - NState = State#state{dynamics = - ?DICT:store(Pid, Args, State#state.dynamics)}, - {reply, {ok, Pid}, NState}; - {ok, Pid, Extra} -> - NState = State#state{dynamics = - ?DICT:store(Pid, Args, State#state.dynamics)}, - {reply, {ok, Pid, Extra}, NState}; - What -> - {reply, What, State} - end; - -%%% The requests terminate_child, delete_child and restart_child are -%%% invalid for simple_one_for_one supervisors. -handle_call({_Req, _Data}, _From, State) when ?is_exactly_simple(State) -> - {reply, {error, simple_one_for_one}, State}; - -%%% The requests terminate_child, delete_child and restart_child are -%%% invalid for simple_one_for_one_terminate supervisors. 
-handle_call({_Req, _Data}, _From, State) when ?is_terminate_simple(State) -> - {reply, {error, simple_one_for_one_terminate}, State}; - -handle_call({start_child, ChildSpec}, _From, State) -> - case check_childspec(ChildSpec) of - {ok, Child} -> - {Resp, NState} = handle_start_child(Child, State), - {reply, Resp, NState}; - What -> - {reply, {error, What}, State} - end; - -handle_call({restart_child, Name}, _From, State) -> - case get_child(Name, State) of - {value, Child} when Child#child.pid =:= undefined -> - case do_start_child(State#state.name, Child) of - {ok, Pid} -> - NState = replace_child(Child#child{pid = Pid}, State), - {reply, {ok, Pid}, NState}; - {ok, Pid, Extra} -> - NState = replace_child(Child#child{pid = Pid}, State), - {reply, {ok, Pid, Extra}, NState}; - Error -> - {reply, Error, State} - end; - {value, _} -> - {reply, {error, running}, State}; - _ -> - {reply, {error, not_found}, State} - end; - -handle_call({delete_child, Name}, _From, State) -> - case get_child(Name, State) of - {value, Child} when Child#child.pid =:= undefined -> - NState = remove_child(Child, State), - {reply, ok, NState}; - {value, _} -> - {reply, {error, running}, State}; - _ -> - {reply, {error, not_found}, State} - end; - -handle_call({terminate_child, Name}, _From, State) -> - case get_child(Name, State) of - {value, Child} -> - NChild = do_terminate(Child, State#state.name), - {reply, ok, replace_child(NChild, State)}; - _ -> - {reply, {error, not_found}, State} - end; - -handle_call(which_children, _From, State) when ?is_simple(State) -> - [#child{child_type = CT, modules = Mods}] = State#state.children, - Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end, - ?DICT:to_list(State#state.dynamics)), - {reply, Reply, State}; - -handle_call(which_children, _From, State) -> - Resp = - lists:map(fun(#child{pid = Pid, name = Name, - child_type = ChildType, modules = Mods}) -> - {Name, Pid, ChildType, Mods} - end, - State#state.children), - {reply, Resp, State}. - - -%%% Hopefully cause a function-clause as there is no API function -%%% that utilizes cast. -handle_cast(null, State) -> - error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", - []), - - {noreply, State}. - -%% -%% Take care of terminated children. -%% -handle_info({'EXIT', Pid, Reason}, State) -> - case restart_child(Pid, Reason, State) of - {ok, State1} -> - {noreply, State1}; - {shutdown, State1} -> - {stop, shutdown, State1} - end; - -handle_info(Msg, State) -> - error_logger:error_msg("Supervisor received unexpected message: ~p~n", - [Msg]), - {noreply, State}. -%% -%% Terminate this server. -%% -terminate(_Reason, State) when ?is_terminate_simple(State) -> - terminate_simple_children( - hd(State#state.children), State#state.dynamics, State#state.name), - ok; -terminate(_Reason, State) -> - terminate_children(State#state.children, State#state.name), - ok. - -%% -%% Change code for the supervisor. -%% Call the new call-back module and fetch the new start specification. -%% Combine the new spec. with the old. If the new start spec. is -%% not valid the code change will not succeed. -%% Use the old Args as argument to Module:init/1. -%% NOTE: This requires that the init function of the call-back module -%% does not have any side effects. 
-%% -code_change(_, State, _) -> - case (State#state.module):init(State#state.args) of - {ok, {SupFlags, StartSpec}} -> - case catch check_flags(SupFlags) of - ok -> - {Strategy, MaxIntensity, Period} = SupFlags, - update_childspec(State#state{strategy = Strategy, - intensity = MaxIntensity, - period = Period}, - StartSpec); - Error -> - {error, Error} - end; - ignore -> - {ok, State}; - Error -> - Error - end. - -check_flags({Strategy, MaxIntensity, Period}) -> - validStrategy(Strategy), - validIntensity(MaxIntensity), - validPeriod(Period), - ok; -check_flags(What) -> - {bad_flags, What}. - -update_childspec(State, StartSpec) when ?is_simple(State) -> - case check_startspec(StartSpec) of - {ok, [Child]} -> - {ok, State#state{children = [Child]}}; - Error -> - {error, Error} - end; - -update_childspec(State, StartSpec) -> - case check_startspec(StartSpec) of - {ok, Children} -> - OldC = State#state.children, % In reverse start order ! - NewC = update_childspec1(OldC, Children, []), - {ok, State#state{children = NewC}}; - Error -> - {error, Error} - end. - -update_childspec1([Child|OldC], Children, KeepOld) -> - case update_chsp(Child, Children) of - {ok,NewChildren} -> - update_childspec1(OldC, NewChildren, KeepOld); - false -> - update_childspec1(OldC, Children, [Child|KeepOld]) - end; -update_childspec1([], Children, KeepOld) -> - % Return them in (keeped) reverse start order. - lists:reverse(Children ++ KeepOld). - -update_chsp(OldCh, Children) -> - case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name -> - Ch#child{pid = OldCh#child.pid}; - (Ch) -> - Ch - end, - Children) of - Children -> - false; % OldCh not found in new spec. - NewC -> - {ok, NewC} - end. - -%%% --------------------------------------------------- -%%% Start a new child. -%%% --------------------------------------------------- - -handle_start_child(Child, State) -> - case get_child(Child#child.name, State) of - false -> - case do_start_child(State#state.name, Child) of - {ok, Pid} -> - Children = State#state.children, - {{ok, Pid}, - State#state{children = - [Child#child{pid = Pid}|Children]}}; - {ok, Pid, Extra} -> - Children = State#state.children, - {{ok, Pid, Extra}, - State#state{children = - [Child#child{pid = Pid}|Children]}}; - {error, What} -> - {{error, {What, Child}}, State} - end; - {value, OldChild} when OldChild#child.pid =/= undefined -> - {{error, {already_started, OldChild#child.pid}}, State}; - {value, _OldChild} -> - {{error, already_present}, State} - end. - -%%% --------------------------------------------------- -%%% Restart. A process has terminated. -%%% Returns: {ok, #state} | {shutdown, #state} -%%% --------------------------------------------------- - -restart_child(Pid, Reason, State) when ?is_simple(State) -> - case ?DICT:find(Pid, State#state.dynamics) of - {ok, Args} -> - [Child] = State#state.children, - RestartType = Child#child.restart_type, - {M, F, _} = Child#child.mfa, - NChild = Child#child{pid = Pid, mfa = {M, F, Args}}, - do_restart(RestartType, Reason, NChild, State); - error -> - {ok, State} - end; -restart_child(Pid, Reason, State) -> - Children = State#state.children, - case lists:keysearch(Pid, #child.pid, Children) of - {value, Child} -> - RestartType = Child#child.restart_type, - do_restart(RestartType, Reason, Child, State); - _ -> - {ok, State} - end. 
- -do_restart(permanent, Reason, Child, State) -> - report_error(child_terminated, Reason, Child, State#state.name), - restart(Child, State); -do_restart(_, normal, Child, State) -> - NState = state_del_child(Child, State), - {ok, NState}; -do_restart(_, shutdown, Child, State) -> - NState = state_del_child(Child, State), - {ok, NState}; -do_restart(transient, Reason, Child, State) -> - report_error(child_terminated, Reason, Child, State#state.name), - restart(Child, State); -do_restart(temporary, Reason, Child, State) -> - report_error(child_terminated, Reason, Child, State#state.name), - NState = state_del_child(Child, State), - {ok, NState}. - -restart(Child, State) -> - case add_restart(State) of - {ok, NState} -> - restart(NState#state.strategy, Child, NState); - {terminate, NState} -> - report_error(shutdown, reached_max_restart_intensity, - Child, State#state.name), - {shutdown, remove_child(Child, NState)} - end. - -restart(Strategy, Child, State) - when Strategy =:= simple_one_for_one orelse - Strategy =:= simple_one_for_one_terminate -> - #child{mfa = {M, F, A}} = Child, - Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), - case do_start_child_i(M, F, A) of - {ok, Pid} -> - NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, - {ok, NState}; - {ok, Pid, _Extra} -> - NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, - {ok, NState}; - {error, Error} -> - report_error(start_error, Error, Child, State#state.name), - restart(Child, State) - end; -restart(one_for_one, Child, State) -> - case do_start_child(State#state.name, Child) of - {ok, Pid} -> - NState = replace_child(Child#child{pid = Pid}, State), - {ok, NState}; - {ok, Pid, _Extra} -> - NState = replace_child(Child#child{pid = Pid}, State), - {ok, NState}; - {error, Reason} -> - report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) - end; -restart(rest_for_one, Child, State) -> - {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), - ChAfter2 = terminate_children(ChAfter, State#state.name), - case start_children(ChAfter2, State#state.name) of - {ok, ChAfter3} -> - {ok, State#state{children = ChAfter3 ++ ChBefore}}; - {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) - end; -restart(one_for_all, Child, State) -> - Children1 = del_child(Child#child.pid, State#state.children), - Children2 = terminate_children(Children1, State#state.name), - case start_children(Children2, State#state.name) of - {ok, NChs} -> - {ok, State#state{children = NChs}}; - {error, NChs} -> - restart(Child, State#state{children = NChs}) - end. - -%%----------------------------------------------------------------- -%% Func: terminate_children/2 -%% Args: Children = [#child] in termination order -%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Returns: NChildren = [#child] in -%% startup order (reversed termination order) -%%----------------------------------------------------------------- -terminate_children(Children, SupName) -> - terminate_children(Children, SupName, []). - -terminate_children([Child | Children], SupName, Res) -> - NChild = do_terminate(Child, SupName), - terminate_children(Children, SupName, [NChild | Res]); -terminate_children([], _SupName, Res) -> - Res. - -terminate_simple_children(Child, Dynamics, SupName) -> - dict:fold(fun (Pid, _Args, _Any) -> - do_terminate(Child#child{pid = Pid}, SupName) - end, ok, Dynamics), - ok. 
- -do_terminate(Child, SupName) when Child#child.pid =/= undefined -> - case shutdown(Child#child.pid, - Child#child.shutdown) of - ok -> - Child#child{pid = undefined}; - {error, OtherReason} -> - report_error(shutdown_error, OtherReason, Child, SupName), - Child#child{pid = undefined} - end; -do_terminate(Child, _SupName) -> - Child. - -%%----------------------------------------------------------------- -%% Shutdowns a child. We must check the EXIT value -%% of the child, because it might have died with another reason than -%% the wanted. In that case we want to report the error. We put a -%% monitor on the child an check for the 'DOWN' message instead of -%% checking for the 'EXIT' message, because if we check the 'EXIT' -%% message a "naughty" child, who does unlink(Sup), could hang the -%% supervisor. -%% Returns: ok | {error, OtherReason} (this should be reported) -%%----------------------------------------------------------------- -shutdown(Pid, brutal_kill) -> - - case monitor_child(Pid) of - ok -> - exit(Pid, kill), - receive - {'DOWN', _MRef, process, Pid, killed} -> - ok; - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - end; - {error, Reason} -> - {error, Reason} - end; - -shutdown(Pid, Time) -> - - case monitor_child(Pid) of - ok -> - exit(Pid, shutdown), %% Try to shutdown gracefully - receive - {'DOWN', _MRef, process, Pid, shutdown} -> - ok; - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - after Time -> - exit(Pid, kill), %% Force termination. - receive - {'DOWN', _MRef, process, Pid, OtherReason} -> - {error, OtherReason} - end - end; - {error, Reason} -> - {error, Reason} - end. - -%% Help function to shutdown/2 switches from link to monitor approach -monitor_child(Pid) -> - - %% Do the monitor operation first so that if the child dies - %% before the monitoring is done causing a 'DOWN'-message with - %% reason noproc, we will get the real reason in the 'EXIT'-message - %% unless a naughty child has already done unlink... - erlang:monitor(process, Pid), - unlink(Pid), - - receive - %% If the child dies before the unlik we must empty - %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. - {'EXIT', Pid, Reason} -> - receive - {'DOWN', _, process, Pid, _} -> - {error, Reason} - end - after 0 -> - %% If a naughty child did unlink and the child dies before - %% monitor the result will be that shutdown/2 receives a - %% 'DOWN'-message with reason noproc. - %% If the child should die after the unlink there - %% will be a 'DOWN'-message with a correct reason - %% that will be handled in shutdown/2. - ok - end. - - -%%----------------------------------------------------------------- -%% Child/State manipulating functions. -%%----------------------------------------------------------------- -state_del_child(#child{pid = Pid}, State) when ?is_simple(State) -> - NDynamics = ?DICT:erase(Pid, State#state.dynamics), - State#state{dynamics = NDynamics}; -state_del_child(Child, State) -> - NChildren = del_child(Child#child.name, State#state.children), - State#state{children = NChildren}. - -del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name -> - [Ch#child{pid = undefined} | Chs]; -del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid -> - [Ch#child{pid = undefined} | Chs]; -del_child(Name, [Ch|Chs]) -> - [Ch|del_child(Name, Chs)]; -del_child(_, []) -> - []. - -%% Chs = [S4, S3, Ch, S1, S0] -%% Ret: {[S4, S3, Ch], [S1, S0]} -split_child(Name, Chs) -> - split_child(Name, Chs, []). 
- -split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name -> - {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; -split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid -> - {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; -split_child(Name, [Ch|Chs], After) -> - split_child(Name, Chs, [Ch | After]); -split_child(_, [], After) -> - {lists:reverse(After), []}. - -get_child(Name, State) -> - lists:keysearch(Name, #child.name, State#state.children). -replace_child(Child, State) -> - Chs = do_replace_child(Child, State#state.children), - State#state{children = Chs}. - -do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name -> - [Child | Chs]; -do_replace_child(Child, [Ch|Chs]) -> - [Ch|do_replace_child(Child, Chs)]. - -remove_child(Child, State) -> - Chs = lists:keydelete(Child#child.name, #child.name, State#state.children), - State#state{children = Chs}. - -%%----------------------------------------------------------------- -%% Func: init_state/4 -%% Args: SupName = {local, atom()} | {global, atom()} | self -%% Type = {Strategy, MaxIntensity, Period} -%% Strategy = one_for_one | one_for_all | simple_one_for_one | -%% rest_for_one -%% MaxIntensity = integer() -%% Period = integer() -%% Mod :== atom() -%% Arsg :== term() -%% Purpose: Check that Type is of correct type (!) -%% Returns: {ok, #state} | Error -%%----------------------------------------------------------------- -init_state(SupName, Type, Mod, Args) -> - case catch init_state1(SupName, Type, Mod, Args) of - {ok, State} -> - {ok, State}; - Error -> - Error - end. - -init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) -> - validStrategy(Strategy), - validIntensity(MaxIntensity), - validPeriod(Period), - {ok, #state{name = supname(SupName,Mod), - strategy = Strategy, - intensity = MaxIntensity, - period = Period, - module = Mod, - args = Args}}; -init_state1(_SupName, Type, _, _) -> - {invalid_type, Type}. - -validStrategy(simple_one_for_one_terminate) -> true; -validStrategy(simple_one_for_one) -> true; -validStrategy(one_for_one) -> true; -validStrategy(one_for_all) -> true; -validStrategy(rest_for_one) -> true; -validStrategy(What) -> throw({invalid_strategy, What}). - -validIntensity(Max) when is_integer(Max), - Max >= 0 -> true; -validIntensity(What) -> throw({invalid_intensity, What}). - -validPeriod(Period) when is_integer(Period), - Period > 0 -> true; -validPeriod(What) -> throw({invalid_period, What}). - -supname(self,Mod) -> {self(),Mod}; -supname(N,_) -> N. - -%%% ------------------------------------------------------ -%%% Check that the children start specification is valid. -%%% Shall be a six (6) tuple -%%% {Name, Func, RestartType, Shutdown, ChildType, Modules} -%%% where Name is an atom -%%% Func is {Mod, Fun, Args} == {atom, atom, list} -%%% RestartType is permanent | temporary | transient -%%% Shutdown = integer() | infinity | brutal_kill -%%% ChildType = supervisor | worker -%%% Modules = [atom()] | dynamic -%%% Returns: {ok, [#child]} | Error -%%% ------------------------------------------------------ - -check_startspec(Children) -> check_startspec(Children, []). - -check_startspec([ChildSpec|T], Res) -> - case check_childspec(ChildSpec) of - {ok, Child} -> - case lists:keymember(Child#child.name, #child.name, Res) of - true -> {duplicate_child_name, Child#child.name}; - false -> check_startspec(T, [Child | Res]) - end; - Error -> Error - end; -check_startspec([], Res) -> - {ok, lists:reverse(Res)}. 
- -check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) -> - catch check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods); -check_childspec(X) -> {invalid_child_spec, X}. - -check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) -> - validName(Name), - validFunc(Func), - validRestartType(RestartType), - validChildType(ChildType), - validShutdown(Shutdown, ChildType), - validMods(Mods), - {ok, #child{name = Name, mfa = Func, restart_type = RestartType, - shutdown = Shutdown, child_type = ChildType, modules = Mods}}. - -validChildType(supervisor) -> true; -validChildType(worker) -> true; -validChildType(What) -> throw({invalid_child_type, What}). - -validName(_Name) -> true. - -validFunc({M, F, A}) when is_atom(M), - is_atom(F), - is_list(A) -> true; -validFunc(Func) -> throw({invalid_mfa, Func}). - -validRestartType(permanent) -> true; -validRestartType(temporary) -> true; -validRestartType(transient) -> true; -validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). - -validShutdown(Shutdown, _) - when is_integer(Shutdown), Shutdown > 0 -> true; -validShutdown(infinity, supervisor) -> true; -validShutdown(brutal_kill, _) -> true; -validShutdown(Shutdown, _) -> throw({invalid_shutdown, Shutdown}). - -validMods(dynamic) -> true; -validMods(Mods) when is_list(Mods) -> - lists:foreach(fun(Mod) -> - if - is_atom(Mod) -> ok; - true -> throw({invalid_module, Mod}) - end - end, - Mods); -validMods(Mods) -> throw({invalid_modules, Mods}). - -%%% ------------------------------------------------------ -%%% Add a new restart and calculate if the max restart -%%% intensity has been reached (in that case the supervisor -%%% shall terminate). -%%% All restarts accured inside the period amount of seconds -%%% are kept in the #state.restarts list. -%%% Returns: {ok, State'} | {terminate, State'} -%%% ------------------------------------------------------ - -add_restart(State) -> - I = State#state.intensity, - P = State#state.period, - R = State#state.restarts, - Now = erlang:now(), - R1 = add_restart([Now|R], Now, P), - State1 = State#state{restarts = R1}, - case length(R1) of - CurI when CurI =< I -> - {ok, State1}; - _ -> - {terminate, State1} - end. - -add_restart([R|Restarts], Now, Period) -> - case inPeriod(R, Now, Period) of - true -> - [R|add_restart(Restarts, Now, Period)]; - _ -> - [] - end; -add_restart([], _, _) -> - []. - -inPeriod(Time, Now, Period) -> - case difference(Time, Now) of - T when T > Period -> - false; - _ -> - true - end. - -%% -%% Time = {MegaSecs, Secs, MicroSecs} (NOTE: MicroSecs is ignored) -%% Calculate the time elapsed in seconds between two timestamps. -%% If MegaSecs is equal just subtract Secs. -%% Else calculate the Mega difference and add the Secs difference, -%% note that Secs difference can be negative, e.g. -%% {827, 999999, 676} diff {828, 1, 653753} == > 2 secs. -%% -difference({TimeM, TimeS, _}, {CurM, CurS, _}) when CurM > TimeM -> - ((CurM - TimeM) * 1000000) + (CurS - TimeS); -difference({_, TimeS, _}, {_, CurS, _}) -> - CurS - TimeS. - -%%% ------------------------------------------------------ -%%% Error and progress reporting. -%%% ------------------------------------------------------ - -report_error(Error, Reason, Child, SupName) -> - ErrorMsg = [{supervisor, SupName}, - {errorContext, Error}, - {reason, Reason}, - {offender, extract_child(Child)}], - error_logger:error_report(supervisor_report, ErrorMsg). 
- - -extract_child(Child) -> - [{pid, Child#child.pid}, - {name, Child#child.name}, - {mfa, Child#child.mfa}, - {restart_type, Child#child.restart_type}, - {shutdown, Child#child.shutdown}, - {child_type, Child#child.child_type}]. - -report_progress(Child, SupName) -> - Progress = [{supervisor, SupName}, - {started, extract_child(Child)}], - error_logger:info_report(progress, Progress). -- cgit v1.2.1 From 151406f7508366351c3b5382a401dbe259f6824d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 15:43:34 +0000 Subject: Added supervisor2 straight from R13B03 --- src/supervisor2.erl | 889 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 889 insertions(+) create mode 100644 src/supervisor2.erl diff --git a/src/supervisor2.erl b/src/supervisor2.erl new file mode 100644 index 00000000..fb1303d1 --- /dev/null +++ b/src/supervisor2.erl @@ -0,0 +1,889 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(supervisor). + +-behaviour(gen_server). + +%% External exports +-export([start_link/2,start_link/3, + start_child/2, restart_child/2, + delete_child/2, terminate_child/2, + which_children/1, + check_childspecs/1]). + +-export([behaviour_info/1]). + +%% Internal exports +-export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). +-export([handle_cast/2]). + +-define(DICT, dict). + +-record(state, {name, + strategy, + children = [], + dynamics = ?DICT:new(), + intensity, + period, + restarts = [], + module, + args}). + +-record(child, {pid = undefined, % pid is undefined when child is not running + name, + mfa, + restart_type, + shutdown, + child_type, + modules = []}). + +-define(is_simple(State), State#state.strategy =:= simple_one_for_one). + +behaviour_info(callbacks) -> + [{init,1}]; +behaviour_info(_Other) -> + undefined. + +%%% --------------------------------------------------- +%%% This is a general process supervisor built upon gen_server.erl. +%%% Servers/processes should/could also be built using gen_server.erl. +%%% SupName = {local, atom()} | {global, atom()}. +%%% --------------------------------------------------- +start_link(Mod, Args) -> + gen_server:start_link(supervisor, {self, Mod, Args}, []). + +start_link(SupName, Mod, Args) -> + gen_server:start_link(SupName, supervisor, {SupName, Mod, Args}, []). + +%%% --------------------------------------------------- +%%% Interface functions. +%%% --------------------------------------------------- +start_child(Supervisor, ChildSpec) -> + call(Supervisor, {start_child, ChildSpec}). + +restart_child(Supervisor, Name) -> + call(Supervisor, {restart_child, Name}). + +delete_child(Supervisor, Name) -> + call(Supervisor, {delete_child, Name}). 
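
A sketch of a client driving this API (callback and child names hypothetical; my_sup_callback:init/1 is assumed to return {ok, {{one_for_one, 5, 10}, []}}). Note that delete_child/2 only succeeds once the child has been terminated:

    {ok, Sup} = supervisor:start_link(my_sup_callback, []),
    Spec = {my_worker, {my_worker, start_link, []},
            permanent, 2000, worker, [my_worker]},
    {ok, _Pid}  = supervisor:start_child(Sup, Spec),
    ok          = supervisor:terminate_child(Sup, my_worker),
    {ok, _Pid2} = supervisor:restart_child(Sup, my_worker),
    ok          = supervisor:terminate_child(Sup, my_worker),
    ok          = supervisor:delete_child(Sup, my_worker).
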
+ +%%----------------------------------------------------------------- +%% Func: terminate_child/2 +%% Returns: ok | {error, Reason} +%% Note that the child is *always* terminated in some +%% way (maybe killed). +%%----------------------------------------------------------------- +terminate_child(Supervisor, Name) -> + call(Supervisor, {terminate_child, Name}). + +which_children(Supervisor) -> + call(Supervisor, which_children). + +call(Supervisor, Req) -> + gen_server:call(Supervisor, Req, infinity). + +check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> + case check_startspec(ChildSpecs) of + {ok, _} -> ok; + Error -> {error, Error} + end; +check_childspecs(X) -> {error, {badarg, X}}. + +%%% --------------------------------------------------- +%%% +%%% Initialize the supervisor. +%%% +%%% --------------------------------------------------- +init({SupName, Mod, Args}) -> + process_flag(trap_exit, true), + case Mod:init(Args) of + {ok, {SupFlags, StartSpec}} -> + case init_state(SupName, SupFlags, Mod, Args) of + {ok, State} when ?is_simple(State) -> + init_dynamic(State, StartSpec); + {ok, State} -> + init_children(State, StartSpec); + Error -> + {stop, {supervisor_data, Error}} + end; + ignore -> + ignore; + Error -> + {stop, {bad_return, {Mod, init, Error}}} + end. + +init_children(State, StartSpec) -> + SupName = State#state.name, + case check_startspec(StartSpec) of + {ok, Children} -> + case start_children(Children, SupName) of + {ok, NChildren} -> + {ok, State#state{children = NChildren}}; + {error, NChildren} -> + terminate_children(NChildren, SupName), + {stop, shutdown} + end; + Error -> + {stop, {start_spec, Error}} + end. + +init_dynamic(State, [StartSpec]) -> + case check_startspec([StartSpec]) of + {ok, Children} -> + {ok, State#state{children = Children}}; + Error -> + {stop, {start_spec, Error}} + end; +init_dynamic(_State, StartSpec) -> + {stop, {bad_start_spec, StartSpec}}. + +%%----------------------------------------------------------------- +%% Func: start_children/2 +%% Args: Children = [#child] in start order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Purpose: Start all children. The new list contains #child's +%% with pids. +%% Returns: {ok, NChildren} | {error, NChildren} +%% NChildren = [#child] in termination order (reversed +%% start order) +%%----------------------------------------------------------------- +start_children(Children, SupName) -> start_children(Children, [], SupName). + +start_children([Child|Chs], NChildren, SupName) -> + case do_start_child(SupName, Child) of + {ok, Pid} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {ok, Pid, _Extra} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {error, Reason} -> + report_error(start_error, Reason, Child, SupName), + {error, lists:reverse(Chs) ++ [Child | NChildren]} + end; +start_children([], NChildren, _SupName) -> + {ok, NChildren}. + +do_start_child(SupName, Child) -> + #child{mfa = {M, F, A}} = Child, + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, What} -> {error, What}; + What -> {error, What} + end. 
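
For simple_one_for_one supervisors, start_child is handled differently (see the handle_call clause just below): the caller's argument list is appended to the template MFA's arguments. A sketch with hypothetical names:

    %% Template spec in the callback's init/1:
    %%   {my_worker, {my_worker, start_link, [BaseArg]},
    %%    temporary, brutal_kill, worker, [my_worker]}
    start_worker(Sup, ExtraArg) ->
        %% Applies my_worker:start_link(BaseArg, ExtraArg) and stores
        %% Pid -> [BaseArg, ExtraArg] in #state.dynamics.
        {ok, Pid} = supervisor:start_child(Sup, [ExtraArg]),
        Pid.
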
+ +do_start_child_i(M, F, A) -> + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, Error} -> + {error, Error}; + What -> + {error, What} + end. + + +%%% --------------------------------------------------- +%%% +%%% Callback functions. +%%% +%%% --------------------------------------------------- +handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> + #child{mfa = {M, F, A}} = hd(State#state.children), + Args = A ++ EArgs, + case do_start_child_i(M, F, Args) of + {ok, Pid} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid, Extra}, NState}; + What -> + {reply, What, State} + end; + +%%% The requests terminate_child, delete_child and restart_child are +%%% invalid for simple_one_for_one supervisors. +handle_call({_Req, _Data}, _From, State) when ?is_simple(State) -> + {reply, {error, simple_one_for_one}, State}; + +handle_call({start_child, ChildSpec}, _From, State) -> + case check_childspec(ChildSpec) of + {ok, Child} -> + {Resp, NState} = handle_start_child(Child, State), + {reply, Resp, NState}; + What -> + {reply, {error, What}, State} + end; + +handle_call({restart_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid, Extra}, NState}; + Error -> + {reply, Error, State} + end; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({delete_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + NState = remove_child(Child, State), + {reply, ok, NState}; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({terminate_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} -> + NChild = do_terminate(Child, State#state.name), + {reply, ok, replace_child(NChild, State)}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call(which_children, _From, State) when ?is_simple(State) -> + [#child{child_type = CT, modules = Mods}] = State#state.children, + Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end, + ?DICT:to_list(State#state.dynamics)), + {reply, Reply, State}; + +handle_call(which_children, _From, State) -> + Resp = + lists:map(fun(#child{pid = Pid, name = Name, + child_type = ChildType, modules = Mods}) -> + {Name, Pid, ChildType, Mods} + end, + State#state.children), + {reply, Resp, State}. + + +%%% Hopefully cause a function-clause as there is no API function +%%% that utilizes cast. +handle_cast(null, State) -> + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + []), + + {noreply, State}. + +%% +%% Take care of terminated children. 
+%%
+handle_info({'EXIT', Pid, Reason}, State) ->
+    case restart_child(Pid, Reason, State) of
+        {ok, State1} ->
+            {noreply, State1};
+        {shutdown, State1} ->
+            {stop, shutdown, State1}
+    end;
+
+handle_info(Msg, State) ->
+    error_logger:error_msg("Supervisor received unexpected message: ~p~n",
+                           [Msg]),
+    {noreply, State}.
+%%
+%% Terminate this server.
+%%
+terminate(_Reason, State) ->
+    terminate_children(State#state.children, State#state.name),
+    ok.
+
+%%
+%% Change code for the supervisor.
+%% Call the new call-back module and fetch the new start specification.
+%% Combine the new spec. with the old. If the new start spec. is
+%% not valid the code change will not succeed.
+%% Use the old Args as argument to Module:init/1.
+%% NOTE: This requires that the init function of the call-back module
+%% does not have any side effects.
+%%
+code_change(_, State, _) ->
+    case (State#state.module):init(State#state.args) of
+        {ok, {SupFlags, StartSpec}} ->
+            case catch check_flags(SupFlags) of
+                ok ->
+                    {Strategy, MaxIntensity, Period} = SupFlags,
+                    update_childspec(State#state{strategy = Strategy,
+                                                 intensity = MaxIntensity,
+                                                 period = Period},
+                                     StartSpec);
+                Error ->
+                    {error, Error}
+            end;
+        ignore ->
+            {ok, State};
+        Error ->
+            Error
+    end.
+
+check_flags({Strategy, MaxIntensity, Period}) ->
+    validStrategy(Strategy),
+    validIntensity(MaxIntensity),
+    validPeriod(Period),
+    ok;
+check_flags(What) ->
+    {bad_flags, What}.
+
+update_childspec(State, StartSpec) when ?is_simple(State) ->
+    case check_startspec(StartSpec) of
+        {ok, [Child]} ->
+            {ok, State#state{children = [Child]}};
+        Error ->
+            {error, Error}
+    end;
+
+update_childspec(State, StartSpec) ->
+    case check_startspec(StartSpec) of
+        {ok, Children} ->
+            OldC = State#state.children, % In reverse start order !
+            NewC = update_childspec1(OldC, Children, []),
+            {ok, State#state{children = NewC}};
+        Error ->
+            {error, Error}
+    end.
+
+update_childspec1([Child|OldC], Children, KeepOld) ->
+    case update_chsp(Child, Children) of
+        {ok,NewChildren} ->
+            update_childspec1(OldC, NewChildren, KeepOld);
+        false ->
+            update_childspec1(OldC, Children, [Child|KeepOld])
+    end;
+update_childspec1([], Children, KeepOld) ->
+    % Return them in (kept) reverse start order.
+    lists:reverse(Children ++ KeepOld).
+
+update_chsp(OldCh, Children) ->
+    case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name ->
+                           Ch#child{pid = OldCh#child.pid};
+                      (Ch) ->
+                           Ch
+                   end,
+                   Children) of
+        Children ->
+            false;  % OldCh not found in new spec.
+        NewC ->
+            {ok, NewC}
+    end.
+
+%%% ---------------------------------------------------
+%%% Start a new child.
+%%% ---------------------------------------------------
+
+handle_start_child(Child, State) ->
+    case get_child(Child#child.name, State) of
+        false ->
+            case do_start_child(State#state.name, Child) of
+                {ok, Pid} ->
+                    Children = State#state.children,
+                    {{ok, Pid},
+                     State#state{children =
+                                 [Child#child{pid = Pid}|Children]}};
+                {ok, Pid, Extra} ->
+                    Children = State#state.children,
+                    {{ok, Pid, Extra},
+                     State#state{children =
+                                 [Child#child{pid = Pid}|Children]}};
+                {error, What} ->
+                    {{error, {What, Child}}, State}
+            end;
+        {value, OldChild} when OldChild#child.pid =/= undefined ->
+            {{error, {already_started, OldChild#child.pid}}, State};
+        {value, _OldChild} ->
+            {{error, already_present}, State}
+    end.
+
+%%% ---------------------------------------------------
+%%% Restart. A process has terminated.
+%%% Returns: {ok, #state} | {shutdown, #state} +%%% --------------------------------------------------- + +restart_child(Pid, Reason, State) when ?is_simple(State) -> + case ?DICT:find(Pid, State#state.dynamics) of + {ok, Args} -> + [Child] = State#state.children, + RestartType = Child#child.restart_type, + {M, F, _} = Child#child.mfa, + NChild = Child#child{pid = Pid, mfa = {M, F, Args}}, + do_restart(RestartType, Reason, NChild, State); + error -> + {ok, State} + end; +restart_child(Pid, Reason, State) -> + Children = State#state.children, + case lists:keysearch(Pid, #child.pid, Children) of + {value, Child} -> + RestartType = Child#child.restart_type, + do_restart(RestartType, Reason, Child, State); + _ -> + {ok, State} + end. + +do_restart(permanent, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(_, normal, Child, State) -> + NState = state_del_child(Child, State), + {ok, NState}; +do_restart(_, shutdown, Child, State) -> + NState = state_del_child(Child, State), + {ok, NState}; +do_restart(transient, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(temporary, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + NState = state_del_child(Child, State), + {ok, NState}. + +restart(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState); + {terminate, NState} -> + report_error(shutdown, reached_max_restart_intensity, + Child, State#state.name), + {shutdown, remove_child(Child, NState)} + end. + +restart(simple_one_for_one, Child, State) -> + #child{mfa = {M, F, A}} = Child, + Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), + case do_start_child_i(M, F, A) of + {ok, Pid} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {ok, Pid, _Extra} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {error, Error} -> + report_error(start_error, Error, Child, State#state.name), + restart(Child, State) + end; +restart(one_for_one, Child, State) -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {ok, Pid, _Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {error, Reason} -> + report_error(start_error, Reason, Child, State#state.name), + restart(Child, State) + end; +restart(rest_for_one, Child, State) -> + {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), + ChAfter2 = terminate_children(ChAfter, State#state.name), + case start_children(ChAfter2, State#state.name) of + {ok, ChAfter3} -> + {ok, State#state{children = ChAfter3 ++ ChBefore}}; + {error, ChAfter3} -> + restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + end; +restart(one_for_all, Child, State) -> + Children1 = del_child(Child#child.pid, State#state.children), + Children2 = terminate_children(Children1, State#state.name), + case start_children(Children2, State#state.name) of + {ok, NChs} -> + {ok, State#state{children = NChs}}; + {error, NChs} -> + restart(Child, State#state{children = NChs}) + end. 
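%% [Reading aid, not part of the patch. The do_restart/4 clauses above
%% reduce to the following decision, sketched as a pure function.
%% Clause order matters: a permanent child is restarted even when it
%% exits with reason normal or shutdown.]
restart_decision(permanent, _Reason) -> restart;
restart_decision(_Type, normal)      -> delete;
restart_decision(_Type, shutdown)    -> delete;
restart_decision(transient, _Reason) -> restart;
restart_decision(temporary, _Reason) -> delete.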
+
+%%-----------------------------------------------------------------
+%% Func: terminate_children/2
+%% Args: Children = [#child] in termination order
+%%       SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
+%% Returns: NChildren = [#child] in
+%%          startup order (reversed termination order)
+%%-----------------------------------------------------------------
+terminate_children(Children, SupName) ->
+    terminate_children(Children, SupName, []).
+
+terminate_children([Child | Children], SupName, Res) ->
+    NChild = do_terminate(Child, SupName),
+    terminate_children(Children, SupName, [NChild | Res]);
+terminate_children([], _SupName, Res) ->
+    Res.
+
+do_terminate(Child, SupName) when Child#child.pid =/= undefined ->
+    case shutdown(Child#child.pid,
+                  Child#child.shutdown) of
+        ok ->
+            Child#child{pid = undefined};
+        {error, OtherReason} ->
+            report_error(shutdown_error, OtherReason, Child, SupName),
+            Child#child{pid = undefined}
+    end;
+do_terminate(Child, _SupName) ->
+    Child.
+
+%%-----------------------------------------------------------------
+%% Shuts down a child. We must check the EXIT value of the child,
+%% because it might have died with a reason other than the expected
+%% one. In that case we want to report the error. We put a monitor
+%% on the child and check for the 'DOWN' message instead of
+%% checking for the 'EXIT' message, because if we check the 'EXIT'
+%% message a "naughty" child, who does unlink(Sup), could hang the
+%% supervisor.
+%% Returns: ok | {error, OtherReason}  (this should be reported)
+%%-----------------------------------------------------------------
+shutdown(Pid, brutal_kill) ->
+
+    case monitor_child(Pid) of
+        ok ->
+            exit(Pid, kill),
+            receive
+                {'DOWN', _MRef, process, Pid, killed} ->
+                    ok;
+                {'DOWN', _MRef, process, Pid, OtherReason} ->
+                    {error, OtherReason}
+            end;
+        {error, Reason} ->
+            {error, Reason}
+    end;
+
+shutdown(Pid, Time) ->
+
+    case monitor_child(Pid) of
+        ok ->
+            exit(Pid, shutdown), %% Try to shut down gracefully
+            receive
+                {'DOWN', _MRef, process, Pid, shutdown} ->
+                    ok;
+                {'DOWN', _MRef, process, Pid, OtherReason} ->
+                    {error, OtherReason}
+            after Time ->
+                exit(Pid, kill),  %% Force termination.
+                receive
+                    {'DOWN', _MRef, process, Pid, OtherReason} ->
+                        {error, OtherReason}
+                end
+            end;
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+%% Helper function for shutdown/2; switches from the link to the
+%% monitor approach
+monitor_child(Pid) ->
+
+    %% Do the monitor operation first so that if the child dies
+    %% before the monitoring is done causing a 'DOWN'-message with
+    %% reason noproc, we will get the real reason in the 'EXIT'-message
+    %% unless a naughty child has already done unlink...
+    erlang:monitor(process, Pid),
+    unlink(Pid),
+
+    receive
+        %% If the child dies before the unlink we must empty
+        %% the mail-box of the 'EXIT'-message and the 'DOWN'-message.
+        {'EXIT', Pid, Reason} ->
+            receive
+                {'DOWN', _, process, Pid, _} ->
+                    {error, Reason}
+            end
+    after 0 ->
+            %% If a naughty child did unlink and the child dies before
+            %% monitor the result will be that shutdown/2 receives a
+            %% 'DOWN'-message with reason noproc.
+            %% If the child should die after the unlink there
+            %% will be a 'DOWN'-message with a correct reason
+            %% that will be handled in shutdown/2.
+            ok
+    end.
+
+
+%%-----------------------------------------------------------------
+%% Child/State manipulating functions.
+%%-----------------------------------------------------------------
+state_del_child(#child{pid = Pid}, State) when ?is_simple(State) ->
+    NDynamics = ?DICT:erase(Pid, State#state.dynamics),
+    State#state{dynamics = NDynamics};
+state_del_child(Child, State) ->
+    NChildren = del_child(Child#child.name, State#state.children),
+    State#state{children = NChildren}.
+
+del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name ->
+    [Ch#child{pid = undefined} | Chs];
+del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid ->
+    [Ch#child{pid = undefined} | Chs];
+del_child(Name, [Ch|Chs]) ->
+    [Ch|del_child(Name, Chs)];
+del_child(_, []) ->
+    [].
+
+%% Chs = [S4, S3, Ch, S1, S0]
+%% Ret: {[S4, S3, Ch], [S1, S0]}
+split_child(Name, Chs) ->
+    split_child(Name, Chs, []).
+
+split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name ->
+    {lists:reverse([Ch#child{pid = undefined} | After]), Chs};
+split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid ->
+    {lists:reverse([Ch#child{pid = undefined} | After]), Chs};
+split_child(Name, [Ch|Chs], After) ->
+    split_child(Name, Chs, [Ch | After]);
+split_child(_, [], After) ->
+    {lists:reverse(After), []}.
+
+get_child(Name, State) ->
+    lists:keysearch(Name, #child.name, State#state.children).
+replace_child(Child, State) ->
+    Chs = do_replace_child(Child, State#state.children),
+    State#state{children = Chs}.
+
+do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name ->
+    [Child | Chs];
+do_replace_child(Child, [Ch|Chs]) ->
+    [Ch|do_replace_child(Child, Chs)].
+
+remove_child(Child, State) ->
+    Chs = lists:keydelete(Child#child.name, #child.name, State#state.children),
+    State#state{children = Chs}.
+
+%%-----------------------------------------------------------------
+%% Func: init_state/4
+%% Args: SupName = {local, atom()} | {global, atom()} | self
+%%       Type = {Strategy, MaxIntensity, Period}
+%%         Strategy = one_for_one | one_for_all | simple_one_for_one |
+%%                    rest_for_one
+%%         MaxIntensity = integer()
+%%         Period = integer()
+%%       Mod :== atom()
+%%       Args :== term()
+%% Purpose: Check that Type is of correct type (!)
+%% Returns: {ok, #state} | Error
+%%-----------------------------------------------------------------
+init_state(SupName, Type, Mod, Args) ->
+    case catch init_state1(SupName, Type, Mod, Args) of
+        {ok, State} ->
+            {ok, State};
+        Error ->
+            Error
+    end.
+
+init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) ->
+    validStrategy(Strategy),
+    validIntensity(MaxIntensity),
+    validPeriod(Period),
+    {ok, #state{name = supname(SupName,Mod),
+                strategy = Strategy,
+                intensity = MaxIntensity,
+                period = Period,
+                module = Mod,
+                args = Args}};
+init_state1(_SupName, Type, _, _) ->
+    {invalid_type, Type}.
+
+validStrategy(simple_one_for_one) -> true;
+validStrategy(one_for_one)        -> true;
+validStrategy(one_for_all)        -> true;
+validStrategy(rest_for_one)       -> true;
+validStrategy(What)               -> throw({invalid_strategy, What}).
+
+validIntensity(Max) when is_integer(Max),
+                         Max >= 0 -> true;
+validIntensity(What)              -> throw({invalid_intensity, What}).
+
+validPeriod(Period) when is_integer(Period),
+                         Period > 0 -> true;
+validPeriod(What)                   -> throw({invalid_period, What}).
+
+supname(self,Mod) -> {self(),Mod};
+supname(N,_)      -> N.
+
+%%% ------------------------------------------------------
+%%% Check that the children start specification is valid.
+%%% Shall be a six (6) tuple
+%%%    {Name, Func, RestartType, Shutdown, ChildType, Modules}
+%%% where Name is an atom
+%%%       Func is {Mod, Fun, Args} == {atom, atom, list}
+%%%       RestartType is permanent | temporary | transient
+%%%       Shutdown = integer() | infinity | brutal_kill
+%%%       ChildType = supervisor | worker
+%%%       Modules = [atom()] | dynamic
+%%% Returns: {ok, [#child]} | Error
+%%% ------------------------------------------------------
+
+check_startspec(Children) -> check_startspec(Children, []).
+
+check_startspec([ChildSpec|T], Res) ->
+    case check_childspec(ChildSpec) of
+        {ok, Child} ->
+            case lists:keymember(Child#child.name, #child.name, Res) of
+                true -> {duplicate_child_name, Child#child.name};
+                false -> check_startspec(T, [Child | Res])
+            end;
+        Error -> Error
+    end;
+check_startspec([], Res) ->
+    {ok, lists:reverse(Res)}.
+
+check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) ->
+    catch check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods);
+check_childspec(X) -> {invalid_child_spec, X}.
+
+check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) ->
+    validName(Name),
+    validFunc(Func),
+    validRestartType(RestartType),
+    validChildType(ChildType),
+    validShutdown(Shutdown, ChildType),
+    validMods(Mods),
+    {ok, #child{name = Name, mfa = Func, restart_type = RestartType,
+                shutdown = Shutdown, child_type = ChildType, modules = Mods}}.
+
+validChildType(supervisor) -> true;
+validChildType(worker) -> true;
+validChildType(What) -> throw({invalid_child_type, What}).
+
+validName(_Name) -> true.
+
+validFunc({M, F, A}) when is_atom(M),
+                          is_atom(F),
+                          is_list(A) -> true;
+validFunc(Func)                     -> throw({invalid_mfa, Func}).
+
+validRestartType(permanent)   -> true;
+validRestartType(temporary)   -> true;
+validRestartType(transient)   -> true;
+validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}).
+
+validShutdown(Shutdown, _)
+  when is_integer(Shutdown), Shutdown > 0 -> true;
+validShutdown(infinity, supervisor)    -> true;
+validShutdown(brutal_kill, _)          -> true;
+validShutdown(Shutdown, _)             -> throw({invalid_shutdown, Shutdown}).
+
+validMods(dynamic) -> true;
+validMods(Mods) when is_list(Mods) ->
+    lists:foreach(fun(Mod) ->
+                    if
+                        is_atom(Mod) -> ok;
+                        true -> throw({invalid_module, Mod})
+                    end
+                  end,
+                  Mods);
+validMods(Mods) -> throw({invalid_modules, Mods}).
+
+%%% ------------------------------------------------------
+%%% Add a new restart and calculate if the max restart
+%%% intensity has been reached (in that case the supervisor
+%%% shall terminate).
+%%% All restarts that occurred inside the period amount of seconds
+%%% are kept in the #state.restarts list.
+%%% Returns: {ok, State'} | {terminate, State'}
+%%% ------------------------------------------------------
+
+add_restart(State) ->
+    I = State#state.intensity,
+    P = State#state.period,
+    R = State#state.restarts,
+    Now = erlang:now(),
+    R1 = add_restart([Now|R], Now, P),
+    State1 = State#state{restarts = R1},
+    case length(R1) of
+        CurI when CurI =< I ->
+            {ok, State1};
+        _ ->
+            {terminate, State1}
+    end.
+
+add_restart([R|Restarts], Now, Period) ->
+    case inPeriod(R, Now, Period) of
+        true ->
+            [R|add_restart(Restarts, Now, Period)];
+        _ ->
+            []
+    end;
+add_restart([], _, _) ->
+    [].
+
+inPeriod(Time, Now, Period) ->
+    case difference(Time, Now) of
+        T when T > Period ->
+            false;
+        _ ->
+            true
+    end.
+
+%%
+%% Time = {MegaSecs, Secs, MicroSecs} (NOTE: MicroSecs is ignored)
+%% Calculate the time elapsed in seconds between two timestamps.
+%% If MegaSecs is equal just subtract Secs. +%% Else calculate the Mega difference and add the Secs difference, +%% note that Secs difference can be negative, e.g. +%% {827, 999999, 676} diff {828, 1, 653753} == > 2 secs. +%% +difference({TimeM, TimeS, _}, {CurM, CurS, _}) when CurM > TimeM -> + ((CurM - TimeM) * 1000000) + (CurS - TimeS); +difference({_, TimeS, _}, {_, CurS, _}) -> + CurS - TimeS. + +%%% ------------------------------------------------------ +%%% Error and progress reporting. +%%% ------------------------------------------------------ + +report_error(Error, Reason, Child, SupName) -> + ErrorMsg = [{supervisor, SupName}, + {errorContext, Error}, + {reason, Reason}, + {offender, extract_child(Child)}], + error_logger:error_report(supervisor_report, ErrorMsg). + + +extract_child(Child) -> + [{pid, Child#child.pid}, + {name, Child#child.name}, + {mfa, Child#child.mfa}, + {restart_type, Child#child.restart_type}, + {shutdown, Child#child.shutdown}, + {child_type, Child#child.child_type}]. + +report_progress(Child, SupName) -> + Progress = [{supervisor, SupName}, + {started, extract_child(Child)}], + error_logger:info_report(progress, Progress). -- cgit v1.2.1 From 9e139ba6968b5670cb40c79021ec799752219b5f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 15:46:20 +0000 Subject: Patched supervisor2 with support for simple_one_for_one_terminate --- src/supervisor2.erl | 54 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/supervisor2.erl b/src/supervisor2.erl index fb1303d1..4325a23c 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -1,3 +1,14 @@ +%% This file is a copy of supervisor.erl from the R13B-3 Erlang/OTP +%% distribution, with the following modifications: +%% +%% 1) the module name is supervisor2 +%% +%% 2) there is a new strategy called +%% simple_one_for_one_terminate. This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly killed as +%% per the shutdown component of the child_spec. +%% +%% All modifications are (C) 2010 LShift Ltd. %% %% %CopyrightBegin% %% @@ -16,7 +27,7 @@ %% %% %CopyrightEnd% %% --module(supervisor). +-module(supervisor2). -behaviour(gen_server). @@ -53,7 +64,10 @@ child_type, modules = []}). --define(is_simple(State), State#state.strategy =:= simple_one_for_one). +-define(is_simple(State), State#state.strategy =:= simple_one_for_one orelse + State#state.strategy =:= simple_one_for_one_terminate). +-define(is_terminate_simple(State), + State#state.strategy =:= simple_one_for_one_terminate). behaviour_info(callbacks) -> [{init,1}]; @@ -66,10 +80,10 @@ behaviour_info(_Other) -> %%% SupName = {local, atom()} | {global, atom()}. %%% --------------------------------------------------- start_link(Mod, Args) -> - gen_server:start_link(supervisor, {self, Mod, Args}, []). + gen_server:start_link(supervisor2, {self, Mod, Args}, []). start_link(SupName, Mod, Args) -> - gen_server:start_link(SupName, supervisor, {SupName, Mod, Args}, []). + gen_server:start_link(SupName, supervisor2, {SupName, Mod, Args}, []). %%% --------------------------------------------------- %%% Interface functions. @@ -231,10 +245,11 @@ handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> {reply, What, State} end; -%%% The requests terminate_child, delete_child and restart_child are -%%% invalid for simple_one_for_one supervisors. 
+%%% The requests terminate_child, delete_child and restart_child are +%%% invalid for simple_one_for_one and simple_one_for_one_terminate +%%% supervisors. handle_call({_Req, _Data}, _From, State) when ?is_simple(State) -> - {reply, {error, simple_one_for_one}, State}; + {reply, {error, State#state.strategy}, State}; handle_call({start_child, ChildSpec}, _From, State) -> case check_childspec(ChildSpec) of @@ -326,6 +341,10 @@ handle_info(Msg, State) -> %% %% Terminate this server. %% +terminate(_Reason, State) when ?is_terminate_simple(State) -> + terminate_simple_children( + hd(State#state.children), State#state.dynamics, State#state.name), + ok; terminate(_Reason, State) -> terminate_children(State#state.children, State#state.name), ok. @@ -488,7 +507,9 @@ restart(Child, State) -> {shutdown, remove_child(Child, NState)} end. -restart(simple_one_for_one, Child, State) -> +restart(Strategy, Child, State) + when Strategy =:= simple_one_for_one orelse + Strategy =:= simple_one_for_one_terminate -> #child{mfa = {M, F, A}} = Child, Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), case do_start_child_i(M, F, A) of @@ -549,6 +570,12 @@ terminate_children([Child | Children], SupName, Res) -> terminate_children([], _SupName, Res) -> Res. +terminate_simple_children(Child, Dynamics, SupName) -> + dict:fold(fun (Pid, _Args, _Any) -> + do_terminate(Child#child{pid = Pid}, SupName) + end, ok, Dynamics), + ok. + do_terminate(Child, SupName) when Child#child.pid =/= undefined -> case shutdown(Child#child.pid, Child#child.shutdown) of @@ -718,11 +745,12 @@ init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) -> init_state1(_SupName, Type, _, _) -> {invalid_type, Type}. -validStrategy(simple_one_for_one) -> true; -validStrategy(one_for_one) -> true; -validStrategy(one_for_all) -> true; -validStrategy(rest_for_one) -> true; -validStrategy(What) -> throw({invalid_strategy, What}). +validStrategy(simple_one_for_one_terminate) -> true; +validStrategy(simple_one_for_one) -> true; +validStrategy(one_for_one) -> true; +validStrategy(one_for_all) -> true; +validStrategy(rest_for_one) -> true; +validStrategy(What) -> throw({invalid_strategy, What}). validIntensity(Max) when is_integer(Max), Max >= 0 -> true; -- cgit v1.2.1 From 5c525da03b973f827e6be771d3a34f483ea3bfb0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 16:08:00 +0000 Subject: There's the possibility that the timer may fire whilst we're doing the vq sync callback thus leading to a 2nd call to that, hence specialising a noop clause --- src/rabbit_variable_queue.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 89493b5e..0dbec1e8 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -540,6 +540,8 @@ tx_commit_from_msg_store(Pubs, AckTags, From, [Pubs | SPubs], [From | SFroms] }}. 
+tx_commit_from_vq(State = #vqstate { on_sync = {_, _, []} }) -> + State; tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) -> State1 = ack(lists:flatten(SAcks), State), {PubSeqIds, State2 = #vqstate { index_state = IndexState }} = -- cgit v1.2.1 From a875435bc4947b677769139795adc89d996a0a33 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 17:00:57 +0000 Subject: Minor refactorings and changes to deps generation --- generate_deps | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/generate_deps b/generate_deps index d438660b..c780b4b6 100644 --- a/generate_deps +++ b/generate_deps @@ -1,5 +1,6 @@ #!/usr/bin/env escript %% -*- erlang -*- +-mode(compile). main([IncludeDir, ErlDir, TargetFile]) -> ErlDirContents = filelib:wildcard("*.erl", ErlDir), @@ -11,11 +12,8 @@ main([IncludeDir, ErlDir, TargetFile]) -> HrlFiles = [filename:join(IncludeDir, FileName) || FileName <- IncludeDirContents], Headers = sets:from_list(IncludeDirContents), - Deps = lists:foldl(fun (Path, Acc) -> make_deps(Path, Acc) end, - dict:new(), ErlFiles), - Deps1 = lists:foldl(fun (Path, Acc) -> make_deps(Path, Acc) end, - Deps, HrlFiles), - Deps2 = dict:map( + Deps = lists:foldl(fun make_deps/2, dict:new(), ErlFiles ++ HrlFiles), + Deps1 = dict:map( fun (_Path, Dep) -> lists:filter( fun ({module, Behaviour}) -> @@ -23,7 +21,7 @@ main([IncludeDir, ErlDir, TargetFile]) -> ({include, Include}) -> sets:is_element(Include, Headers) end, Dep) - end, Deps1), + end, Deps), {ok, Hdl} = file:open(TargetFile, [write, delayed_write]), dict:fold( fun (_Path, [], ok) -> @@ -33,16 +31,14 @@ main([IncludeDir, ErlDir, TargetFile]) -> false -> Module = filename:basename(Path, ".erl"), ok = file:write(Hdl, ["$(EBIN_DIR)/", Module, ".beam:"]), - lists:foreach( - fun (E) -> write_deps(Hdl, IncludeDir, E) end, Dep), + [write_deps(Hdl, IncludeDir, E) || E <- Dep], file:write(Hdl, [" ", ErlDir, "/", Module, ".erl\n"]); true -> ok = file:write(Hdl, [Path, ":"]), - lists:foreach( - fun (E) -> write_deps(Hdl, IncludeDir, E) end, Dep), + [write_deps(Hdl, IncludeDir, E) || E <- Dep], file:write(Hdl, "\n") end - end, ok, Deps2), + end, ok, Deps1), ok = file:write(Hdl, [TargetFile, ": ", escript:script_name(), "\n"]), ok = file:sync(Hdl), ok = file:close(Hdl). -- cgit v1.2.1 From 989161b73a53802831e7b24486a769d8eb511c3e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jan 2010 17:06:42 +0000 Subject: (Minor refactorings and changes to deps generation)? --- generate_deps | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/generate_deps b/generate_deps index c780b4b6..3804ec9c 100644 --- a/generate_deps +++ b/generate_deps @@ -50,9 +50,7 @@ write_deps(Hdl, IncludeDir, {include, Include}) -> make_deps(Path, Deps) -> {ok, Forms} = epp:parse_file(Path, [], [{use_specs, true}]), - Behaviours = - lists:foldl(fun (Form, Acc) -> detect_deps(Form, Acc) end, - [], Forms), + Behaviours = lists:foldl(fun detect_deps/2, [], Forms), dict:store(Path, Behaviours, Deps). 
detect_deps({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> -- cgit v1.2.1 From 14761bac6d74b9ef4a96ccb74d854b79504067eb Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 25 Jan 2010 17:37:03 +0000 Subject: refactor --- generate_deps | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/generate_deps b/generate_deps index 3804ec9c..9be6fc09 100644 --- a/generate_deps +++ b/generate_deps @@ -12,7 +12,9 @@ main([IncludeDir, ErlDir, TargetFile]) -> HrlFiles = [filename:join(IncludeDir, FileName) || FileName <- IncludeDirContents], Headers = sets:from_list(IncludeDirContents), - Deps = lists:foldl(fun make_deps/2, dict:new(), ErlFiles ++ HrlFiles), + Deps = lists:foldl(fun (Path, Deps1) -> + dict:store(Path, detect_deps(Path), Deps1) + end, dict:new(), ErlFiles ++ HrlFiles), Deps1 = dict:map( fun (_Path, Dep) -> lists:filter( @@ -48,14 +50,13 @@ write_deps(Hdl, _IncludeDir, {module, Behaviour}) -> write_deps(Hdl, IncludeDir, {include, Include}) -> ok = file:write(Hdl, [" ", IncludeDir, "/", Include]). -make_deps(Path, Deps) -> +detect_deps(Path) -> {ok, Forms} = epp:parse_file(Path, [], [{use_specs, true}]), - Behaviours = lists:foldl(fun detect_deps/2, [], Forms), - dict:store(Path, Behaviours, Deps). - -detect_deps({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> - [{module, Behaviour} | Deps]; -detect_deps({error, {_LineNumber, epp, {include, file, Include}}}, Deps) -> - [{include, Include} | Deps]; -detect_deps(_Form, Deps) -> - Deps. + lists:foldl( + fun ({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> + [{module, Behaviour} | Deps]; + ({error, {_LineNumber, epp, {include, file, Include}}}, Deps) -> + [{include, Include} | Deps]; + (_Form, Deps) -> + Deps + end, [], Forms). -- cgit v1.2.1 From 12d92d675d34f14b713d2038af29e36bd5c55a64 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 25 Jan 2010 17:59:19 +0000 Subject: explain the non-obvious --- generate_deps | 2 ++ 1 file changed, 2 insertions(+) diff --git a/generate_deps b/generate_deps index 9be6fc09..545f7aff 100644 --- a/generate_deps +++ b/generate_deps @@ -51,6 +51,8 @@ write_deps(Hdl, IncludeDir, {include, Include}) -> ok = file:write(Hdl, [" ", IncludeDir, "/", Include]). detect_deps(Path) -> + %% The easiest way to detect includes is to make them fail, so we + %% deliberately do not set the include path here. {ok, Forms} = epp:parse_file(Path, [], [{use_specs, true}]), lists:foldl( fun ({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> -- cgit v1.2.1 From 9c2adc56f1b09fe1088db43c81b27be7b7ea8fb8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 26 Jan 2010 12:27:32 +0000 Subject: Converted to get epp to do the transitive closure by giving it the include path. 
This all seems to work, but it does make deps generation nearly twice as slow --- Makefile | 3 --- generate_deps | 70 +++++++++++++++++++++++++---------------------------------- 2 files changed, 29 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index 2c376069..866db51c 100644 --- a/Makefile +++ b/Makefile @@ -70,9 +70,6 @@ $(EBIN_DIR)/%.beam: $(SOURCE_DIR)/%.erl erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< # ERLC_EMULATOR="erl -smp" erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< -$(INCLUDE_DIR)/%.hrl: - @touch $@ - $(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH) $(PYTHON) codegen.py header $(AMQP_SPEC_JSON_PATH) $@ diff --git a/generate_deps b/generate_deps index 545f7aff..98f07d40 100644 --- a/generate_deps +++ b/generate_deps @@ -8,57 +8,45 @@ main([IncludeDir, ErlDir, TargetFile]) -> Modules = sets:from_list( [list_to_atom(filename:basename(FileName, ".erl")) || FileName <- ErlDirContents]), - IncludeDirContents = filelib:wildcard("*.hrl", IncludeDir), - HrlFiles = [filename:join(IncludeDir, FileName) || - FileName <- IncludeDirContents], - Headers = sets:from_list(IncludeDirContents), - Deps = lists:foldl(fun (Path, Deps1) -> - dict:store(Path, detect_deps(Path), Deps1) - end, dict:new(), ErlFiles ++ HrlFiles), - Deps1 = dict:map( - fun (_Path, Dep) -> - lists:filter( - fun ({module, Behaviour}) -> - sets:is_element(Behaviour, Modules); - ({include, Include}) -> - sets:is_element(Include, Headers) - end, Dep) - end, Deps), + Headers = sets:from_list( + [filename:join(IncludeDir, FileName) || + FileName <- filelib:wildcard("*.hrl", IncludeDir)]), + Deps = lists:foldl( + fun (Path, Deps1) -> + dict:store(Path, + detect_deps(IncludeDir, Modules, Headers, Path), + Deps1) + end, dict:new(), ErlFiles), {ok, Hdl} = file:open(TargetFile, [write, delayed_write]), dict:fold( fun (_Path, [], ok) -> ok; (Path, Dep, ok) -> - case lists:suffix(".hrl", Path) of - false -> - Module = filename:basename(Path, ".erl"), - ok = file:write(Hdl, ["$(EBIN_DIR)/", Module, ".beam:"]), - [write_deps(Hdl, IncludeDir, E) || E <- Dep], - file:write(Hdl, [" ", ErlDir, "/", Module, ".erl\n"]); - true -> - ok = file:write(Hdl, [Path, ":"]), - [write_deps(Hdl, IncludeDir, E) || E <- Dep], - file:write(Hdl, "\n") - end - end, ok, Deps1), + Module = filename:basename(Path, ".erl"), + ok = file:write(Hdl, ["$(EBIN_DIR)/", Module, ".beam:"]), + ok = sets:fold( + fun (E, ok) -> file:write(Hdl, [" ", E]) end, ok, Dep), + file:write(Hdl, [" ", ErlDir, "/", Module, ".erl\n"]) + end, ok, Deps), ok = file:write(Hdl, [TargetFile, ": ", escript:script_name(), "\n"]), ok = file:sync(Hdl), ok = file:close(Hdl). -write_deps(Hdl, _IncludeDir, {module, Behaviour}) -> - ok = file:write(Hdl, [" $(EBIN_DIR)/", atom_to_list(Behaviour), ".beam"]); -write_deps(Hdl, IncludeDir, {include, Include}) -> - ok = file:write(Hdl, [" ", IncludeDir, "/", Include]). - -detect_deps(Path) -> - %% The easiest way to detect includes is to make them fail, so we - %% deliberately do not set the include path here. 
-    {ok, Forms} = epp:parse_file(Path, [], [{use_specs, true}]),
+detect_deps(IncludeDir, Modules, Headers, Path) ->
+    {ok, Forms} = epp:parse_file(Path, [IncludeDir], [{use_specs, true}]),
     lists:foldl(
       fun ({attribute, _LineNumber, behaviour, Behaviour}, Deps) ->
-              [{module, Behaviour} | Deps];
-          ({error, {_LineNumber, epp, {include, file, Include}}}, Deps) ->
-              [{include, Include} | Deps];
+              case sets:is_element(Behaviour, Modules) of
+                  true  -> sets:add_element(["$(EBIN_DIR)/",
+                                             atom_to_list(Behaviour),
+                                             ".beam"], Deps);
+                  false -> Deps
+              end;
+          ({attribute, _LineNumber, file, {FileName, _LineNumber}}, Deps) ->
+              case sets:is_element(FileName, Headers) of
+                  true  -> sets:add_element(FileName, Deps);
+                  false -> Deps
+              end;
           (_Form, Deps) ->
               Deps
-      end, [], Forms).
+      end, sets:new(), Forms).
-- cgit v1.2.1


From b0d2692f564d735836d0ff96baf90c0b5d707b7d Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 26 Jan 2010 13:45:24 +0000
Subject: Oops, they may not match

---
 generate_deps | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generate_deps b/generate_deps
index 98f07d40..dbb2d6df 100644
--- a/generate_deps
+++ b/generate_deps
@@ -42,7 +42,7 @@ detect_deps(IncludeDir, Modules, Headers, Path) ->
                                              ".beam"], Deps);
                   false -> Deps
               end;
-          ({attribute, _LineNumber, file, {FileName, _LineNumber}}, Deps) ->
+          ({attribute, _LineNumber, file, {FileName, _LineNumber1}}, Deps) ->
              case sets:is_element(FileName, Headers) of
                  true  -> sets:add_element(FileName, Deps);
                  false -> Deps
-- cgit v1.2.1


From 4ccec89bac222a7575e841124c61e7fba0af255b Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Tue, 26 Jan 2010 17:03:12 +0000
Subject: make generate_deps less aware of its context

---
 Makefile      |  2 +-
 generate_deps | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index 866db51c..0be85ad7 100644
--- a/Makefile
+++ b/Makefile
@@ -61,7 +61,7 @@ ERL_EBIN=erl -noinput -pa $(EBIN_DIR)
 all: $(DEPS_FILE) $(TARGETS)
 
 $(DEPS_FILE): $(SOURCES) $(INCLUDES)
-	escript generate_deps $(INCLUDE_DIR) $(SOURCE_DIR) $(DEPS_FILE)
+	escript generate_deps $(INCLUDE_DIR) $(SOURCE_DIR) \$$\(EBIN_DIR\) $(DEPS_FILE)
 
 $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app
 	escript generate_app $(EBIN_DIR) $@ < $<
diff --git a/generate_deps b/generate_deps
index dbb2d6df..916006d1 100644
--- a/generate_deps
+++ b/generate_deps
@@ -2,7 +2,7 @@
 %% -*- erlang -*-
 -mode(compile).
-main([IncludeDir, ErlDir, TargetFile]) -> +main([IncludeDir, ErlDir, EbinDir, TargetFile]) -> ErlDirContents = filelib:wildcard("*.erl", ErlDir), ErlFiles = [filename:join(ErlDir, FileName) || FileName <- ErlDirContents], Modules = sets:from_list( @@ -13,8 +13,8 @@ main([IncludeDir, ErlDir, TargetFile]) -> FileName <- filelib:wildcard("*.hrl", IncludeDir)]), Deps = lists:foldl( fun (Path, Deps1) -> - dict:store(Path, - detect_deps(IncludeDir, Modules, Headers, Path), + dict:store(Path, detect_deps(IncludeDir, EbinDir, + Modules, Headers, Path), Deps1) end, dict:new(), ErlFiles), {ok, Hdl} = file:open(TargetFile, [write, delayed_write]), @@ -23,23 +23,23 @@ main([IncludeDir, ErlDir, TargetFile]) -> ok; (Path, Dep, ok) -> Module = filename:basename(Path, ".erl"), - ok = file:write(Hdl, ["$(EBIN_DIR)/", Module, ".beam:"]), - ok = sets:fold( - fun (E, ok) -> file:write(Hdl, [" ", E]) end, ok, Dep), + ok = file:write(Hdl, [EbinDir, "/", Module, ".beam:"]), + ok = sets:fold(fun (E, ok) -> file:write(Hdl, [" ", E]) end, + ok, Dep), file:write(Hdl, [" ", ErlDir, "/", Module, ".erl\n"]) end, ok, Deps), ok = file:write(Hdl, [TargetFile, ": ", escript:script_name(), "\n"]), ok = file:sync(Hdl), ok = file:close(Hdl). -detect_deps(IncludeDir, Modules, Headers, Path) -> +detect_deps(IncludeDir, EbinDir, Modules, Headers, Path) -> {ok, Forms} = epp:parse_file(Path, [IncludeDir], [{use_specs, true}]), lists:foldl( fun ({attribute, _LineNumber, behaviour, Behaviour}, Deps) -> case sets:is_element(Behaviour, Modules) of - true -> sets:add_element(["$(EBIN_DIR)/", - atom_to_list(Behaviour), - ".beam"], Deps); + true -> sets:add_element( + [EbinDir, "/", atom_to_list(Behaviour), ".beam"], + Deps); false -> Deps end; ({attribute, _LineNumber, file, {FileName, _LineNumber1}}, Deps) -> -- cgit v1.2.1 From 94f22a687ba8b0cb9b5cbdd384e36646b13e6b2d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 26 Jan 2010 17:45:21 +0000 Subject: cleaner handling of rabbit_framing dependencies these are now computed, like all others --- Makefile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 0be85ad7..78f80790 100644 --- a/Makefile +++ b/Makefile @@ -10,9 +10,9 @@ DEPS_FILE=deps.mk SOURCE_DIR=src EBIN_DIR=ebin INCLUDE_DIR=include -INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) -SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) -BEAM_TARGETS=$(EBIN_DIR)/rabbit_framing.beam $(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) +INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl +SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing.erl +BEAM_TARGETS=$(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) WEB_URL=http://stage.rabbitmq.com/ MANPAGES=$(patsubst %.pod, %.gz, $(wildcard docs/*.[0-9].pod)) @@ -76,8 +76,6 @@ $(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.p $(SOURCE_DIR)/rabbit_framing.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH) $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_PATH) $@ -$(EBIN_DIR)/rabbit_framing.beam: $(INCLUDE_DIR)/rabbit_framing.hrl - dialyze: $(BEAM_TARGETS) $(BASIC_PLT) $(ERL_EBIN) -eval \ "rabbit_dialyzer:halt_with_code(rabbit_dialyzer:dialyze_files(\"$(BASIC_PLT)\", \"$(BEAM_TARGETS)\"))." 
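For reference, the rules that generate_deps now writes to deps.mk take
roughly the following shape (a sketch; the module and header names are
illustrative, and the literal $(EBIN_DIR) text is left for make to
expand, which is why the Makefile passes it escaped):

    $(EBIN_DIR)/rabbit_amqqueue_process.beam: $(EBIN_DIR)/gen_server2.beam include/rabbit.hrl src/rabbit_amqqueue_process.erl

    deps.mk: generate_deps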
-- cgit v1.2.1 From f08c11ffa94aee2ca6ad7f7ba1f26eeff3f9b849 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 27 Jan 2010 12:49:24 +0000 Subject: revert uncosmetic change --- src/rabbit_mnesia.erl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 1443d769..1e700362 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -162,8 +162,7 @@ table_definitions() -> {disc_copies, [node()]}]}, {rabbit_queue, [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}]} - ]. + {attributes, record_info(fields, amqqueue)}]}]. table_names() -> [Tab || {Tab, _} <- table_definitions()]. @@ -197,8 +196,7 @@ ensure_mnesia_not_running() -> check_schema_integrity() -> %%TODO: more thorough checks - case catch [mnesia:table_info(Tab, version) - || Tab <- table_names()] of + case catch [mnesia:table_info(Tab, version) || Tab <- table_names()] of {'EXIT', Reason} -> {error, Reason}; _ -> ok end. -- cgit v1.2.1 From 8af88d43260cc60b69e1c139def4351418674bf8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 27 Jan 2010 15:30:35 +0000 Subject: it turns out we no longer use gen_server2 insomniation --- src/gen_server2.erl | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index c4806151..1b24f28e 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -25,11 +25,8 @@ %% handle_pre_hibernate/1 and handle_post_hibernate/1. These will be %% called immediately prior to and post hibernation, respectively. If %% handle_pre_hibernate returns {hibernate, NewState} then the process -%% will hibernate. If handle_pre_hibernate returns {insomniate, -%% NewState} then the process will go around again, trying to receive -%% for up to the current timeout value before attempting to hibernate -%% again. If the module does not implement handle_pre_hibernate/1 then -%% the default action is to hibernate. +%% will hibernate. If the module does not implement +%% handle_pre_hibernate/1 then the default action is to hibernate. 
%% %% 6) init can return a 4th arg, {backoff, InitialTimeout, %% MinimumTimeout, DesiredHibernatePeriod} (all in @@ -129,7 +126,6 @@ %%% handle_pre_hibernate(State) %%% %%% ==> {hibernate, State} -%%% {insomniate, State} %%% {stop, Reason, State} %%% Reason = normal | shutdown | Term, terminate(State) is called %%% @@ -549,9 +545,6 @@ pre_hibernate(Parent, Name, State, Mod, TimeoutState, Queue, Debug) -> {hibernate, NState} -> hibernate(Parent, Name, NState, Mod, TimeoutState, Queue, Debug); - {insomniate, NState} -> - process_next_msg(Parent, Name, NState, Mod, hibernate, - TimeoutState, Queue, Debug); Reply -> handle_common_termination(Reply, Name, pre_hibernate, Mod, State, Debug) -- cgit v1.2.1 From fd841b997eb68c1b71b885fdd68831a3c23d3171 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 27 Jan 2010 16:29:40 +0000 Subject: move the handling of absent vm_memory_monitor process to call site ...for consistency with rest of API Also, use gen_server, not gen_server2, to reduce dependencies --- src/rabbit_memory_monitor.erl | 7 ++++--- src/vm_memory_monitor.erl | 6 +----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index d6693d95..fbfb9bbb 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -124,9 +124,10 @@ stop() -> init([]) -> MemoryLimit = trunc(?MEMORY_LIMIT_SCALING * - (case vm_memory_monitor:get_memory_limit() of - undefined -> ?MEMORY_SIZE_FOR_DISABLED_VMM; - Limit -> Limit + (try + vm_memory_monitor:get_memory_limit() + catch + exit:{noproc, _} -> ?MEMORY_SIZE_FOR_DISABLED_VMM end)), {ok, TRef} = timer:apply_interval(?DEFAULT_UPDATE_INTERVAL, diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index 495eff69..ceba5e0a 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -110,11 +110,7 @@ set_vm_memory_high_watermark(Fraction) -> gen_server:call(?MODULE, {set_vm_memory_high_watermark, Fraction}). get_memory_limit() -> - try - gen_server2:call(?MODULE, get_memory_limit) - catch - exit:{noproc, _} -> undefined - end. + gen_server:call(?MODULE, get_memory_limit). %%---------------------------------------------------------------------------- %% gen_server callbacks -- cgit v1.2.1 From c26b741ef8a7e340f9b996ef5ae92a738716eb2a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 27 Jan 2010 17:07:21 +0000 Subject: revert accidental though harmless change --- src/rabbit_amqqueue_sup.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl index 160770d9..06a1eebe 100644 --- a/src/rabbit_amqqueue_sup.erl +++ b/src/rabbit_amqqueue_sup.erl @@ -44,6 +44,6 @@ start_link() -> init([]) -> {ok, {{simple_one_for_one_terminate, 10, 10}, - [{amqqueue, {rabbit_amqqueue_process, start_link, []}, + [{rabbit_amqqueue, {rabbit_amqqueue_process, start_link, []}, %% 16#ffffffff is the biggest value allowed temporary, 16#ffffffff, worker, [rabbit_amqqueue_process]}]}}. 
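For context, children of this simple_one_for_one_terminate supervisor
are started dynamically. Assuming the supervisor is locally registered
under its module name (a sketch; the registration is not shown in this
patch), a queue process is started with something like:

    {ok, QPid} = supervisor2:start_child(rabbit_amqqueue_sup, [Q]),
    %% invokes rabbit_amqqueue_process:start_link(Q); unlike plain
    %% simple_one_for_one, such children are explicitly terminated on
    %% supervisor shutdown, subject to the 16#ffffffff ms timeout above.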
-- cgit v1.2.1 From 0782c24705699df46d3d393fc86a318e74cef541 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 29 Jan 2010 15:29:30 +0000 Subject: remove outdated comment --- src/rabbit_amqqueue.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 3b63147c..47d9850d 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -181,10 +181,6 @@ declare(QueueName, Durable, AutoDelete, Args) -> internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) -> case rabbit_misc:execute_mnesia_transaction( fun () -> - %% we could still find that mnesia has another - %% entry here because the queue may exist on - %% another node, beyond the knowledge of our own - %% local queue_sup. case mnesia:wread({rabbit_queue, QueueName}) of [] -> case mnesia:read( -- cgit v1.2.1 From 73550747ef97f7f6a77ff5f853098441ab8ab742 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Feb 2010 13:37:21 +0000 Subject: Stored the ch_pid in the txn --- src/rabbit_amqqueue_process.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 4f700193..b6650ddd 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -519,7 +519,8 @@ all_tx_record() -> record_pending_message(Txn, ChPid, Message) -> Tx = #tx{pending_messages = Pending} = lookup_tx(Txn), record_current_channel_tx(ChPid, Txn), - store_tx(Txn, Tx #tx { pending_messages = [Message | Pending] }). + store_tx(Txn, Tx #tx { pending_messages = [Message | Pending], + ch_pid = ChPid }). record_pending_acks(Txn, ChPid, MsgIds) -> Tx = #tx{pending_acks = Pending} = lookup_tx(Txn), -- cgit v1.2.1 From 0d69dd984887cecb2523061b789abbe8280f30ed Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Feb 2010 18:45:19 +0000 Subject: Remove ets:inserts which should have been ets:update_element --- src/rabbit_msg_store.erl | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 1b356370..50114a96 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -329,7 +329,7 @@ client_read1(#msg_location { msg_id = MsgId, ref_count = RefCount, file = File } Defer(); _ -> ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, 1}), + {#file_summary.readers, +1}), Release = fun() -> ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, {#file_summary.readers, -1}) @@ -462,11 +462,11 @@ handle_cast({write, MsgId, Msg}, msg_id = MsgId, ref_count = 1, file = CurFile, offset = CurOffset, total_size = TotalSize }, State), - [FSEntry = #file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - right = undefined, - locked = false, - file_size = FileSize }] = + [#file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined, + locked = false, + file_size = FileSize }] = ets:lookup(?FILE_SUMMARY_ETS_NAME, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> @@ -474,11 +474,12 @@ handle_cast({write, MsgId, Msg}, ValidTotalSize1; true -> ContiguousTop end, - true = ets:insert(?FILE_SUMMARY_ETS_NAME, - FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1, - file_size = FileSize + TotalSize }), + true = ets:update_element( + ?FILE_SUMMARY_ETS_NAME, + CurFile, + 
[{#file_summary.valid_total_size, ValidTotalSize1}, + {#file_summary.contiguous_top, ContiguousTop1}, + {#file_summary.file_size, FileSize + TotalSize}]), NextOffset = CurOffset + TotalSize, noreply(maybe_compact(maybe_roll_to_new_file( NextOffset, State #msstate @@ -536,7 +537,8 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, %% we always move data left, so Source has gone and was on the %% right, so need to make dest = source.right.left, and also %% dest.right = source.right - [#file_summary { left = Dest, right = SourceRight, locked = true }] = + [#file_summary { left = Dest, right = SourceRight, locked = true, + readers = 0 }] = ets:lookup(?FILE_SUMMARY_ETS_NAME, Source), %% this could fail if SourceRight == undefined ets:update_element(?FILE_SUMMARY_ETS_NAME, SourceRight, @@ -736,9 +738,9 @@ remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, false -> true end, ok = remove_cache_entry(MsgId), - [FSEntry = #file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - locked = Locked }] = + [#file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + locked = Locked }] = ets:lookup(?FILE_SUMMARY_ETS_NAME, File), case Locked of true -> @@ -747,10 +749,11 @@ remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, ok = index_delete(MsgId, State), ContiguousTop1 = lists:min([ContiguousTop, Offset]), ValidTotalSize1 = ValidTotalSize - TotalSize, - true = ets:insert(?FILE_SUMMARY_ETS_NAME, - FSEntry #file_summary { - valid_total_size = ValidTotalSize1, - contiguous_top = ContiguousTop1 }), + true = ets:update_element( + ?FILE_SUMMARY_ETS_NAME, + File, + [{#file_summary.valid_total_size, ValidTotalSize1}, + {#file_summary.contiguous_top, ContiguousTop1}]), State1 = delete_file_if_empty(File, State), State1 #msstate { sum_valid_data = SumValid - TotalSize } end; -- cgit v1.2.1 From 08d5db507ba15d4fd5b3fb97a5157afb35457544 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 10 Feb 2010 09:01:48 +0000 Subject: cosmetic --- src/rabbit_amqqueue_process.erl | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b6650ddd..996fe52b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -111,18 +111,16 @@ init(Q = #amqqueue { name = QName }) -> ok = rabbit_memory_monitor:register (self(), {rabbit_amqqueue, set_queue_duration, [self()]}), VQS = rabbit_variable_queue:init(QName), - State = #q{q = Q, - owner = none, - exclusive_consumer = none, - has_had_consumers = false, - variable_queue_state = VQS, - next_msg_id = 1, - active_consumers = queue:new(), - blocked_consumers = queue:new(), - sync_timer_ref = undefined, - rate_timer_ref = undefined - }, - {ok, State, hibernate, + {ok, #q{q = Q, + owner = none, + exclusive_consumer = none, + has_had_consumers = false, + variable_queue_state = VQS, + next_msg_id = 1, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + sync_timer_ref = undefined, + rate_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
 terminate(shutdown, #q{variable_queue_state = VQS}) ->
@@ -293,11 +291,10 @@ deliver_msgs_to_consumers(
                   {ActiveConsumers1,
                    queue:in(QEntry, BlockedConsumers1)}
           end,
-            State2 = State1 #q {
+            State2 = State1#q{
                       active_consumers = NewActiveConsumers,
                       blocked_consumers = NewBlockedConsumers,
-                      next_msg_id = NextId + 1
-                       },
+                      next_msg_id = NextId + 1},
             deliver_msgs_to_consumers(Funs, FunAcc1, State2);
         %% if IsMsgReady then we've hit the limiter
         false when IsMsgReady ->
@@ -519,8 +516,8 @@ all_tx_record() ->
 record_pending_message(Txn, ChPid, Message) ->
     Tx = #tx{pending_messages = Pending} = lookup_tx(Txn),
     record_current_channel_tx(ChPid, Txn),
-    store_tx(Txn, Tx #tx { pending_messages = [Message | Pending],
-                           ch_pid = ChPid }).
+    store_tx(Txn, Tx#tx{pending_messages = [Message | Pending],
+                        ch_pid = ChPid}).
 
 record_pending_acks(Txn, ChPid, MsgIds) ->
     Tx = #tx{pending_acks = Pending} = lookup_tx(Txn),
@@ -557,8 +554,6 @@ rollback_transaction(Txn, State) ->
     erase_tx(Txn),
     State #q { variable_queue_state = VQS }.
 
-%% {A, B} = collect_messages(C, D) %% A = C `intersect` D; B = D \\ C
-%% err, A = C `intersect` D , via projection through the dict that is C
 collect_messages(MsgIds, UAM) ->
     lists:mapfoldl(
       fun (MsgId, D) -> {dict:fetch(MsgId, D), dict:erase(MsgId, D)} end,
-- cgit v1.2.1


From 274bcef6c94263982fe05eab955b2922d7a608f5 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 10 Feb 2010 09:07:18 +0000
Subject: cosmetic

---
 src/rabbit_amqqueue_process.erl | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 996fe52b..d4de6bfd 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -248,12 +248,11 @@ record_current_channel_tx(ChPid, Txn) ->
     %% that wasn't happening already)
     store_ch_record((ch_record(ChPid))#cr{txn = Txn}).
-deliver_msgs_to_consumers( - Funs = {PredFun, DeliverFun}, FunAcc, - State = #q{q = #amqqueue{name = QName}, - active_consumers = ActiveConsumers, - blocked_consumers = BlockedConsumers, - next_msg_id = NextId}) -> +deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, + State = #q{q = #amqqueue{name = QName}, + active_consumers = ActiveConsumers, + blocked_consumers = BlockedConsumers, + next_msg_id = NextId}) -> case queue:out(ActiveConsumers) of {{value, QEntry = {ChPid, #consumer{tag = ConsumerTag, ack_required = AckRequired}}}, -- cgit v1.2.1 From 5d035e3461724ea6c8e659453363d004449eff97 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 10 Feb 2010 13:22:53 +0000 Subject: Fix all sorts of concurrency races in the concurrent readers versus GC --- src/rabbit_msg_store.erl | 142 +++++++++++++++++++++++++++++++---------------- 1 file changed, 94 insertions(+), 48 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 50114a96..c605a6a2 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -312,55 +312,101 @@ client_terminate(CState) -> %% Client-side-only helpers %%---------------------------------------------------------------------------- -client_read1(#msg_location { msg_id = MsgId, ref_count = RefCount, file = File } - = MsgLocation, Defer, CState) -> - [#file_summary { locked = Locked, right = Right }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), - case {Right, Locked} of - {undefined, false} -> - case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of - [] -> - Defer(); %% may have rolled over - [{MsgId, _FileOrUndefined, Msg}] -> - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), - {{ok, Msg}, CState} - end; - {_, true} -> - Defer(); - _ -> - ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, +1}), - Release = fun() -> - ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, -1}) - end, - %% If a GC hasn't already started, it won't start - %% now. Need to check again to see if we've been locked in - %% the meantime, between lookup and update_counter (thus - %% GC actually in progress). - [#file_summary { locked = Locked2 }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), - case Locked2 of - true -> - Release(), +client_read1(#msg_location { msg_id = MsgId, ref_count = RefCount, + file = File }, Defer, CState) -> + case ets:lookup(?FILE_SUMMARY_ETS_NAME, File) of + [] -> %% File has been GC'd and no longer exists. Go around again. + read(MsgId, CState); + [#file_summary { locked = Locked, right = Right }] -> + case {Right, Locked} of + {undefined, false} -> + case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + [] -> + Defer(); %% may have rolled over + [{MsgId, _FileOrUndefined, Msg}] -> + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + {{ok, Msg}, CState} + end; + {_, true} -> + %% of course, in the mean time, the GC could have + %% run and our msg is actually in a different + %% file, unlocked. However, defering is the safest + %% and simplest thing to do. Defer(); - false -> - %% Ok, we're definitely safe to continue - a GC - %% can't start up now, and isn't running, so - %% nothing will tell us from now on to close the - %% handle if it's already open. (Well, a GC could - %% start, and could put close entries into the ets - %% table, but the GC will wait until we're done - %% here before doing any real work.) 
- - %% This is fine to fail (already exists) - ets:insert_new(?FILE_HANDLES_ETS_NAME, - {{self(), File}, open}), - CState1 = close_all_indicated(CState), - {Msg, CState2} = read_from_disk(MsgLocation, CState1), - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), - Release(), - {{ok, Msg}, CState2} + _ -> + %% It's entirely possible that everything we're + %% doing from here on is for the wrong file, or a + %% non-existent file, as a GC may have finished. + try + ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, +1}) + catch error:badarg -> + %% the File has been GC'd and deleted. Go around. + read(MsgId, CState) + end, + Release = fun() -> ets:update_counter( + ?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, -1}) + end, + %% If a GC hasn't already started, it won't start + %% now. Need to check again to see if we've been + %% locked in the meantime, between lookup and + %% update_counter (thus GC started before our +1). + [#file_summary { locked = Locked2 }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case Locked2 of + true -> + %% If we get a badarg here, then the GC + %% has finished and deleted our file. Try + %% going around again. Otherwise, just + %% defer. + + %% badarg scenario: + %% we lookup, msg_store locks, gc starts, + %% gc ends, we +1 readers, msg_store + %% ets:deletes (and unlocks the dest) + try + Release(), + Defer() + catch error:badarg -> read(MsgId, CState) + end; + false -> + %% Ok, we're definitely safe to continue - + %% a GC can't start up now, and isn't + %% running, so nothing will tell us from + %% now on to close the handle if it's + %% already open. (Well, a GC could start, + %% and could put close entries into the + %% ets table, but the GC will wait until + %% we're done here before doing any real + %% work.) + + %% Finally, we need to recheck that the + %% msg is still at the same place - it's + %% possible an entire GC ran between us + %% doing the lookup and the +1 on the + %% readers. (Same as badarg scenario + %% above, but we don't have a missing file + %% - we just have the /wrong/ file). + + case index_lookup(MsgId, CState) of + MsgLocation2 = #msg_location { file = File } -> + %% Still the same file. + %% This is fine to fail (already exists) + ets:insert_new(?FILE_HANDLES_ETS_NAME, + {{self(), File}, open}), + CState1 = close_all_indicated(CState), + {Msg, CState2} = + read_from_disk(MsgLocation2, CState1), + ok = maybe_insert_into_cache( + RefCount, MsgId, Msg), + Release(), %% this MUST NOT fail with badarg + {{ok, Msg}, CState2}; + MsgLocation2 -> %% different file! + Release(), %% this MUST NOT fail with badarg + client_read1(MsgLocation2, Defer, CState) + end + end end end. 
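The reader/GC handshake above boils down to one pattern: optimistically bump a readers counter in ets, re-check the lock afterwards, and treat a badarg from ets:update_counter/3 as "the file summary row was deleted by the GC, so go around again". A minimal sketch of that pattern, assuming plain {File, Readers, Locked} tuples in place of the real #file_summary{} records; the module name and the Retry/Defer/Body funs are illustrative stand-ins for read/2, the Defer closure and the actual disk read, not part of the patch:

-module(reader_gc_sketch).
-export([with_reader/5]).

with_reader(Tab, File, Retry, Defer, Body) ->
    try ets:update_counter(Tab, File, {2, +1}) of
        _ ->
            %% Re-check: did a GC lock (or delete) the file between
            %% our original lookup and the +1?
            case ets:lookup(Tab, File) of
                [{File, _Readers, false}] ->
                    Res = Body(),
                    _ = ets:update_counter(Tab, File, {2, -1}),
                    Res;
                [{File, _Readers, true}] ->
                    %% Locked: undo the +1 and defer; badarg on the -1
                    %% means the GC has since deleted the row entirely.
                    try _ = ets:update_counter(Tab, File, {2, -1}),
                        Defer()
                    catch error:badarg -> Retry()
                    end;
                [] ->
                    Retry() %% row vanished: GC finished, go around
            end
    catch error:badarg ->
            Retry() %% file already GC'd and deleted before the +1
    end.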
-- cgit v1.2.1 From e0080f82ed63cf89f20ca8a8d8b0202988cc3a5a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 10 Feb 2010 16:54:54 +0000 Subject: Refactoring of client concurrent read --- src/rabbit_msg_store.erl | 170 ++++++++++++++++++++++------------------------- 1 file changed, 79 insertions(+), 91 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2bd3ac3d..b3d784b2 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -312,101 +312,89 @@ client_terminate(CState) -> %% Client-side-only helpers %%---------------------------------------------------------------------------- -client_read1(#msg_location { msg_id = MsgId, ref_count = RefCount, - file = File }, Defer, CState) -> +client_read1(MsgLocation = #msg_location { msg_id = MsgId, file = File }, Defer, + CState) -> case ets:lookup(?FILE_SUMMARY_ETS_NAME, File) of [] -> %% File has been GC'd and no longer exists. Go around again. read(MsgId, CState); [#file_summary { locked = Locked, right = Right }] -> - case {Right, Locked} of - {undefined, false} -> - case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of - [] -> - Defer(); %% may have rolled over - [{MsgId, _FileOrUndefined, Msg}] -> - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), - {{ok, Msg}, CState} - end; - {_, true} -> - %% of course, in the mean time, the GC could have - %% run and our msg is actually in a different - %% file, unlocked. However, defering is the safest - %% and simplest thing to do. - Defer(); - _ -> - %% It's entirely possible that everything we're - %% doing from here on is for the wrong file, or a - %% non-existent file, as a GC may have finished. - try - ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, +1}) - catch error:badarg -> - %% the File has been GC'd and deleted. Go around. - read(MsgId, CState) - end, - Release = fun() -> ets:update_counter( - ?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, -1}) - end, - %% If a GC hasn't already started, it won't start - %% now. Need to check again to see if we've been - %% locked in the meantime, between lookup and - %% update_counter (thus GC started before our +1). - [#file_summary { locked = Locked2 }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), - case Locked2 of - true -> - %% If we get a badarg here, then the GC - %% has finished and deleted our file. Try - %% going around again. Otherwise, just - %% defer. - - %% badarg scenario: - %% we lookup, msg_store locks, gc starts, - %% gc ends, we +1 readers, msg_store - %% ets:deletes (and unlocks the dest) - try - Release(), - Defer() - catch error:badarg -> read(MsgId, CState) - end; - false -> - %% Ok, we're definitely safe to continue - - %% a GC can't start up now, and isn't - %% running, so nothing will tell us from - %% now on to close the handle if it's - %% already open. (Well, a GC could start, - %% and could put close entries into the - %% ets table, but the GC will wait until - %% we're done here before doing any real - %% work.) - - %% Finally, we need to recheck that the - %% msg is still at the same place - it's - %% possible an entire GC ran between us - %% doing the lookup and the +1 on the - %% readers. (Same as badarg scenario - %% above, but we don't have a missing file - %% - we just have the /wrong/ file). - - case index_lookup(MsgId, CState) of - MsgLocation2 = #msg_location { file = File } -> - %% Still the same file. 
- %% This is fine to fail (already exists) - ets:insert_new(?FILE_HANDLES_ETS_NAME, - {{self(), File}, open}), - CState1 = close_all_indicated(CState), - {Msg, CState2} = - read_from_disk(MsgLocation2, CState1), - ok = maybe_insert_into_cache( - RefCount, MsgId, Msg), - Release(), %% this MUST NOT fail with badarg - {{ok, Msg}, CState2}; - MsgLocation2 -> %% different file! - Release(), %% this MUST NOT fail with badarg - client_read1(MsgLocation2, Defer, CState) - end - end + client_read2(MsgLocation, Locked, Right, Defer, CState) + end. + +client_read2(#msg_location { msg_id = MsgId, ref_count = RefCount }, + false, undefined, Defer, CState) -> + case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + [] -> + Defer(); %% may have rolled over + [{MsgId, _FileOrUndefined, Msg}] -> + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + {{ok, Msg}, CState} + end; +client_read2(_MsgLocation, true, _Right, Defer, _CState) -> + %% Of course, in the mean time, the GC could have run and our msg + %% is actually in a different file, unlocked. However, defering is + %% the safest and simplest thing to do. + Defer(); +client_read2(#msg_location { msg_id = MsgId, ref_count = RefCount, + file = File }, false, _Right, Defer, CState) -> + %% It's entirely possible that everything we're doing from here on + %% is for the wrong file, or a non-existent file, as a GC may have + %% finished. + try ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, +1}) + catch error:badarg -> %% the File has been GC'd and deleted. Go around. + read(MsgId, CState) + end, + Release = fun() -> ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + {#file_summary.readers, -1}) + end, + %% If a GC hasn't already started, it won't start now. Need to + %% check again to see if we've been locked in the meantime, + %% between lookup and update_counter (thus GC started before our + %% +1). + [#file_summary { locked = Locked }] = + ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + case Locked of + true -> + %% If we get a badarg here, then the GC has finished and + %% deleted our file. Try going around again. Otherwise, + %% just defer. + + %% badarg scenario: + %% we lookup, msg_store locks, gc starts, gc ends, we +1 + %% readers, msg_store ets:deletes (and unlocks the dest) + try Release(), + Defer() + catch error:badarg -> read(MsgId, CState) + end; + false -> + %% Ok, we're definitely safe to continue - a GC can't + %% start up now, and isn't running, so nothing will tell + %% us from now on to close the handle if it's already + %% open. (Well, a GC could start, and could put close + %% entries into the ets table, but the GC will wait until + %% we're done here before doing any real work.) + + %% Finally, we need to recheck that the msg is still at + %% the same place - it's possible an entire GC ran between + %% us doing the lookup and the +1 on the readers. (Same as + %% badarg scenario above, but we don't have a missing file + %% - we just have the /wrong/ file). + + case index_lookup(MsgId, CState) of + MsgLocation = #msg_location { file = File } -> + %% Still the same file. + %% This is fine to fail (already exists) + ets:insert_new( + ?FILE_HANDLES_ETS_NAME, {{self(), File}, open}), + CState1 = close_all_indicated(CState), + {Msg, CState2} = read_from_disk(MsgLocation, CState1), + ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + Release(), %% this MUST NOT fail with badarg + {{ok, Msg}, CState2}; + MsgLocation -> %% different file! 
+ Release(), %% this MUST NOT fail with badarg + client_read1(MsgLocation, Defer, CState) end end. -- cgit v1.2.1 From 6c9f1335c65d48dbfff40504823e9f3d4d8605ac Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 10 Feb 2010 17:41:53 +0000 Subject: Aggressively check caches before risking deferring to the msg_store. This is often very beneficial because in practice, the last messages to be pushed to disk under memory pressure are the messages at the head of the queue, not the tail. Thus in the event of the msg_store being overloaded, due to pushing to disk due to memory pressure, checking all the caches first is very effective at keeping the queues moving. The downside is that when we read from the cur_file cache, we don't get the ref count (at this point, the write is maybe only in the mailbox of the msg_store, so it may not even have an index entry, thus we have no chance of knowing an accurate refcount). The result is that the message may end up existing having been read from the cur file cache, then pushed to disk, forgotten, and read in again, only to be added to the dedup cache. However, this is as bad as it can get - there could at most be two binary copies of a message in memory. This actually holds, even though there's a slightly amazing interaction on ack: on ack, the copy read from the cur_file cache will decrement the dedup cache, possibly evicting the entry. At this point there would be no copies in the dedup cache, maybe one copy in the cur_file cache, and maybe one copy in RAM, which is the copy read from the dedup cache. The copy read from the cur_file cache has been ack'd, so has gone. The next read, if there is no copy in the cur_file cache, may add to the dedup cache, which will then be emptied on the ack from the first dedup read. I.e. you can achieve a chain in which no more than 2 copies are held in RAM at any one time, but each read really goes to disk. It is left as an exercise to the reader to come up with a test case that exhibits this behaviour. --- src/rabbit_msg_store.erl | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b3d784b2..2a4eadc9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -274,17 +274,27 @@ write(MsgId, Msg) -> gen_server2:cast(?SERVER, {write, MsgId, Msg}). read(MsgId, CState) -> - Defer = fun() -> - {gen_server2:call(?SERVER, {read, MsgId}, infinity), CState} - end, - case index_lookup(MsgId, CState) of + %% 1. Check the dedup cache + case fetch_and_increment_cache(MsgId) of not_found -> - Defer(); - MsgLocation -> - case fetch_and_increment_cache(MsgId) of - not_found -> client_read1(MsgLocation, Defer, CState); - Msg -> {{ok, Msg}, CState} - end + %% 2. Check the cur file cache + case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + [] -> + Defer = + fun() -> {gen_server2:call( + ?SERVER, {read, MsgId}, infinity), CState} + end, + case index_lookup(MsgId, CState) of + not_found -> Defer(); + MsgLocation -> client_read1(MsgLocation, Defer, CState) + end; + [{MsgId, _FileOrUndefined, Msg}] -> + %% Although we've found it, we don't know the + %% refcount, so can't insert into dedup cache + {{ok, Msg}, CState} + end; + Msg -> + {{ok, Msg}, CState} end. contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). @@ -318,11 +328,11 @@ client_read1(MsgLocation = #msg_location { msg_id = MsgId, file = File }, Defer, [] -> %% File has been GC'd and no longer exists. Go around again.
read(MsgId, CState); [#file_summary { locked = Locked, right = Right }] -> - client_read2(MsgLocation, Locked, Right, Defer, CState) + client_read2(Locked, Right, MsgLocation, Defer, CState) end. -client_read2(#msg_location { msg_id = MsgId, ref_count = RefCount }, - false, undefined, Defer, CState) -> +client_read2(false, undefined, #msg_location { + msg_id = MsgId, ref_count = RefCount }, Defer, CState) -> case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of [] -> Defer(); %% may have rolled over @@ -330,13 +340,14 @@ client_read2(#msg_location { msg_id = MsgId, ref_count = RefCount }, ok = maybe_insert_into_cache(RefCount, MsgId, Msg), {{ok, Msg}, CState} end; -client_read2(_MsgLocation, true, _Right, Defer, _CState) -> +client_read2(true, _Right, _MsgLocation, Defer, _CState) -> %% Of course, in the mean time, the GC could have run and our msg %% is actually in a different file, unlocked. However, defering is %% the safest and simplest thing to do. Defer(); -client_read2(#msg_location { msg_id = MsgId, ref_count = RefCount, - file = File }, false, _Right, Defer, CState) -> +client_read2(false, _Right, #msg_location { + msg_id = MsgId, ref_count = RefCount, file = File }, + Defer, CState) -> %% It's entirely possible that everything we're doing from here on %% is for the wrong file, or a non-existent file, as a GC may have %% finished. @@ -892,12 +903,12 @@ fetch_and_increment_cache(MsgId) -> decrement_cache(MsgId) -> true = try case ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, -1}) of N when N =< 0 -> true = ets:delete(?CACHE_ETS_NAME, MsgId); - _N -> true + _N -> true end catch error:badarg -> %% MsgId is not in there because although it's been %% delivered, it's never actually been read (think: - %% persistent message in mixed queue) + %% persistent message held in RAM) true end, ok. 
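Spelled out, the read path after this commit consults three places in increasing order of cost before ever queueing behind the msg_store. A hypothetical condensation of read/2 above; DedupTab and CurFileTab stand for the two caches, and IndexFun/ServerFun for index_lookup/2 and the gen_server2 call, none of which are named this way in the patch:

%% 1. dedup cache: {MsgId, Msg, RefCount} rows
%% 2. cur file cache: {MsgId, FileOrUndefined, Msg} rows
%% 3. index lookup, falling back to a synchronous server call
read_order(MsgId, DedupTab, CurFileTab, IndexFun, ServerFun) ->
    case ets:lookup(DedupTab, MsgId) of
        [{MsgId, Msg, _RefCount}] -> {ok, Msg};
        [] -> case ets:lookup(CurFileTab, MsgId) of
                  [{MsgId, _FileOrUndefined, Msg}] -> {ok, Msg};
                  [] -> case IndexFun(MsgId) of
                            not_found   -> ServerFun(MsgId);
                            MsgLocation -> {found_on_disk, MsgLocation}
                        end
              end
    end.

The point of the ordering is that both cache probes are single ets lookups in the client's own process, so a hit never touches the (possibly overloaded) msg_store mailbox at all.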
-- cgit v1.2.1 From 7fd27f3b489bdd296ab6cc643379712d96749113 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 11 Feb 2010 10:10:51 +0000 Subject: remove some debugging code this is gone on default already --- src/rabbit_amqqueue_process.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 27c5a362..d6bcfad6 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -266,7 +266,6 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, true -> {{Msg, IsDelivered, AckTag}, FunAcc1, State1} = DeliverFun(AckRequired, FunAcc, State), - ?LOGDEBUG("AMQQUEUE ~p DELIVERY:~n~p~n", [QName, Msg]), rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, {QName, self(), NextId, IsDelivered, Msg}), -- cgit v1.2.1 From e48acc80313790e758688cbd79e81163bc31dd50 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 11 Feb 2010 10:12:07 +0000 Subject: refactoring: rename Msg to Message that's what we have on default, and it matches the type (message()) --- src/rabbit_amqqueue_process.erl | 50 +++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index d6bcfad6..508427b4 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -264,14 +264,14 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, case (IsMsgReady andalso rabbit_limiter:can_send( LimiterPid, self(), AckRequired )) of true -> - {{Msg, IsDelivered, AckTag}, FunAcc1, State1} = + {{Message, IsDelivered, AckTag}, FunAcc1, State1} = DeliverFun(AckRequired, FunAcc, State), rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, - {QName, self(), NextId, IsDelivered, Msg}), + {QName, self(), NextId, IsDelivered, Message}), NewUAM = case AckRequired of - true -> dict:store(NextId, {Msg, AckTag}, UAM); + true -> dict:store(NextId, {Message, AckTag}, UAM); false -> UAM end, NewC = C#cr{unsent_message_count = Count + 1, @@ -317,13 +317,13 @@ deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> not IsEmpty. deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, State = #q { variable_queue_state = VQS }) -> - {{Msg, IsDelivered, AckTag, Remaining}, VQS1} = + {{Message, IsDelivered, AckTag, Remaining}, VQS1} = rabbit_variable_queue:fetch(VQS), AutoAcks1 = case AckRequired of true -> AutoAcks; false -> [AckTag | AutoAcks] end, - {{Msg, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, + {{Message, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, State #q { variable_queue_state = VQS1 }}. run_message_queue(State = #q { variable_queue_state = VQS }) -> @@ -335,7 +335,7 @@ run_message_queue(State = #q { variable_queue_state = VQS }) -> VQS1 = rabbit_variable_queue:ack(AutoAcks, State1 #q.variable_queue_state), State1 #q { variable_queue_state = VQS1 }. 
-attempt_immediate_delivery(none, _ChPid, Msg, State) -> +attempt_immediate_delivery(none, _ChPid, Message, State) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = fun (AckRequired, false, State1) -> @@ -344,27 +344,28 @@ attempt_immediate_delivery(none, _ChPid, Msg, State) -> true -> {AckTag1, VQS} = rabbit_variable_queue:publish_delivered( - Msg, State1 #q.variable_queue_state), + Message, State1 #q.variable_queue_state), {AckTag1, State1 #q { variable_queue_state = VQS }}; false -> {noack, State1} end, - {{Msg, false, AckTag}, true, State2} + {{Message, false, AckTag}, true, State2} end, deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); -attempt_immediate_delivery(Txn, ChPid, Msg, State) -> - VQS = rabbit_variable_queue:tx_publish(Msg, State #q.variable_queue_state), - record_pending_message(Txn, ChPid, Msg), +attempt_immediate_delivery(Txn, ChPid, Message, State) -> + VQS = rabbit_variable_queue:tx_publish( + Message, State #q.variable_queue_state), + record_pending_message(Txn, ChPid, Message), {true, State #q { variable_queue_state = VQS }}. -deliver_or_enqueue(Txn, ChPid, Msg, State) -> - case attempt_immediate_delivery(Txn, ChPid, Msg, State) of +deliver_or_enqueue(Txn, ChPid, Message, State) -> + case attempt_immediate_delivery(Txn, ChPid, Message, State) of {true, NewState} -> {true, NewState}; {false, NewState} -> %% Txn is none and no unblocked channels with consumers {_SeqId, VQS} = rabbit_variable_queue:publish( - Msg, State #q.variable_queue_state), + Message, State #q.variable_queue_state), {false, NewState #q { variable_queue_state = VQS }} end. @@ -388,11 +389,12 @@ deliver_or_requeue_n(MsgsWithAcks, State) -> deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> 0 < Len. deliver_or_requeue_msgs_deliver( - false, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> - {{Msg, true, noack}, {Len - 1, [AckTag | AcksAcc], MsgsWithAcks}, State}; + false, {Len, AcksAcc, [{Message, AckTag} | MsgsWithAcks]}, State) -> + {{Message, true, noack}, {Len - 1, [AckTag | AcksAcc], MsgsWithAcks}, + State}; deliver_or_requeue_msgs_deliver( - true, {Len, AcksAcc, [{Msg, AckTag} | MsgsWithAcks]}, State) -> - {{Msg, true, AckTag}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. + true, {Len, AcksAcc, [{Message, AckTag} | MsgsWithAcks]}, State) -> + {{Message, true, AckTag}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. add_consumer(ChPid, Consumer, Queue) -> queue:in({ChPid, Consumer}, Queue). 
@@ -537,7 +539,7 @@ commit_transaction(Txn, From, State) -> {MsgsWithAcks, Remaining} = collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), - [AckTag || {_Msg, AckTag} <- MsgsWithAcks] + [AckTag || {_Message, AckTag} <- MsgsWithAcks] end, {RunQueue, VQS} = rabbit_variable_queue:tx_commit( @@ -673,20 +675,20 @@ handle_call({basic_get, ChPid, NoAck}, _From, }) -> case rabbit_variable_queue:fetch(VQS) of {empty, VQS1} -> reply(empty, State #q { variable_queue_state = VQS1 }); - {{Msg, IsDelivered, AckTag, Remaining}, VQS1} -> + {{Message, IsDelivered, AckTag, Remaining}, VQS1} -> AckRequired = not(NoAck), VQS2 = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), - NewUAM = dict:store(NextId, {Msg, AckTag}, UAM), + NewUAM = dict:store(NextId, {Message, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), VQS1; false -> rabbit_variable_queue:ack([AckTag], VQS1) end, - Message = {QName, self(), NextId, IsDelivered, Msg}, - reply({ok, Remaining, Message}, + Msg = {QName, self(), NextId, IsDelivered, Message}, + reply({ok, Remaining, Msg}, State #q { next_msg_id = NextId + 1, variable_queue_state = VQS2 }) end; @@ -827,7 +829,7 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> none -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), VQS = rabbit_variable_queue:ack( - [AckTag || {_Msg, AckTag} <- MsgWithAcks], + [AckTag || {_Message, AckTag} <- MsgWithAcks], State #q.variable_queue_state), store_ch_record(C#cr{unacked_messages = Remaining}), noreply(State #q { variable_queue_state = VQS }); -- cgit v1.2.1 From fa187e5389b1697bd55ce8315264f21c7d477806 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 11 Feb 2010 10:19:11 +0000 Subject: refactoring: rename attempt_immediate_delivery to attempt_delivery that's what it's called on default --- src/rabbit_amqqueue_process.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 508427b4..7aa0d321 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -335,7 +335,7 @@ run_message_queue(State = #q { variable_queue_state = VQS }) -> VQS1 = rabbit_variable_queue:ack(AutoAcks, State1 #q.variable_queue_state), State1 #q { variable_queue_state = VQS1 }. -attempt_immediate_delivery(none, _ChPid, Message, State) -> +attempt_delivery(none, _ChPid, Message, State) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = fun (AckRequired, false, State1) -> @@ -352,14 +352,14 @@ attempt_immediate_delivery(none, _ChPid, Message, State) -> {{Message, false, AckTag}, true, State2} end, deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); -attempt_immediate_delivery(Txn, ChPid, Message, State) -> +attempt_delivery(Txn, ChPid, Message, State) -> VQS = rabbit_variable_queue:tx_publish( Message, State #q.variable_queue_state), record_pending_message(Txn, ChPid, Message), {true, State #q { variable_queue_state = VQS }}. deliver_or_enqueue(Txn, ChPid, Message, State) -> - case attempt_immediate_delivery(Txn, ChPid, Message, State) of + case attempt_delivery(Txn, ChPid, Message, State) of {true, NewState} -> {true, NewState}; {false, NewState} -> @@ -640,8 +640,7 @@ handle_call({deliver_immediately, Txn, Message, ChPid}, _From, State) -> %% just all ready-to-consume queues get the message, with unready %% queues discarding the message? 
%% - {Delivered, NewState} = - attempt_immediate_delivery(Txn, ChPid, Message, State), + {Delivered, NewState} = attempt_delivery(Txn, ChPid, Message, State), reply(Delivered, NewState); handle_call({deliver, Txn, Message, ChPid}, _From, State) -> -- cgit v1.2.1 From d979406413e3a1eb205ded35a323b7227e909b8e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 11 Feb 2010 11:19:34 +0000 Subject: Fix potential leak in cur file cache if the msg is both already known and in a different file - eg same msg gets added to 2 queues, one very long, the other very short. Then memory becomes tight. By the time the short queue pushes the msg to disk, the cur file may well have rolled over --- src/rabbit_msg_store.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2a4eadc9..2db3c34e 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -531,12 +531,16 @@ handle_cast({write, MsgId, Msg}, { sum_valid_data = SumValid + TotalSize, sum_file_size = SumFileSize + TotalSize } ))); - #msg_location { ref_count = RefCount } -> + #msg_location { ref_count = RefCount, file = File } -> %% We already know about it, just update counter. Only %% update field otherwise bad interaction with concurrent GC ok = index_update_fields(MsgId, {#msg_location.ref_count, RefCount + 1}, State), + true = case File == CurFile of + true -> true; + false -> ets:delete(?CUR_FILE_CACHE_ETS_NAME, MsgId) + end, noreply(State) end; -- cgit v1.2.1 From db77a142e3e75a5d47f68f09edb654e63136971d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 11 Feb 2010 16:03:07 +0000 Subject: minor refactor: move erase_tx call into commit_transaction for symmetry with rollback_transaction --- src/rabbit_amqqueue_process.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7aa0d321..59ea353c 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -544,6 +544,7 @@ commit_transaction(Txn, From, State) -> {RunQueue, VQS} = rabbit_variable_queue:tx_commit( PendingMessagesOrdered, Acks, From, State #q.variable_queue_state), + erase_tx(Txn), {RunQueue, State #q { variable_queue_state = VQS }}. rollback_transaction(Txn, State) -> @@ -650,7 +651,6 @@ handle_call({deliver, Txn, Message, ChPid}, _From, State) -> handle_call({commit, Txn}, From, State) -> {RunQueue, NewState} = commit_transaction(Txn, From, State), - erase_tx(Txn), noreply(case RunQueue of true -> run_message_queue(NewState); false -> NewState -- cgit v1.2.1 From e84b6142ac2f2375f4e2ad649589e4ffbf9a0244 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 15 Feb 2010 15:48:16 +0000 Subject: Yet another API consistency design win for the Erlang OTP team. ets:update_counter throws badarg if the key doesn't exist. ets:update_element returns false if the key doesn't exist. The bizarre error that I've been tracking down for a week, and have still not replicated (but was a badmatch false (which means it's likely that it should have been true)) is almost certainly this. --- src/rabbit_msg_store.erl | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2db3c34e..6be4358c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1216,16 +1216,10 @@ maybe_compact(State) -> State.
mark_handle_to_close(File) -> - lists:foldl( - fun ({Key, open}, true) -> - try - true = ets:update_element(?FILE_HANDLES_ETS_NAME, - Key, {2, close}) - catch error:badarg -> %% client has deleted concurrently, no prob - true - end - end, - true, ets:match_object(?FILE_HANDLES_ETS_NAME, {{'_', File}, open})). + [ ets:update_element(?FILE_HANDLES_ETS_NAME, Key, {2, close}) + || {Key, open} <- ets:match_object(?FILE_HANDLES_ETS_NAME, + {{'_', File}, open}) ], + true. find_files_to_gc(_N, '$end_of_table') -> undefined; -- cgit v1.2.1 From b4caac57ee1ef0e15bea95e9365635c89189ac00 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 16 Feb 2010 13:55:35 +0000 Subject: Promote reads over writes in the msg_store. This is safe. The cur_file cache contains all msgs that are in the current file, plus any which haven't yet been processed by the msg_store. Thus if reading can't find it there, and for some reason we can't actually do the disk read directly from the queue process (eg the file is locked due to a GC, or we first thought the msg was in the cur file, but then it subsequently rolled over), then we know the msg has been written to disk. Thus raising the priority of the read call to the msg_store will never allow it to overtake its own write. Certainly leads to a performance improvement in some tests, and generally helps avoid queues stalling. --- src/rabbit_msg_store.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 6be4358c..10e325e9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -280,10 +280,9 @@ read(MsgId, CState) -> %% 2. Check the cur file cache case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of [] -> - Defer = - fun() -> {gen_server2:call( - ?SERVER, {read, MsgId}, infinity), CState} - end, + Defer = fun() -> {gen_server2:pcall( + ?SERVER, 2, {read, MsgId}, infinity), + CState} end, case index_lookup(MsgId, CState) of not_found -> Defer(); MsgLocation -> client_read1(MsgLocation, Defer, CState) -- cgit v1.2.1 From e627677ba905290000bea08eef1ebf98832f47f2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 17 Feb 2010 18:20:26 +0000 Subject: supervisor2 => ?MODULE --- src/supervisor2.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 4325a23c..978d30f9 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -80,10 +80,10 @@ behaviour_info(_Other) -> %%% SupName = {local, atom()} | {global, atom()}. %%% --------------------------------------------------- start_link(Mod, Args) -> - gen_server:start_link(supervisor2, {self, Mod, Args}, []). + gen_server:start_link(?MODULE, {self, Mod, Args}, []). start_link(SupName, Mod, Args) -> - gen_server:start_link(SupName, supervisor2, {SupName, Mod, Args}, []). + gen_server:start_link(SupName, ?MODULE, {SupName, Mod, Args}, []). %%% --------------------------------------------------- %%% Interface functions. -- cgit v1.2.1 From b4d1298e8f5aa3d9ba1b4190bb42a6bf58afa6e7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 18 Feb 2010 14:47:56 +0000 Subject: Several fixes: 1. Both the msg_store and the amqqueue_process can have their mailboxes grow very long. In this case, it's a problem because close messages from the FHC can't get through (Plain !, and no priority).
Therefore, add callbacks registry to FHC and equip both msg_store and amqqueue_process with high priority casts to solve this problem (ftr, msg_store can get swamped with writes, whilst the amqqueue_process can get swamped with delivery notifications and acks). 2. The GC was missing the ability to deal with close msgs from the FHC 3. The FHC, when reopening a file, uses the same mode as the file was originally opened with. If that mode is just write, then when the file is reopened, its contents get trashed. Thus when reopening, add in read to the mode, but don't record this anywhere - the file still acts (API wise) as if it was only opened writable. --- src/file_handle_cache.erl | 114 +++++++++++++++++++++++++--------------- src/rabbit_amqqueue.erl | 10 ++-- src/rabbit_amqqueue_process.erl | 14 ++--- src/rabbit_msg_store.erl | 20 ++++--- src/rabbit_msg_store_gc.erl | 4 ++ 5 files changed, 104 insertions(+), 58 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 6e367b03..520be0ce 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -133,10 +133,10 @@ -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([release_on_death/1, obtain/0]). +-export([release_on_death/1, obtain/0, register_callback/3]). -define(SERVER, ?MODULE). --define(RESERVED_FOR_OTHERS, 50). +-define(RESERVED_FOR_OTHERS, 100). -define(FILE_HANDLES_LIMIT_WINDOWS, 10000000). -define(FILE_HANDLES_LIMIT_OTHER, 1024). -define(FILE_HANDLES_CHECK_INTERVAL, 2000). @@ -169,7 +169,8 @@ { elders, limit, count, - obtains + obtains, + callbacks }). %%---------------------------------------------------------------------------- @@ -184,6 +185,7 @@ -type(position() :: ('bof' | 'eof' | {'bof',integer()} | {'eof',integer()} | {'cur',integer()} | integer())). +-spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). -spec(open/3 :: (string(), [any()], [{'write_buffer', (non_neg_integer()|'infinity'|'unbuffered')}]) -> @@ -215,6 +217,10 @@ start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). +register_callback(M, F, A) + when is_atom(M) andalso is_atom(F) andalso is_list(A) -> + gen_server:call(?SERVER, {register_callback, self(), {M, F, A}}, infinity). + open(Path, Mode, Options) -> case is_appender(Mode) of true -> @@ -241,7 +247,7 @@ open(Path, Mode, Options) -> File1 #file { reader_count = RCount1, has_writer = HasWriter1}), Ref = make_ref(), - case open1(Path1, Mode, Options, Ref, bof) of + case open1(Path1, Mode, Options, Ref, bof, new) of {ok, _Handle} -> {ok, Ref}; Error -> Error end @@ -504,7 +510,7 @@ get_or_reopen(Ref) -> {error, not_open, Ref}; #handle { hdl = closed, mode = Mode, options = Options, offset = Offset, path = Path } -> - open1(Path, Mode, Options, Ref, Offset); + open1(Path, Mode, Options, Ref, Offset, reopen); Handle -> {ok, Handle} end. @@ -524,8 +530,12 @@ put_handle(Ref, Handle = #handle { last_used_at = Then }) -> fun (Tree) -> gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) end), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). 
-open1(Path, Mode, Options, Ref, Offset) -> - case file:open(Path, Mode) of +open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> + Mode1 = case NewOrReopen of + new -> Mode; + reopen -> [read | Mode] + end, + case file:open(Path, Mode1) of {ok, Hdl} -> WriteBufferSize = case proplists:get_value(write_buffer, Options, unbuffered) of @@ -561,31 +571,36 @@ close1(Ref, Handle, SoftOrHard) -> case write_buffer(Handle) of {ok, #handle { hdl = Hdl, path = Path, is_dirty = IsDirty, is_read = IsReader, is_write = IsWriter, - last_used_at = Then } = Handle1 } -> - case Hdl of - closed -> ok; - _ -> ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - ok = file:close(Hdl), - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:delete(Then, Tree), - Oldest = - case gb_trees:is_empty(Tree1) of - true -> - undefined; - false -> - {Oldest1, _Ref} = - gb_trees:smallest(Tree1), - Oldest1 - end, - gen_server:cast( - ?SERVER, {close, self(), Oldest}), - Tree1 - end) - end, + last_used_at = Then, offset = Offset } = Handle1 } -> + Handle2 = + case Hdl of + closed -> + ok; + _ -> + ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete(Then, Tree), + Oldest = + case gb_trees:is_empty(Tree1) of + true -> + undefined; + false -> + {Oldest1, _Ref} = + gb_trees:smallest(Tree1), + Oldest1 + end, + gen_server:cast( + ?SERVER, {close, self(), Oldest}), + Tree1 + end), + Handle1 #handle { trusted_offset = Offset, + is_dirty = false } + end, case SoftOrHard of hard -> #file { reader_count = RCount, has_writer = HasWriter } = File = @@ -602,7 +617,7 @@ close1(Ref, Handle, SoftOrHard) -> has_writer = HasWriter1 }) end, ok; - soft -> {ok, Handle1 #handle { hdl = closed }} + soft -> {ok, Handle2 #handle { hdl = closed }} end; {Error, Handle1} -> put_handle(Ref, Handle1), @@ -673,7 +688,7 @@ init([]) -> end, error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [] }}. + obtains = [], callbacks = dict:new() }}. handle_call(obtain, From, State = #fhc_state { count = Count }) -> State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = @@ -682,7 +697,12 @@ handle_call(obtain, From, State = #fhc_state { count = Count }) -> true -> {noreply, State1 #fhc_state { obtains = [From | Obtains], count = Count1 - 1 }}; false -> {reply, ok, State1} - end. + end; + +handle_call({register_callback, Pid, MFA}, _From, + State = #fhc_state { callbacks = Callbacks }) -> + {reply, ok, + State #fhc_state { callbacks = dict:store(Pid, MFA, Callbacks) }}. handle_cast({open, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> @@ -713,9 +733,11 @@ handle_cast({release_on_death, Pid}, State) -> _MRef = erlang:monitor(process, Pid), {noreply, State}. -handle_info({'DOWN', _MRef, process, _Pid, _Reason}, - State = #fhc_state { count = Count }) -> - {noreply, process_obtains(State #fhc_state { count = Count - 1 })}. +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #fhc_state { count = Count, callbacks = Callbacks }) -> + {noreply, process_obtains( + State #fhc_state { count = Count - 1, + callbacks = dict:erase(Pid, Callbacks) })}. terminate(_Reason, State) -> State. @@ -742,7 +764,7 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, State #fhc_state { count = Count + ObtainableLen, obtains = ObtainsNew }. 
maybe_reduce(State = #fhc_state { limit = Limit, count = Count, - elders = Elders }) + elders = Elders, callbacks = Callbacks }) when Limit /= infinity andalso Count >= Limit -> Now = now(), {Pids, Sum, ClientCount} = @@ -755,10 +777,16 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, case Pids of [] -> ok; _ -> AverageAge = Sum / ClientCount, - lists:foreach(fun (Pid) -> Pid ! {?MODULE, - maximum_eldest_since_use, - AverageAge} - end, Pids) + lists:foreach( + fun (Pid) -> + case dict:find(Pid, Callbacks) of + error -> + Pid ! {?MODULE, maximum_eldest_since_use, + AverageAge}; + {ok, {M, F, A}} -> + apply(M, F, A ++ [AverageAge]) + end + end, Pids) end, {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server, cast, [?SERVER, check_counts]), diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index c9f5b5ae..df4ca40f 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -34,7 +34,7 @@ -export([start/0, recover/1, find_durable_queues/0, declare/4, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, remeasure_rates/1, - set_queue_duration/2]). + set_queue_duration/2, set_maximum_since_use/2]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, stat/1, stat_all/0, deliver/2, redeliver/2, requeue/3, ack/4]). @@ -122,6 +122,7 @@ -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(remeasure_rates/1 :: (pid()) -> 'ok'). -spec(set_queue_duration/2 :: (pid(), number()) -> 'ok'). +-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). @@ -374,10 +375,13 @@ internal_delete(QueueName) -> end). remeasure_rates(QPid) -> - gen_server2:pcast(QPid, 9, remeasure_rates). + gen_server2:pcast(QPid, 9, remeasure_rates). set_queue_duration(QPid, Duration) -> - gen_server2:pcast(QPid, 9, {set_queue_duration, Duration}). + gen_server2:pcast(QPid, 9, {set_queue_duration, Duration}). + +set_maximum_since_use(QPid, Age) -> + gen_server2:pcast(QPid, 9, {set_maximum_since_use, Age}). on_node_down(Node) -> rabbit_misc:execute_mnesia_transaction( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 73c3678d..93ebc3c5 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -102,12 +102,14 @@ start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). info_keys() -> ?INFO_KEYS. - + %%---------------------------------------------------------------------------- init(Q = #amqqueue { name = QName }) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), ok = rabbit_memory_monitor:register (self(), {rabbit_amqqueue, set_queue_duration, [self()]}), VQS = rabbit_variable_queue:init(QName), @@ -907,7 +909,11 @@ handle_cast({set_queue_duration, Duration}, State = #q{variable_queue_state = VQS}) -> VQS1 = rabbit_variable_queue:set_queue_ram_duration_target( Duration, VQS), - noreply(State#q{variable_queue_state = VQS1}). + noreply(State#q{variable_queue_state = VQS1}); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State). 
handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, State = #q{owner = {DownPid, MonitorRef}}) -> @@ -934,10 +940,6 @@ handle_info(timeout, State = #q{variable_queue_state = VQS}) -> State#q{variable_queue_state = rabbit_variable_queue:tx_commit_from_vq(VQS)})); -handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> - ok = file_handle_cache:set_maximum_since_use(Age), - noreply(State); - handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 10e325e9..81663c00 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -36,7 +36,7 @@ -export([start_link/3, write/2, read/2, contains/1, remove/1, release/1, sync/2, client_init/0, client_terminate/1]). --export([sync/0, gc_done/3]). %% internal +-export([sync/0, gc_done/3, set_maximum_since_use/1]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). @@ -98,6 +98,7 @@ -spec(release/1 :: ([msg_id()]) -> 'ok'). -spec(sync/2 :: ([msg_id()], fun (() -> any())) -> 'ok'). -spec(gc_done/3 :: (non_neg_integer(), file_num(), file_num()) -> 'ok'). +-spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(client_init/0 :: () -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). @@ -305,6 +306,9 @@ sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal gc_done(Reclaimed, Source, Destination) -> gen_server2:pcast(?SERVER, 9, {gc_done, Reclaimed, Source, Destination}). +set_maximum_since_use(Age) -> + gen_server2:pcast(?SERVER, 9, {set_maximum_since_use, Age}). + client_init() -> {IState, IModule, Dir} = gen_server2:call(?SERVER, new_client_state, infinity), @@ -422,6 +426,9 @@ close_all_indicated(CState) -> init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> process_flag(trap_exit, true), + ok = + file_handle_cache:register_callback(?MODULE, set_maximum_since_use, []), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), {ok, IndexModule} = application:get_env(msg_store_index_module), @@ -597,15 +604,15 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, true = ets:delete(?FILE_SUMMARY_ETS_NAME, Source), noreply(run_pending( State #msstate { sum_file_size = SumFileSize - Reclaimed, - gc_active = false })). + gc_active = false })); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State). handle_info(timeout, State) -> noreply(sync(State)); -handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> - ok = file_handle_cache:set_maximum_since_use(Age), - noreply(State); - handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. @@ -746,6 +753,7 @@ read_from_disk(#msg_location { msg_id = MsgId, ref_count = RefCount, throw({error, {misread, [{old_state, State}, {file_num, File}, {offset, Offset}, + {msg_id, MsgId}, {read, Rest}, {proc_dict, get()} ]}}) diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 6023de02..a64733df 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -77,6 +77,10 @@ handle_cast({gc, Source, Destination}, State) -> ok = rabbit_msg_store:gc_done(Reclaimed, Source, Destination), {noreply, State, hibernate}. +handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + {noreply, State, hibernate}; + handle_info(Info, State) -> {stop, {unhandled_info, Info}, State}. 
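The callback registry added to the file_handle_cache here is the general-purpose half of the fix: rather than relying on a plain Pid ! Message that can languish behind a long mailbox, a process registers an {M, F, A} triple and the cache applies it with the age appended, which lets the callee route the notification as a high-priority cast. A sketch of the dispatch in maybe_reduce/1 above; notify/3 is an illustrative name, not a function in the patch:

%% Prefer the registered {M, F, A}, appending AverageAge as the final
%% argument; fall back to a plain send for unregistered processes.
notify(Pid, Callbacks, AverageAge) ->
    case dict:find(Pid, Callbacks) of
        {ok, {M, F, A}} -> apply(M, F, A ++ [AverageAge]);
        error           -> Pid ! {file_handle_cache,
                                  maximum_eldest_since_use, AverageAge}
    end.

For a queue, the chain thus ends as the cache applying rabbit_amqqueue:set_maximum_since_use(QPid, Age), a priority cast that can overtake the deliveries and acks flooding the queue's mailbox.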
-- cgit v1.2.1 From 6a39b3f81e41ab60cd62e7186d83091de43e7a00 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 18 Feb 2010 15:38:30 +0000 Subject: Sort out priorities: only info should be at 9. conserve_memory, and messages relating to memory monitoring, file handles and suchlike bookkeeping, are at 8. Messages that are higher-priority but nevertheless amqp messages (eg acks, commit-related-actions, notify-sent etc) are at 7. --- src/rabbit_amqqueue.erl | 16 ++++++++-------- src/rabbit_channel.erl | 2 +- src/rabbit_msg_store.erl | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index df4ca40f..31a0f0c1 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -304,7 +304,7 @@ requeue(QPid, MsgIds, ChPid) -> gen_server2:cast(QPid, {requeue, MsgIds, ChPid}). ack(QPid, Txn, MsgIds, ChPid) -> - gen_server2:pcast(QPid, 8, {ack, Txn, MsgIds, ChPid}). + gen_server2:pcast(QPid, 7, {ack, Txn, MsgIds, ChPid}). commit_all(QPids, Txn) -> safe_pmap_ok( @@ -349,17 +349,17 @@ basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) -> infinity). notify_sent(QPid, ChPid) -> - gen_server2:pcast(QPid, 8, {notify_sent, ChPid}). + gen_server2:pcast(QPid, 7, {notify_sent, ChPid}). unblock(QPid, ChPid) -> - gen_server2:pcast(QPid, 8, {unblock, ChPid}). + gen_server2:pcast(QPid, 7, {unblock, ChPid}). tx_commit_msg_store_callback(QPid, Pubs, AckTags, From) -> - gen_server2:pcast(QPid, 8, + gen_server2:pcast(QPid, 7, {tx_commit_msg_store_callback, Pubs, AckTags, From}). tx_commit_vq_callback(QPid) -> - gen_server2:pcast(QPid, 8, tx_commit_vq_callback). + gen_server2:pcast(QPid, 7, tx_commit_vq_callback). internal_delete(QueueName) -> rabbit_misc:execute_mnesia_transaction( @@ -375,13 +375,13 @@ internal_delete(QueueName) -> end). remeasure_rates(QPid) -> - gen_server2:pcast(QPid, 9, remeasure_rates). + gen_server2:pcast(QPid, 8, remeasure_rates). set_queue_duration(QPid, Duration) -> - gen_server2:pcast(QPid, 9, {set_queue_duration, Duration}). + gen_server2:pcast(QPid, 8, {set_queue_duration, Duration}). set_maximum_since_use(QPid, Age) -> - gen_server2:pcast(QPid, 9, {set_maximum_since_use, Age}). + gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}). on_node_down(Node) -> rabbit_misc:execute_mnesia_transaction( diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 9f1a08ee..670e0fa7 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -113,7 +113,7 @@ deliver(Pid, ConsumerTag, AckRequired, Msg) -> gen_server2:cast(Pid, {deliver, ConsumerTag, AckRequired, Msg}). conserve_memory(Pid, Conserve) -> - gen_server2:pcast(Pid, 9, {conserve_memory, Conserve}). + gen_server2:pcast(Pid, 8, {conserve_memory, Conserve}). list() -> pg_local:get_members(rabbit_channels). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 81663c00..5e1c0311 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -301,13 +301,13 @@ contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). -sync() -> gen_server2:pcast(?SERVER, 9, sync). %% internal +sync() -> gen_server2:pcast(?SERVER, 8, sync). %% internal gc_done(Reclaimed, Source, Destination) ->
+ gen_server2:pcast(?SERVER, 8, {gc_done, Reclaimed, Source, Destination}). set_maximum_since_use(Age) -> - gen_server2:pcast(?SERVER, 9, {set_maximum_since_use, Age}). + gen_server2:pcast(?SERVER, 8, {set_maximum_since_use, Age}). client_init() -> {IState, IModule, Dir} = -- cgit v1.2.1 From 62ac934376b7ac09d15560dc8087ea948f7a5279 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 23 Feb 2010 15:39:55 +0000 Subject: Missing call to terminate_client in msg_store --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_variable_queue.erl | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 93ebc3c5..36d047e8 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -145,7 +145,7 @@ terminate(_Reason, State = #q{variable_queue_state = VQS}) -> %% called internal_delete first, we would then have a race between %% the disk delete and a new queue with the same name being %% created and published to. - _VQS = rabbit_variable_queue:delete(VQS1), + _VQS = rabbit_variable_queue:delete_and_terminate(VQS1), ok = rabbit_amqqueue:internal_delete(qname(State)). code_change(_OldVsn, State, _Extra) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9d5c2b99..0043bb5f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -33,10 +33,10 @@ -export([init/1, terminate/1, publish/2, publish_delivered/2, set_queue_ram_duration_target/2, remeasure_rates/1, - ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete/1, - requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, - tx_commit_from_msg_store/4, tx_commit_from_vq/1, needs_sync/1, - flush_journal/1, status/1]). + ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, + delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, + tx_commit/4, tx_commit_from_msg_store/4, tx_commit_from_vq/1, + needs_sync/1, flush_journal/1, status/1]). %%---------------------------------------------------------------------------- %% Definitions: @@ -230,7 +230,7 @@ -spec(len/1 :: (vqstate()) -> non_neg_integer()). -spec(is_empty/1 :: (vqstate()) -> boolean()). -spec(purge/1 :: (vqstate()) -> {non_neg_integer(), vqstate()}). --spec(delete/1 :: (vqstate()) -> vqstate()). +-spec(delete_and_terminate/1 :: (vqstate()) -> vqstate()). -spec(requeue/2 :: ([{basic_message(), ack()}], vqstate()) -> vqstate()). -spec(tx_publish/2 :: (basic_message(), vqstate()) -> vqstate()). -spec(tx_rollback/2 :: ([msg_id()], vqstate()) -> vqstate()). @@ -451,8 +451,10 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. -delete(State) -> - {_PurgeCount, State1 = #vqstate { index_state = IndexState }} = purge(State), +delete_and_terminate(State) -> + {_PurgeCount, State1 = #vqstate { index_state = IndexState, + msg_store_read_state = MSCState }} = + purge(State), IndexState1 = case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( IndexState) of @@ -464,6 +466,7 @@ delete(State) -> IndexState3 end, IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1), + rabbit_msg_store:client_terminate(MSCState), State1 #vqstate { index_state = IndexState4 }. 
%% [{Msg, AckTag}] -- cgit v1.2.1 From dddc872d83ac77871261ac739daa341cc2dc89d9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Mar 2010 17:15:48 +0000 Subject: Correct boot sequence and rearrange msg_store / queue startup and recovery into amqqueue rather than queue_index --- src/rabbit.erl | 5 +++-- src/rabbit_amqqueue.erl | 3 +++ src/rabbit_queue_index.erl | 11 +---------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index c9589a17..92db2e9e 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -122,8 +122,9 @@ -rabbit_boot_step({message_store_queue_sup_queue_recovery, [{description, "message store, queue supervisor and queue recovery"}, - {mfa, {rabbit_queue_index, start_msg_store, []}}, - {requires, exchange_recovery}]}). + {mfa, {rabbit_amqqueue, start, []}}, + {requires, exchange_recovery}, + {enables, routing_ready}]}). -rabbit_boot_step({routing_ready, [{description, "message delivery logic ready"}]}). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 8466a137..394db477 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -132,11 +132,14 @@ %%---------------------------------------------------------------------------- start() -> + DurableQueues = rabbit_amqqueue:find_durable_queues(), + ok = rabbit_queue_index:start_msg_store(DurableQueues), {ok,_} = supervisor:start_child( rabbit_sup, {rabbit_amqqueue_sup, {rabbit_amqqueue_sup, start_link, []}, transient, infinity, supervisor, [rabbit_amqqueue_sup]}), + {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues), ok. recover(DurableQueues) -> diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 73a8edb6..a16efb20 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -34,8 +34,7 @@ -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/0, - start_msg_store/1]). + find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). -define(CLEAN_FILENAME, "clean.dot"). @@ -208,7 +207,6 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). --spec(start_msg_store/0 :: () -> 'ok'). -spec(start_msg_store/1 :: ([amqqueue()]) -> 'ok'). -endif. @@ -381,13 +379,6 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> end, {LowSeqIdSeg, NextSeqId, State}. -start_msg_store() -> - DurableQueues = rabbit_amqqueue:find_durable_queues(), - ok = start_msg_store(DurableQueues), - ok = rabbit_amqqueue:start(), - {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues), - ok. 
- start_msg_store(DurableQueues) -> DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), -- cgit v1.2.1 From d034840b6ddaab701491ba3f354c3b03185fe955 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Mar 2010 17:27:01 +0000 Subject: Minor relaxation of boot dependencies --- src/rabbit.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 92db2e9e..b652958a 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -123,7 +123,7 @@ -rabbit_boot_step({message_store_queue_sup_queue_recovery, [{description, "message store, queue supervisor and queue recovery"}, {mfa, {rabbit_amqqueue, start, []}}, - {requires, exchange_recovery}, + {requires, empty_db_check}, {enables, routing_ready}]}). -rabbit_boot_step({routing_ready, -- cgit v1.2.1 From 6169aceeff6a71960a0a45dd43c6f5c496915ebd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Mar 2010 17:27:46 +0000 Subject: Whilst erlang's namespace provision is poor, it's not quite /that/ poor... --- src/rabbit_amqqueue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 394db477..e333a43b 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -132,14 +132,14 @@ %%---------------------------------------------------------------------------- start() -> - DurableQueues = rabbit_amqqueue:find_durable_queues(), + DurableQueues = find_durable_queues(), ok = rabbit_queue_index:start_msg_store(DurableQueues), {ok,_} = supervisor:start_child( rabbit_sup, {rabbit_amqqueue_sup, {rabbit_amqqueue_sup, start_link, []}, transient, infinity, supervisor, [rabbit_amqqueue_sup]}), - {ok, _RealDurableQueues} = rabbit_amqqueue:recover(DurableQueues), + {ok, _RealDurableQueues} = recover(DurableQueues), ok. recover(DurableQueues) -> -- cgit v1.2.1 From 82d339f82cae5272e1ec07e0877a7b6104a4425e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 4 Mar 2010 18:21:18 +0000 Subject: Minor bug fix, but I don't think this is what's plaguing majek atm --- src/rabbit_msg_store.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 5e1c0311..85f5526e 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -782,17 +782,15 @@ contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> end end. -remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, - current_file = CurFile }) -> +remove_message(MsgId, State = #msstate { sum_valid_data = SumValid }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), case RefCount of 1 -> - true = case File =:= CurFile of - true -> ets:delete(?CUR_FILE_CACHE_ETS_NAME, MsgId); - false -> true - end, + %% don't remove from CUR_FILE_CACHE_ETS_NAME here because + %% there may be further writes in the mailbox for the same + %% msg. ok = remove_cache_entry(MsgId), [#file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, -- cgit v1.2.1 From 540d78184be0f7e7f2e33064ff2e9dadeaa24511 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 5 Mar 2010 16:21:38 +0000 Subject: Change to cur file cache. Rather than bother storing the file in there, just give a ref count which is inc'd when the client puts it in, and dec'd when the process actually writes it. On roll, delete everything with a ref count of 0.
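In outline, the scheme the diff below implements; a sketch only, with Tab standing for ?CUR_FILE_CACHE_ETS_NAME and the function names invented for illustration:

%% Client side: insert with a count of 1, or bump an existing entry;
%% if the row vanishes between the two steps (the server just rolled
%% the current file), start again from the insert.
cache_put(Tab, MsgId, Msg) ->
    case ets:insert_new(Tab, {MsgId, Msg, 1}) of
        true  -> ok;
        false -> try ets:update_counter(Tab, MsgId, {3, +1}), ok
                 catch error:badarg -> cache_put(Tab, MsgId, Msg)
                 end
    end.

%% Server side: dec when a write is actually processed, and on rolling
%% to a new file delete every entry whose count is back down to 0.
cache_processed(Tab, MsgId) ->
    true = 0 =< ets:update_counter(Tab, MsgId, {3, -1}),
    ok.

cache_roll(Tab) ->
    true = ets:match_delete(Tab, {'_', '_', 0}),
    ok.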
--- src/rabbit_msg_store.erl | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 85f5526e..27fcbbd0 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -270,8 +270,7 @@ start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> [{timeout, infinity}]). write(MsgId, Msg) -> - %% could fail if msg already in there - ets:insert_new(?CUR_FILE_CACHE_ETS_NAME, {MsgId, undefined, Msg}), + ok = add_to_cache(MsgId, Msg), gen_server2:cast(?SERVER, {write, MsgId, Msg}). read(MsgId, CState) -> @@ -288,7 +287,7 @@ read(MsgId, CState) -> not_found -> Defer(); MsgLocation -> client_read1(MsgLocation, Defer, CState) end; - [{MsgId, _FileOrUndefined, Msg}] -> + [{MsgId, Msg, _CacheRefCount}] -> %% Although we've found it, we don't know the %% refcount, so can't insert into dedup cache {{ok, Msg}, CState} @@ -325,6 +324,18 @@ client_terminate(CState) -> %% Client-side-only helpers %%---------------------------------------------------------------------------- +add_to_cache(MsgId, Msg) -> + case ets:insert_new(?CUR_FILE_CACHE_ETS_NAME, {MsgId, Msg, 1}) of + true -> + ok; + false -> + try + ets:update_counter(?CUR_FILE_CACHE_ETS_NAME, MsgId, {3, +1}), + ok + catch error:badarg -> add_to_cache(MsgId, Msg) + end + end. + client_read1(MsgLocation = #msg_location { msg_id = MsgId, file = File }, Defer, CState) -> case ets:lookup(?FILE_SUMMARY_ETS_NAME, File) of @@ -339,7 +350,7 @@ client_read2(false, undefined, #msg_location { case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of [] -> Defer(); %% may have rolled over - [{MsgId, _FileOrUndefined, Msg}] -> + [{MsgId, Msg, _CacheRefCount}] -> ok = maybe_insert_into_cache(RefCount, MsgId, Msg), {{ok, Msg}, CState} end; @@ -502,11 +513,10 @@ handle_cast({write, MsgId, Msg}, current_file = CurFile, sum_valid_data = SumValid, sum_file_size = SumFileSize }) -> + true = 0 =< ets:update_counter(?CUR_FILE_CACHE_ETS_NAME, MsgId, {3, -1}), case index_lookup(MsgId, State) of not_found -> %% New message, lots to do - true = ets:update_element(?CUR_FILE_CACHE_ETS_NAME, MsgId, - {2, CurFile}), {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { @@ -543,10 +553,6 @@ handle_cast({write, MsgId, Msg}, ok = index_update_fields(MsgId, {#msg_location.ref_count, RefCount + 1}, State), - true = case File == CurFile of - true -> true; - false -> ets:delete(?CUR_FILE_CACHE_ETS_NAME, MsgId) - end, noreply(State) end; @@ -721,7 +727,7 @@ read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, false -> ok end, read_from_disk(MsgLoc, State); - [{MsgId, File, Msg1}] -> + [{MsgId, Msg1, _CacheRefCount}] -> {Msg1, State} end, ok = maybe_insert_into_cache(RefCount, MsgId, Msg), @@ -1193,7 +1199,7 @@ maybe_roll_to_new_file(Offset, locked = false, readers = 0 }), true = ets:update_element(?FILE_SUMMARY_ETS_NAME, CurFile, {#file_summary.right, NextFile}), - true = ets:match_delete(?CUR_FILE_CACHE_ETS_NAME, {'_', CurFile, '_'}), + true = ets:match_delete(?CUR_FILE_CACHE_ETS_NAME, {'_', '_', 0}), State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }; maybe_roll_to_new_file(_, State) -> -- cgit v1.2.1 From e05f196c34cee9b73d2ee2e43b0fcd4e2fb7018b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 8 Mar 2010 12:20:29 +0000 Subject: Push memory monitor and fhc into the restartable sup --- src/rabbit.erl | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index d73f6859..6203630e 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -72,7 +72,7 @@ -rabbit_boot_step({file_handle_cache, [{description, "file handle cache server"}, - {mfa, {rabbit_sup, start_child, + {mfa, {rabbit_restartable_sup, start_child, [file_handle_cache]}}, {enables, kernel_ready}]}). @@ -87,7 +87,7 @@ -rabbit_boot_step({rabbit_memory_monitor, [{description, "memory moniter"}, - {mfa, {rabbit_sup, start_child, + {mfa, {rabbit_restartable_sup, start_child, [rabbit_memory_monitor]}}, {requires, rabbit_alarm}, {enables, core_initialized}]}). -- cgit v1.2.1 From fdf50dbba4c4745c6c1145bffa20d9547c9d402c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 9 Mar 2010 09:06:18 +0000 Subject: remove unneeded exports and some minor refactoring --- src/rabbit_amqqueue.erl | 58 ++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index e333a43b..98993540 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -31,8 +31,7 @@ -module(rabbit_amqqueue). --export([start/0, recover/1, find_durable_queues/0, declare/4, delete/3, - purge/1]). +-export([start/0, declare/4, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, remeasure_rates/1, set_queue_duration/2, set_maximum_since_use/2]). -export([pseudo_queue/2]). @@ -70,8 +69,6 @@ -type(acktag() :: ('ack_not_on_disk' | {'ack_index_and_store', msg_id(), seq_id()})). -spec(start/0 :: () -> 'ok'). --spec(recover/1 :: ([amqqueue()]) -> {'ok', [amqqueue()]}). --spec(find_durable_queues/0 :: () -> [amqqueue()]). -spec(declare/4 :: (queue_name(), boolean(), boolean(), amqp_table()) -> amqqueue()). -spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()). @@ -139,38 +136,9 @@ start() -> {rabbit_amqqueue_sup, {rabbit_amqqueue_sup, start_link, []}, transient, infinity, supervisor, [rabbit_amqqueue_sup]}), - {ok, _RealDurableQueues} = recover(DurableQueues), + _RealDurableQueues = recover_durable_queues(DurableQueues), ok. -recover(DurableQueues) -> - {ok, _RealDurableQueues} = recover_durable_queues(DurableQueues). - -recover_durable_queues(DurableQueues) -> - RealDurableQueues = - lists:foldl( - fun (RecoveredQ, Acc) -> - Q = start_queue_process(RecoveredQ), - %% We need to catch the case where a client connected to - %% another node has deleted the queue (and possibly - %% re-created it). - case rabbit_misc:execute_mnesia_transaction( - fun () -> - Match = - mnesia:match_object( - rabbit_durable_queue, RecoveredQ, read), - case Match of - [_] -> ok = store_queue(Q), - true; - [] -> false - end - end) of - true -> [Q|Acc]; - false -> exit(Q#amqqueue.pid, shutdown), - Acc - end - end, [], DurableQueues), - {ok, RealDurableQueues}. - find_durable_queues() -> Node = node(), %% TODO: use dirty ops instead @@ -181,6 +149,28 @@ find_durable_queues() -> node(Pid) == Node])) end). +recover_durable_queues(DurableQueues) -> + lists:foldl( + fun (RecoveredQ, Acc) -> + Q = start_queue_process(RecoveredQ), + %% We need to catch the case where a client connected to + %% another node has deleted the queue (and possibly + %% re-created it). 
+ case rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:match_object( + rabbit_durable_queue, RecoveredQ, read) of + [_] -> ok = store_queue(Q), + true; + [] -> false + end + end) of + true -> [Q|Acc]; + false -> exit(Q#amqqueue.pid, shutdown), + Acc + end + end, [], DurableQueues). + declare(QueueName, Durable, AutoDelete, Args) -> Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, -- cgit v1.2.1 From 524d798bc59474948a1e578014f2381e6281c6c1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Mar 2010 13:04:30 +0000 Subject: Unnecessary variable --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 27fcbbd0..86f1d9c9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -547,7 +547,7 @@ handle_cast({write, MsgId, Msg}, { sum_valid_data = SumValid + TotalSize, sum_file_size = SumFileSize + TotalSize } ))); - #msg_location { ref_count = RefCount, file = File } -> + #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter. Only %% update field otherwise bad interaction with concurrent GC ok = index_update_fields(MsgId, -- cgit v1.2.1 From b851a84c3ea9535ab9a317ade3deeaeeaca7608e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 9 Mar 2010 23:53:51 +0000 Subject: Missed a 16#ffffffff --- src/rabbit_amqqueue_sup.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl index 122915cf..f30c9fd5 100644 --- a/src/rabbit_amqqueue_sup.erl +++ b/src/rabbit_amqqueue_sup.erl @@ -37,6 +37,8 @@ -export([init/1]). +-include("rabbit.hrl"). + -define(SERVER, ?MODULE). start_link() -> @@ -45,5 +47,4 @@ start_link() -> init([]) -> {ok, {{simple_one_for_one_terminate, 10, 10}, [{rabbit_amqqueue, {rabbit_amqqueue_process, start_link, []}, - %% 16#ffffffff is the biggest value allowed - temporary, 16#ffffffff, worker, [rabbit_amqqueue_process]}]}}. + temporary, ?MAX_WAIT, worker, [rabbit_amqqueue_process]}]}}. -- cgit v1.2.1 From 2d3682168b298eba45b8b816c9f880337880a3ae Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 1 Apr 2010 19:08:04 +0100 Subject: Split msg_store into two msg stores, one for persistent and one for transient. This is the first step in trying to make startup and recovery of data on disk much faster. --- include/rabbit.hrl | 3 + include/rabbit_msg_store.hrl | 5 - src/rabbit_amqqueue.erl | 8 +- src/rabbit_msg_store.erl | 489 ++++++++++++++++++++++++------------------ src/rabbit_msg_store_gc.erl | 45 ++-- src/rabbit_queue_index.erl | 17 +- src/rabbit_sup.erl | 7 +- src/rabbit_tests.erl | 131 ++++++----- src/rabbit_variable_queue.erl | 205 +++++++++++------- 9 files changed, 532 insertions(+), 378 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index df282029..e9fa6e37 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -177,6 +177,9 @@ -define(MAX_WAIT, 16#ffffffff). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + -ifdef(debug). -define(LOGDEBUG0(F), rabbit_log:debug(F)). -define(LOGDEBUG(F,A), rabbit_log:debug(F,A)). diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 6f557c18..2c2735d4 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -49,8 +49,3 @@ -define(FILE_SIZE_LIMIT, (16*1024*1024)). -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). 
%% 1MB - --define(FILE_SUMMARY_ETS_NAME, rabbit_msg_store_file_summary). --define(CACHE_ETS_NAME, rabbit_msg_store_cache). --define(FILE_HANDLES_ETS_NAME, rabbit_msg_store_file_handles). --define(CUR_FILE_CACHE_ETS_NAME, rabbit_msg_store_cur_file). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 98993540..f0540c93 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -65,8 +65,7 @@ -type(qfun(A) :: fun ((amqqueue()) -> A)). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). --type(seq_id() :: non_neg_integer()). --type(acktag() :: ('ack_not_on_disk' | {'ack_index_and_store', msg_id(), seq_id()})). +-type(acktag() :: any()). -spec(start/0 :: () -> 'ok'). -spec(declare/4 :: (queue_name(), boolean(), boolean(), amqp_table()) -> @@ -129,8 +128,11 @@ %%---------------------------------------------------------------------------- start() -> + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + fun (ok) -> finished end, ok]), DurableQueues = find_durable_queues(), - ok = rabbit_queue_index:start_msg_store(DurableQueues), + ok = rabbit_queue_index:start_persistent_msg_store(DurableQueues), {ok,_} = supervisor:start_child( rabbit_sup, {rabbit_amqqueue_sup, diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 86f1d9c9..a33b1a34 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,16 +33,14 @@ -behaviour(gen_server2). --export([start_link/3, write/2, read/2, contains/1, remove/1, release/1, - sync/2, client_init/0, client_terminate/1]). +-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, + sync/3, client_init/1, client_terminate/1]). --export([sync/0, gc_done/3, set_maximum_since_use/1]). %% internal +-export([sync/1, gc_done/4, set_maximum_since_use/2]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). --define(SERVER, ?MODULE). - -define(SYNC_INTERVAL, 5). %% milliseconds -define(GEOMETRIC_P, 0.3). %% parameter to geometric distribution rng @@ -62,44 +60,58 @@ sum_valid_data, %% sum of valid data in all files sum_file_size, %% sum of file sizes pending_gc_completion, %% things to do once GC completes - gc_active %% is the GC currently working? + gc_active, %% is the GC currently working? + gc_pid, %% pid of our GC + file_handles_ets, %% tid of the shared file handles table + file_summary_ets, %% tid of the file summary table + dedup_cache_ets, %% tid of dedup cache table + cur_file_cache_ets %% tid of current file cache table }). -record(client_msstate, { file_handle_cache, index_state, index_module, - dir + dir, + file_handles_ets, + file_summary_ets, + dedup_cache_ets, + cur_file_cache_ets }). %%---------------------------------------------------------------------------- -ifdef(use_specs). +-type(server() :: pid() | atom()). -type(msg_id() :: binary()). -type(msg() :: any()). -type(file_path() :: any()). -type(file_num() :: non_neg_integer()). --type(client_msstate() :: #client_msstate { file_handle_cache :: dict(), - index_state :: any(), - index_module :: atom(), - dir :: file_path() }). - --spec(start_link/3 :: - (file_path(), +-type(client_msstate() :: #client_msstate { file_handle_cache :: dict(), + index_state :: any(), + index_module :: atom(), + dir :: file_path(), + file_handles_ets :: tid(), + file_summary_ets :: tid(), + dedup_cache_ets :: tid(), + cur_file_cache_ets :: tid() }). 
+ +-spec(start_link/4 :: + (atom(), file_path(), (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(write/2 :: (msg_id(), msg()) -> 'ok'). -%% -spec(read/1 :: (msg_id()) -> {'ok', msg()} | 'not_found'). --spec(read/2 :: (msg_id(), client_msstate()) -> +-spec(write/4 :: (server(), msg_id(), msg(), client_msstate()) -> + {'ok', client_msstate()}). +-spec(read/3 :: (server(), msg_id(), client_msstate()) -> {{'ok', msg()} | 'not_found', client_msstate()}). --spec(contains/1 :: (msg_id()) -> boolean()). --spec(remove/1 :: ([msg_id()]) -> 'ok'). --spec(release/1 :: ([msg_id()]) -> 'ok'). --spec(sync/2 :: ([msg_id()], fun (() -> any())) -> 'ok'). --spec(gc_done/3 :: (non_neg_integer(), file_num(), file_num()) -> 'ok'). --spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). --spec(client_init/0 :: () -> client_msstate()). +-spec(contains/2 :: (server(), msg_id()) -> boolean()). +-spec(remove/2 :: (server(), [msg_id()]) -> 'ok'). +-spec(release/2 :: (server(), [msg_id()]) -> 'ok'). +-spec(sync/3 :: (server(), [msg_id()], fun (() -> any())) -> 'ok'). +-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> 'ok'). +-spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). +-spec(client_init/1 :: (server()) -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). -endif. @@ -264,28 +276,32 @@ %% public API %%---------------------------------------------------------------------------- -start_link(Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, - [Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], +start_link(Server, Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> + gen_server2:start_link({local, Server}, ?MODULE, + [Server, Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). -write(MsgId, Msg) -> - ok = add_to_cache(MsgId, Msg), - gen_server2:cast(?SERVER, {write, MsgId, Msg}). +write(Server, MsgId, Msg, CState = + #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> + ok = add_to_cache(CurFileCacheEts, MsgId, Msg), + {gen_server2:cast(Server, {write, MsgId, Msg}), CState}. -read(MsgId, CState) -> +read(Server, MsgId, CState = + #client_msstate { dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> %% 1. Check the dedup cache - case fetch_and_increment_cache(MsgId) of + case fetch_and_increment_cache(DedupCacheEts, MsgId) of not_found -> %% 2. Check the cur file cache - case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + case ets:lookup(CurFileCacheEts, MsgId) of [] -> Defer = fun() -> {gen_server2:pcall( - ?SERVER, 2, {read, MsgId}, infinity), + Server, 2, {read, MsgId}, infinity), CState} end, case index_lookup(MsgId, CState) of not_found -> Defer(); - MsgLocation -> client_read1(MsgLocation, Defer, CState) + MsgLocation -> client_read1(Server, MsgLocation, Defer, + CState) end; [{MsgId, Msg, _CacheRefCount}] -> %% Although we've found it, we don't know the @@ -296,25 +312,29 @@ read(MsgId, CState) -> {{ok, Msg}, CState} end. -contains(MsgId) -> gen_server2:call(?SERVER, {contains, MsgId}, infinity). -remove(MsgIds) -> gen_server2:cast(?SERVER, {remove, MsgIds}). -release(MsgIds) -> gen_server2:cast(?SERVER, {release, MsgIds}). -sync(MsgIds, K) -> gen_server2:cast(?SERVER, {sync, MsgIds, K}). -sync() -> gen_server2:pcast(?SERVER, 8, sync). %% internal - -gc_done(Reclaimed, Source, Destination) -> - gen_server2:pcast(?SERVER, 8, {gc_done, Reclaimed, Source, Destination}). 
- -set_maximum_since_use(Age) -> - gen_server2:pcast(?SERVER, 8, {set_maximum_since_use, Age}). - -client_init() -> - {IState, IModule, Dir} = - gen_server2:call(?SERVER, new_client_state, infinity), - #client_msstate { file_handle_cache = dict:new(), - index_state = IState, - index_module = IModule, - dir = Dir }. +contains(Server, MsgId) -> gen_server2:call(Server, {contains, MsgId}, infinity). +remove(Server, MsgIds) -> gen_server2:cast(Server, {remove, MsgIds}). +release(Server, MsgIds) -> gen_server2:cast(Server, {release, MsgIds}). +sync(Server, MsgIds, K) -> gen_server2:cast(Server, {sync, MsgIds, K}). +sync(Server) -> gen_server2:pcast(Server, 8, sync). %% internal + +gc_done(Server, Reclaimed, Source, Destination) -> + gen_server2:pcast(Server, 8, {gc_done, Reclaimed, Source, Destination}). + +set_maximum_since_use(Server, Age) -> + gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}). + +client_init(Server) -> + {IState, IModule, Dir, FileHandlesEts, FileSummaryEts, DedupCacheEts, + CurFileCacheEts} = gen_server2:call(Server, new_client_state, infinity), + #client_msstate { file_handle_cache = dict:new(), + index_state = IState, + index_module = IModule, + dir = Dir, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }. client_terminate(CState) -> close_all_handles(CState), @@ -324,53 +344,58 @@ client_terminate(CState) -> %% Client-side-only helpers %%---------------------------------------------------------------------------- -add_to_cache(MsgId, Msg) -> - case ets:insert_new(?CUR_FILE_CACHE_ETS_NAME, {MsgId, Msg, 1}) of +add_to_cache(CurFileCacheEts, MsgId, Msg) -> + case ets:insert_new(CurFileCacheEts, {MsgId, Msg, 1}) of true -> ok; false -> try - ets:update_counter(?CUR_FILE_CACHE_ETS_NAME, MsgId, {3, +1}), + ets:update_counter(CurFileCacheEts, MsgId, {3, +1}), ok - catch error:badarg -> add_to_cache(MsgId, Msg) + catch error:badarg -> add_to_cache(CurFileCacheEts, MsgId, Msg) end end. -client_read1(MsgLocation = #msg_location { msg_id = MsgId, file = File }, Defer, - CState) -> - case ets:lookup(?FILE_SUMMARY_ETS_NAME, File) of +client_read1(Server, #msg_location { msg_id = MsgId, file = File } = + MsgLocation, Defer, CState = + #client_msstate { file_summary_ets = FileSummaryEts }) -> + case ets:lookup(FileSummaryEts, File) of [] -> %% File has been GC'd and no longer exists. Go around again. - read(MsgId, CState); + read(Server, MsgId, CState); [#file_summary { locked = Locked, right = Right }] -> - client_read2(Locked, Right, MsgLocation, Defer, CState) + client_read2(Server, Locked, Right, MsgLocation, Defer, CState) end. 
-client_read2(false, undefined, #msg_location { - msg_id = MsgId, ref_count = RefCount }, Defer, CState) -> - case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of +client_read2(_Server, false, undefined, + #msg_location { msg_id = MsgId, ref_count = RefCount }, Defer, + CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts, + dedup_cache_ets = DedupCacheEts }) -> + case ets:lookup(CurFileCacheEts, MsgId) of [] -> Defer(); %% may have rolled over [{MsgId, Msg, _CacheRefCount}] -> - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg), {{ok, Msg}, CState} end; -client_read2(true, _Right, _MsgLocation, Defer, _CState) -> +client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> %% Of course, in the mean time, the GC could have run and our msg %% is actually in a different file, unlocked. However, defering is %% the safest and simplest thing to do. Defer(); -client_read2(false, _Right, #msg_location { - msg_id = MsgId, ref_count = RefCount, file = File }, - Defer, CState) -> +client_read2(Server, false, _Right, + #msg_location { msg_id = MsgId, ref_count = RefCount, file = File }, + Defer, CState = + #client_msstate { file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> %% It's entirely possible that everything we're doing from here on %% is for the wrong file, or a non-existent file, as a GC may have %% finished. - try ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, - {#file_summary.readers, +1}) + try ets:update_counter(FileSummaryEts, File, {#file_summary.readers, +1}) catch error:badarg -> %% the File has been GC'd and deleted. Go around. - read(MsgId, CState) + read(Server, MsgId, CState) end, - Release = fun() -> ets:update_counter(?FILE_SUMMARY_ETS_NAME, File, + Release = fun() -> ets:update_counter(FileSummaryEts, File, {#file_summary.readers, -1}) end, %% If a GC hasn't already started, it won't start now. Need to @@ -378,7 +403,7 @@ client_read2(false, _Right, #msg_location { %% between lookup and update_counter (thus GC started before our %% +1). [#file_summary { locked = Locked }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + ets:lookup(FileSummaryEts, File), case Locked of true -> %% If we get a badarg here, then the GC has finished and @@ -390,7 +415,7 @@ client_read2(false, _Right, #msg_location { %% readers, msg_store ets:deletes (and unlocks the dest) try Release(), Defer() - catch error:badarg -> read(MsgId, CState) + catch error:badarg -> read(Server, MsgId, CState) end; false -> %% Ok, we're definitely safe to continue - a GC can't @@ -410,23 +435,25 @@ client_read2(false, _Right, #msg_location { MsgLocation = #msg_location { file = File } -> %% Still the same file. %% This is fine to fail (already exists) - ets:insert_new( - ?FILE_HANDLES_ETS_NAME, {{self(), File}, open}), + ets:insert_new(FileHandlesEts, {{self(), File}, open}), CState1 = close_all_indicated(CState), - {Msg, CState2} = read_from_disk(MsgLocation, CState1), - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + {Msg, CState2} = + read_from_disk(MsgLocation, CState1, DedupCacheEts), + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, + Msg), Release(), %% this MUST NOT fail with badarg {{ok, Msg}, CState2}; MsgLocation -> %% different file! Release(), %% this MUST NOT fail with badarg - client_read1(MsgLocation, Defer, CState) + client_read1(Server, MsgLocation, Defer, CState) end end. 
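%% The rewritten read path above stays optimistic rather than locked: a
%% client bumps #file_summary.readers, then re-reads the locked flag; any
%% badarg from ets means the GC deleted that file's row in the interim, and
%% the whole attempt simply restarts via read/3. Only the final Release()
%% decrement of the readers count must never fail.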
-close_all_indicated(CState) -> - Objs = ets:match_object(?FILE_HANDLES_ETS_NAME, {{self(), '_'}, close}), +close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = + CState) -> + Objs = ets:match_object(FileHandlesEts, {{self(), '_'}, close}), lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> - true = ets:delete(?FILE_HANDLES_ETS_NAME, Key), + true = ets:delete(FileHandlesEts, Key), close_handle(File, CStateM) end, CState, Objs). @@ -434,12 +461,13 @@ close_all_indicated(CState) -> %% gen_server callbacks %%---------------------------------------------------------------------------- -init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> +init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> process_flag(trap_exit, true), - ok = - file_handle_cache:register_callback(?MODULE, set_maximum_since_use, []), + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), + Dir = filename:join(BaseDir, atom_to_list(Server)), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), {ok, IndexModule} = application:get_env(msg_store_index_module), @@ -448,28 +476,32 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> IndexState = IndexModule:init(Dir), InitFile = 0, - ?FILE_SUMMARY_ETS_NAME = ets:new(?FILE_SUMMARY_ETS_NAME, - [ordered_set, public, named_table, - {keypos, #file_summary.file}]), - ?CACHE_ETS_NAME = ets:new(?CACHE_ETS_NAME, [set, public, named_table]), - ?FILE_HANDLES_ETS_NAME = ets:new(?FILE_HANDLES_ETS_NAME, - [ordered_set, public, named_table]), - ?CUR_FILE_CACHE_ETS_NAME = ets:new(?CUR_FILE_CACHE_ETS_NAME, - [set, public, named_table]), - State = - #msstate { dir = Dir, - index_module = IndexModule, - index_state = IndexState, - current_file = InitFile, - current_file_handle = undefined, - file_handle_cache = dict:new(), - on_sync = [], - sync_timer_ref = undefined, - sum_valid_data = 0, - sum_file_size = 0, - pending_gc_completion = [], - gc_active = false - }, + FileSummaryEts = ets:new(rabbit_msg_store_file_summary, + [ordered_set, public, + {keypos, #file_summary.file}]), + DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), + FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, + [ordered_set, public]), + CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), + + State = #msstate { dir = Dir, + index_module = IndexModule, + index_state = IndexState, + current_file = InitFile, + current_file_handle = undefined, + file_handle_cache = dict:new(), + on_sync = [], + sync_timer_ref = undefined, + sum_valid_data = 0, + sum_file_size = 0, + pending_gc_completion = [], + gc_active = false, + gc_pid = undefined, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts + }, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), FileNames = @@ -490,9 +522,11 @@ init([Dir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), - {ok, _Pid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule), + {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, + FileSummaryEts), - {ok, State1 #msstate { current_file_handle = FileHdl }, hibernate, + {ok, State1 #msstate { current_file_handle = FileHdl, + gc_pid = GCPid }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
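%% With named_table gone, every table created in init/1 above is
%% per-instance: the tids live in #msstate, and new_client_state hands the
%% same tids to clients, i.e.
%%
%%   {IState, IModule, Dir, FileHandlesEts, FileSummaryEts, DedupCacheEts,
%%    CurFileCacheEts} = gen_server2:call(Server, new_client_state, infinity),
%%
%% which is what lets the persistent and transient stores run side by side.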
handle_call({read, MsgId}, From, State) -> @@ -504,16 +538,23 @@ handle_call({contains, MsgId}, From, State) -> noreply(State1); handle_call(new_client_state, _From, - State = #msstate { index_state = IndexState, dir = Dir, - index_module = IndexModule }) -> - reply({IndexState, IndexModule, Dir}, State). + State = #msstate { index_state = IndexState, dir = Dir, + index_module = IndexModule, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> + reply({IndexState, IndexModule, Dir, FileHandlesEts, FileSummaryEts, + DedupCacheEts, CurFileCacheEts}, State). handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, current_file = CurFile, sum_valid_data = SumValid, - sum_file_size = SumFileSize }) -> - true = 0 =< ets:update_counter(?CUR_FILE_CACHE_ETS_NAME, MsgId, {3, -1}), + sum_file_size = SumFileSize, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts }) -> + true = 0 =< ets:update_counter(CurFileCacheEts, MsgId, {3, -1}), case index_lookup(MsgId, State) of not_found -> %% New message, lots to do @@ -528,7 +569,7 @@ handle_cast({write, MsgId, Msg}, right = undefined, locked = false, file_size = FileSize }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, CurFile), + ets:lookup(FileSummaryEts, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> %% can't be any holes in this file @@ -536,7 +577,7 @@ handle_cast({write, MsgId, Msg}, true -> ContiguousTop end, true = ets:update_element( - ?FILE_SUMMARY_ETS_NAME, + FileSummaryEts, CurFile, [{#file_summary.valid_total_size, ValidTotalSize1}, {#file_summary.contiguous_top, ContiguousTop1}, @@ -562,8 +603,10 @@ handle_cast({remove, MsgIds}, State) -> State, MsgIds), noreply(maybe_compact(State1)); -handle_cast({release, MsgIds}, State) -> - lists:foreach(fun (MsgId) -> decrement_cache(MsgId) end, MsgIds), +handle_cast({release, MsgIds}, State = + #msstate { dedup_cache_ets = DedupCacheEts }) -> + lists:foreach( + fun (MsgId) -> decrement_cache(DedupCacheEts, MsgId) end, MsgIds), noreply(State); handle_cast({sync, MsgIds, K}, @@ -582,32 +625,34 @@ handle_cast({sync, MsgIds, K}, end; handle_cast(sync, State) -> - noreply(sync(State)); + noreply(internal_sync(State)); handle_cast({gc_done, Reclaimed, Source, Dest}, State = #msstate { sum_file_size = SumFileSize, - gc_active = {Source, Dest} }) -> + gc_active = {Source, Dest}, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> %% GC done, so now ensure that any clients that have open fhs to %% those files close them before using them again. This has to be %% done here, and not when starting up the GC, because if done %% when starting up the GC, the client could find the close, and %% close and reopen the fh, whilst the GC is waiting for readers %% to disappear, before it's actually done the GC. 
- true = mark_handle_to_close(Source), - true = mark_handle_to_close(Dest), + true = mark_handle_to_close(FileHandlesEts, Source), + true = mark_handle_to_close(FileHandlesEts, Dest), %% we always move data left, so Source has gone and was on the %% right, so need to make dest = source.right.left, and also %% dest.right = source.right [#file_summary { left = Dest, right = SourceRight, locked = true, readers = 0 }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, Source), + ets:lookup(FileSummaryEts, Source), %% this could fail if SourceRight == undefined - ets:update_element(?FILE_SUMMARY_ETS_NAME, SourceRight, + ets:update_element(FileSummaryEts, SourceRight, {#file_summary.left, Dest}), - true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Dest, + true = ets:update_element(FileSummaryEts, Dest, [{#file_summary.locked, false}, {#file_summary.right, SourceRight}]), - true = ets:delete(?FILE_SUMMARY_ETS_NAME, Source), + true = ets:delete(FileSummaryEts, Source), noreply(run_pending( State #msstate { sum_file_size = SumFileSize - Reclaimed, gc_active = false })); @@ -617,28 +662,33 @@ handle_cast({set_maximum_since_use, Age}, State) -> noreply(State). handle_info(timeout, State) -> - noreply(sync(State)); + noreply(internal_sync(State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. -terminate(_Reason, State = #msstate { index_state = IndexState, - index_module = IndexModule, - current_file_handle = FileHdl }) -> +terminate(_Reason, State = #msstate { index_state = IndexState, + index_module = IndexModule, + current_file_handle = FileHdl, + gc_pid = GCPid, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> %% stop the gc first, otherwise it could be working and we pull %% out the ets tables from under it. - ok = rabbit_msg_store_gc:stop(), + ok = rabbit_msg_store_gc:stop(GCPid), State1 = case FileHdl of undefined -> State; - _ -> State2 = sync(State), + _ -> State2 = internal_sync(State), file_handle_cache:close(FileHdl), State2 end, State3 = close_all_handles(State1), - ets:delete(?FILE_SUMMARY_ETS_NAME), - ets:delete(?CACHE_ETS_NAME), - ets:delete(?FILE_HANDLES_ETS_NAME), - ets:delete(?CUR_FILE_CACHE_ETS_NAME), + ets:delete(FileSummaryEts), + ets:delete(DedupCacheEts), + ets:delete(FileHandlesEts), + ets:delete(CurFileCacheEts), IndexModule:terminate(IndexState), State3 #msstate { index_state = undefined, current_file_handle = undefined }. @@ -686,7 +736,7 @@ sort_file_names(FileNames) -> lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, FileNames). -sync(State = #msstate { current_file_handle = CurHdl, +internal_sync(State = #msstate { current_file_handle = CurHdl, on_sync = Syncs }) -> State1 = stop_sync_timer(State), case Syncs of @@ -697,12 +747,13 @@ sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. 
-read_message(MsgId, From, State) -> +read_message(MsgId, From, State = + #msstate { dedup_cache_ets = DedupCacheEts }) -> case index_lookup(MsgId, State) of not_found -> gen_server2:reply(From, not_found), State; MsgLocation -> - case fetch_and_increment_cache(MsgId) of + case fetch_and_increment_cache(DedupCacheEts, MsgId) of not_found -> read_message1(From, MsgLocation, State); Msg -> @@ -714,33 +765,36 @@ read_message(MsgId, From, State) -> read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, file = File, offset = Offset } = MsgLoc, State = #msstate { current_file = CurFile, - current_file_handle = CurHdl }) -> + current_file_handle = CurHdl, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> case File =:= CurFile of true -> {Msg, State1} = %% can return [] if msg in file existed on startup - case ets:lookup(?CUR_FILE_CACHE_ETS_NAME, MsgId) of + case ets:lookup(CurFileCacheEts, MsgId) of [] -> ok = case {ok, Offset} >= file_handle_cache:current_raw_offset(CurHdl) of true -> file_handle_cache:flush(CurHdl); false -> ok end, - read_from_disk(MsgLoc, State); + read_from_disk(MsgLoc, State, DedupCacheEts); [{MsgId, Msg1, _CacheRefCount}] -> {Msg1, State} end, - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg), gen_server2:reply(From, {ok, Msg}), State1; false -> [#file_summary { locked = Locked }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + ets:lookup(FileSummaryEts, File), case Locked of true -> add_to_pending_gc_completion({read, MsgId, From}, State); false -> - {Msg, State1} = read_from_disk(MsgLoc, State), + {Msg, State1} = read_from_disk(MsgLoc, State, DedupCacheEts), gen_server2:reply(From, {ok, Msg}), State1 end @@ -748,7 +802,8 @@ read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, read_from_disk(#msg_location { msg_id = MsgId, ref_count = RefCount, file = File, offset = Offset, - total_size = TotalSize }, State) -> + total_size = TotalSize }, State, + DedupCacheEts) -> {Hdl, State1} = get_read_handle(File, State), {ok, Offset} = file_handle_cache:position(Hdl, Offset), {ok, {MsgId, Msg}} = @@ -764,12 +819,13 @@ read_from_disk(#msg_location { msg_id = MsgId, ref_count = RefCount, {proc_dict, get()} ]}}) end, - ok = maybe_insert_into_cache(RefCount, MsgId, Msg), + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg), {Msg, State1}. -maybe_insert_into_cache(RefCount, MsgId, Msg) when RefCount > 1 -> - insert_into_cache(MsgId, Msg); -maybe_insert_into_cache(_RefCount, _MsgId, _Msg) -> +maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg) + when RefCount > 1 -> + insert_into_cache(DedupCacheEts, MsgId, Msg); +maybe_insert_into_cache(_DedupCacheEts, _RefCount, _MsgId, _Msg) -> ok. contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> @@ -788,7 +844,9 @@ contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> end end. 
-remove_message(MsgId, State = #msstate { sum_valid_data = SumValid }) -> +remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(MsgId, State), @@ -797,11 +855,11 @@ remove_message(MsgId, State = #msstate { sum_valid_data = SumValid }) -> %% don't remove from CUR_FILE_CACHE_ETS_NAME here because %% there may be further writes in the mailbox for the same %% msg. - ok = remove_cache_entry(MsgId), + ok = remove_cache_entry(DedupCacheEts, MsgId), [#file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, locked = Locked }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + ets:lookup(FileSummaryEts, File), case Locked of true -> add_to_pending_gc_completion({remove, MsgId}, State); @@ -810,15 +868,14 @@ remove_message(MsgId, State = #msstate { sum_valid_data = SumValid }) -> ContiguousTop1 = lists:min([ContiguousTop, Offset]), ValidTotalSize1 = ValidTotalSize - TotalSize, true = ets:update_element( - ?FILE_SUMMARY_ETS_NAME, - File, + FileSummaryEts, File, [{#file_summary.valid_total_size, ValidTotalSize1}, {#file_summary.contiguous_top, ContiguousTop1}]), State1 = delete_file_if_empty(File, State), State1 #msstate { sum_valid_data = SumValid - TotalSize } end; _ when 1 < RefCount -> - ok = decrement_cache(MsgId), + ok = decrement_cache(DedupCacheEts, MsgId), %% only update field, otherwise bad interaction with concurrent GC ok = index_update_fields(MsgId, {#msg_location.ref_count, RefCount - 1}, @@ -857,11 +914,12 @@ close_handle(Key, FHC) -> error -> FHC end. -close_all_handles(CState = #client_msstate { file_handle_cache = FHC }) -> +close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_handle_cache = FHC }) -> Self = self(), ok = dict:fold(fun (File, Hdl, ok) -> true = - ets:delete(?FILE_HANDLES_ETS_NAME, {Self, File}), + ets:delete(FileHandlesEts, {Self, File}), file_handle_cache:close(Hdl) end, ok, FHC), CState #client_msstate { file_handle_cache = dict:new() }; @@ -897,27 +955,27 @@ get_read_handle(FileNum, FHC, Dir) -> %% message cache helper functions %%---------------------------------------------------------------------------- -remove_cache_entry(MsgId) -> - true = ets:delete(?CACHE_ETS_NAME, MsgId), +remove_cache_entry(DedupCacheEts, MsgId) -> + true = ets:delete(DedupCacheEts, MsgId), ok. -fetch_and_increment_cache(MsgId) -> - case ets:lookup(?CACHE_ETS_NAME, MsgId) of +fetch_and_increment_cache(DedupCacheEts, MsgId) -> + case ets:lookup(DedupCacheEts, MsgId) of [] -> not_found; [{_MsgId, Msg, _RefCount}] -> try - ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, 1}) + ets:update_counter(DedupCacheEts, MsgId, {3, 1}) catch error:badarg -> %% someone has deleted us in the meantime, insert us - ok = insert_into_cache(MsgId, Msg) + ok = insert_into_cache(DedupCacheEts, MsgId, Msg) end, Msg end. -decrement_cache(MsgId) -> - true = try case ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, -1}) of - N when N =< 0 -> true = ets:delete(?CACHE_ETS_NAME, MsgId); +decrement_cache(DedupCacheEts, MsgId) -> + true = try case ets:update_counter(DedupCacheEts, MsgId, {3, -1}) of + N when N =< 0 -> true = ets:delete(DedupCacheEts, MsgId); _N -> true end catch error:badarg -> @@ -928,14 +986,14 @@ decrement_cache(MsgId) -> end, ok. 
-insert_into_cache(MsgId, Msg) -> - case ets:insert_new(?CACHE_ETS_NAME, {MsgId, Msg, 1}) of +insert_into_cache(DedupCacheEts, MsgId, Msg) -> + case ets:insert_new(DedupCacheEts, {MsgId, Msg, 1}) of true -> ok; false -> try - ets:update_counter(?CACHE_ETS_NAME, MsgId, {3, 1}), + ets:update_counter(DedupCacheEts, MsgId, {3, 1}), ok catch error:badarg -> - insert_into_cache(MsgId, Msg) + insert_into_cache(DedupCacheEts, MsgId, Msg) end end. @@ -1125,16 +1183,17 @@ build_index(Files, State) -> {Offset, State1} = build_index(undefined, Files, State), {Offset, lists:foldl(fun delete_file_if_empty/2, State1, Files)}. -build_index(Left, [], State) -> +build_index(Left, [], State = #msstate { file_summary_ets = FileSummaryEts }) -> ok = index_delete_by_file(undefined, State), - Offset = case ets:lookup(?FILE_SUMMARY_ETS_NAME, Left) of + Offset = case ets:lookup(FileSummaryEts, Left) of [] -> 0; [#file_summary { file_size = FileSize }] -> FileSize end, {Offset, State #msstate { current_file = Left }}; build_index(Left, [File|Files], State = #msstate { dir = Dir, sum_valid_data = SumValid, - sum_file_size = SumFileSize }) -> + sum_file_size = SumFileSize, + file_summary_ets = FileSummaryEts }) -> {ok, Messages, FileSize} = rabbit_msg_store_misc:scan_file_for_valid_messages( Dir, rabbit_msg_store_misc:filenum_to_name(File)), @@ -1168,7 +1227,7 @@ build_index(Left, [File|Files], [F|_] -> {F, FileSize} end, true = - ets:insert_new(?FILE_SUMMARY_ETS_NAME, #file_summary { + ets:insert_new(FileSummaryEts, #file_summary { file = File, valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, locked = false, left = Left, right = Right, file_size = FileSize1, @@ -1184,61 +1243,65 @@ build_index(Left, [File|Files], maybe_roll_to_new_file(Offset, State = #msstate { dir = Dir, current_file_handle = CurHdl, - current_file = CurFile }) + current_file = CurFile, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts }) when Offset >= ?FILE_SIZE_LIMIT -> - State1 = sync(State), + State1 = internal_sync(State), ok = file_handle_cache:close(CurHdl), NextFile = CurFile + 1, {ok, NextHdl} = rabbit_msg_store_misc:open_file( Dir, rabbit_msg_store_misc:filenum_to_name(NextFile), ?WRITE_MODE), true = ets:insert_new( - ?FILE_SUMMARY_ETS_NAME, #file_summary { + FileSummaryEts, #file_summary { file = NextFile, valid_total_size = 0, contiguous_top = 0, left = CurFile, right = undefined, file_size = 0, locked = false, readers = 0 }), - true = ets:update_element(?FILE_SUMMARY_ETS_NAME, CurFile, + true = ets:update_element(FileSummaryEts, CurFile, {#file_summary.right, NextFile}), - true = ets:match_delete(?CUR_FILE_CACHE_ETS_NAME, {'_', '_', 0}), + true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}), State1 #msstate { current_file_handle = NextHdl, current_file = NextFile }; maybe_roll_to_new_file(_, State) -> State. 
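%% The ets:match_delete(CurFileCacheEts, {'_', '_', 0}) in
%% maybe_roll_to_new_file above is the "on roll, delete everything with a
%% ref count of 0" rule from the cur-file-cache commit: entries whose write
%% is still queued in the server's mailbox keep a positive count and so
%% survive the roll.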
-maybe_compact(State = #msstate { sum_valid_data = SumValid, - sum_file_size = SumFileSize, - gc_active = false }) +maybe_compact(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + gc_active = false, + gc_pid = GCPid, + file_summary_ets = FileSummaryEts }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> - First = ets:first(?FILE_SUMMARY_ETS_NAME), + First = ets:first(FileSummaryEts), N = random_distributions:geometric(?GEOMETRIC_P), - case find_files_to_gc(N, First) of + case find_files_to_gc(FileSummaryEts, N, First) of undefined -> State; {Source, Dest} -> State1 = close_handle(Source, close_handle(Dest, State)), - true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Source, + true = ets:update_element(FileSummaryEts, Source, {#file_summary.locked, true}), - true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Dest, + true = ets:update_element(FileSummaryEts, Dest, {#file_summary.locked, true}), - ok = rabbit_msg_store_gc:gc(Source, Dest), + ok = rabbit_msg_store_gc:gc(GCPid, Source, Dest), State1 #msstate { gc_active = {Source, Dest} } end; maybe_compact(State) -> State. -mark_handle_to_close(File) -> - [ ets:update_element(?FILE_HANDLES_ETS_NAME, Key, {2, close}) - || {Key, open} <- ets:match_object(?FILE_HANDLES_ETS_NAME, +mark_handle_to_close(FileHandlesEts, File) -> + [ ets:update_element(FileHandlesEts, Key, {2, close}) + || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ], true. -find_files_to_gc(_N, '$end_of_table') -> +find_files_to_gc(_FileSummaryEts, _N, '$end_of_table') -> undefined; -find_files_to_gc(N, First) -> +find_files_to_gc(FileSummaryEts, N, First) -> [FirstObj = #file_summary { right = Right }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, First), - Pairs = find_files_to_gc(N, FirstObj, - ets:lookup(?FILE_SUMMARY_ETS_NAME, Right), []), + ets:lookup(FileSummaryEts, First), + Pairs = find_files_to_gc(FileSummaryEts, N, FirstObj, + ets:lookup(FileSummaryEts, Right), []), case Pairs of [] -> undefined; [Pair] -> Pair; @@ -1246,9 +1309,9 @@ find_files_to_gc(N, First) -> lists:nth(M, Pairs) end. -find_files_to_gc(_N, #file_summary {}, [], Pairs) -> +find_files_to_gc(_FileSummaryEts, _N, #file_summary {}, [], Pairs) -> lists:reverse(Pairs); -find_files_to_gc(N, +find_files_to_gc(FileSummaryEts, N, #file_summary { right = Source, file = Dest, valid_total_size = DestValid }, [SourceObj = #file_summary { left = Dest, right = SourceRight, @@ -1259,22 +1322,24 @@ find_files_to_gc(N, Pair = {Source, Dest}, case N == 1 of true -> [Pair]; - false -> find_files_to_gc((N - 1), SourceObj, - ets:lookup(?FILE_SUMMARY_ETS_NAME, SourceRight), + false -> find_files_to_gc(FileSummaryEts, (N - 1), SourceObj, + ets:lookup(FileSummaryEts, SourceRight), [Pair | Pairs]) end; -find_files_to_gc(N, _Left, +find_files_to_gc(FileSummaryEts, N, _Left, [Right = #file_summary { right = RightRight }], Pairs) -> find_files_to_gc( - N, Right, ets:lookup(?FILE_SUMMARY_ETS_NAME, RightRight), Pairs). + FileSummaryEts, N, Right, ets:lookup(FileSummaryEts, RightRight), Pairs). 
delete_file_if_empty(File, State = #msstate { current_file = File }) -> State; delete_file_if_empty(File, State = - #msstate { dir = Dir, sum_file_size = SumFileSize }) -> + #msstate { dir = Dir, sum_file_size = SumFileSize, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> [#file_summary { valid_total_size = ValidData, file_size = FileSize, left = Left, right = Right, locked = false }] - = ets:lookup(?FILE_SUMMARY_ETS_NAME, File), + = ets:lookup(FileSummaryEts, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined @@ -1282,16 +1347,16 @@ delete_file_if_empty(File, State = {undefined, _} when not is_atom(Right) -> %% the eldest file is empty. true = ets:update_element( - ?FILE_SUMMARY_ETS_NAME, Right, + FileSummaryEts, Right, {#file_summary.left, undefined}); {_, _} when not is_atom(Right) -> - true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Right, + true = ets:update_element(FileSummaryEts, Right, {#file_summary.left, Left}), - true = ets:update_element(?FILE_SUMMARY_ETS_NAME, Left, + true = ets:update_element(FileSummaryEts, Left, {#file_summary.right, Right}) end, - true = mark_handle_to_close(File), - true = ets:delete(?FILE_SUMMARY_ETS_NAME, File), + true = mark_handle_to_close(FileHandlesEts, File), + true = ets:delete(FileSummaryEts, File), State1 = close_handle(File, State), ok = file:delete(rabbit_msg_store_misc:form_filename( Dir, diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index a64733df..9cf11af2 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/3, gc/2, stop/0]). +-export([start_link/4, gc/3, stop/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -41,40 +41,42 @@ -record(gcstate, {dir, index_state, - index_module + index_module, + parent, + file_summary_ets }). -include("rabbit_msg_store.hrl"). --define(SERVER, ?MODULE). - %%---------------------------------------------------------------------------- -start_link(Dir, IndexState, IndexModule) -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, - [Dir, IndexState, IndexModule], - [{timeout, infinity}]). +start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> + gen_server2:start_link( + ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], + [{timeout, infinity}]). -gc(Source, Destination) -> - gen_server2:cast(?SERVER, {gc, Source, Destination}). +gc(Server, Source, Destination) -> + gen_server2:cast(Server, {gc, Source, Destination}). -stop() -> - gen_server2:call(?SERVER, stop). +stop(Server) -> + gen_server2:call(Server, stop). %%---------------------------------------------------------------------------- -init([Dir, IndexState, IndexModule]) -> +init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> {ok, #gcstate { dir = Dir, index_state = IndexState, - index_module = IndexModule }, + index_module = IndexModule, parent = Parent, + file_summary_ets = FileSummaryEts}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call(stop, _From, State) -> {stop, normal, ok, State}. 
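%% The collector is no longer a registered singleton: start_link/4 starts
%% it anonymously, #gcstate carries the parent's pid and the file_summary
%% table id, and completion is reported with
%% rabbit_msg_store:gc_done(Parent, ...), so each msg_store instance owns
%% its own GC process.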
-handle_cast({gc, Source, Destination}, State) -> - Reclaimed = adjust_meta_and_combine(Source, Destination, State), - ok = rabbit_msg_store:gc_done(Reclaimed, Source, Destination), +handle_cast({gc, Source, Destination}, State = #gcstate { parent = Parent }) -> + Reclaimed = adjust_meta_and_combine(Source, Destination, + State), + ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, Destination), {noreply, State, hibernate}. handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> @@ -92,18 +94,19 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -adjust_meta_and_combine(SourceFile, DestFile, State) -> +adjust_meta_and_combine(SourceFile, DestFile, State = + #gcstate { file_summary_ets = FileSummaryEts }) -> [SourceObj = #file_summary { readers = SourceReaders, valid_total_size = SourceValidData, left = DestFile, file_size = SourceFileSize, locked = true }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, SourceFile), + ets:lookup(FileSummaryEts, SourceFile), [DestObj = #file_summary { readers = DestReaders, valid_total_size = DestValidData, right = SourceFile, file_size = DestFileSize, locked = true }] = - ets:lookup(?FILE_SUMMARY_ETS_NAME, DestFile), + ets:lookup(FileSummaryEts, DestFile), case SourceReaders =:= 0 andalso DestReaders =:= 0 of true -> @@ -112,7 +115,7 @@ adjust_meta_and_combine(SourceFile, DestFile, State) -> %% don't update dest.right, because it could be changing %% at the same time true = ets:update_element( - ?FILE_SUMMARY_ETS_NAME, DestFile, + FileSummaryEts, DestFile, [{#file_summary.valid_total_size, TotalValidData}, {#file_summary.contiguous_top, TotalValidData}, {#file_summary.file_size, TotalValidData}]), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a16efb20..c0c1b40b 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -34,7 +34,8 @@ -export([init/1, terminate/1, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, start_msg_store/1]). + find_lowest_seq_id_seg_and_next_seq_id/1, + start_persistent_msg_store/1]). -define(CLEAN_FILENAME, "clean.dot"). @@ -207,7 +208,7 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). --spec(start_msg_store/1 :: ([amqqueue()]) -> 'ok'). +-spec(start_persistent_msg_store/1 :: ([amqqueue()]) -> 'ok'). -endif. @@ -250,7 +251,8 @@ init(Name) -> {Segment3, DCountAcc2}) -> {Segment4, DCountDelta} = maybe_add_to_journal( - rabbit_msg_store:contains(MsgId), + rabbit_msg_store:contains( + ?PERSISTENT_MSG_STORE, MsgId), CleanShutdown, Del, RelSeq, Segment3), {Segment4, DCountAcc2 + DCountDelta} end, {Segment1 #segment { pubs = PubCount, @@ -379,7 +381,7 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> end, {LowSeqIdSeg, NextSeqId, State}. 
-start_msg_store(DurableQueues) -> +start_persistent_msg_store(DurableQueues) -> DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), Queue #amqqueue.name} || Queue <- DurableQueues ]), @@ -404,10 +406,9 @@ start_msg_store(DurableQueues) -> {DurableAcc, [QueueDir | TransientAcc]} end end, {[], []}, Directories), - MsgStoreDir = filename:join(rabbit_mnesia:dir(), "msg_store"), - ok = rabbit_sup:start_child(rabbit_msg_store, [MsgStoreDir, - fun queue_index_walker/1, - DurableQueueNames]), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + fun queue_index_walker/1, DurableQueueNames]), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), ok = delete_queue_directory(Dir) diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index 25715e6e..2c5e5112 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -33,7 +33,7 @@ -behaviour(supervisor). --export([start_link/0, start_child/1, start_child/2, +-export([start_link/0, start_child/1, start_child/2, start_child/3, start_restartable_child/1, start_restartable_child/2]). -export([init/1]). @@ -49,8 +49,11 @@ start_child(Mod) -> start_child(Mod, []). start_child(Mod, Args) -> + start_child(Mod, Mod, Args). + +start_child(ChildId, Mod, Args) -> {ok, _} = supervisor:start_child(?SERVER, - {Mod, {Mod, start_link, Args}, + {ChildId, {Mod, start_link, Args}, transient, ?MAX_WAIT, worker, [Mod]}), ok. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 96c3f1bc..3ccb83b6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -991,20 +991,28 @@ bad_handle_hook(_, _, _) -> extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). -msg_store_dir() -> - filename:join(rabbit_mnesia:dir(), "msg_store"). - start_msg_store_empty() -> start_msg_store(fun (ok) -> finished end, ok). start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> - rabbit_sup:start_child(rabbit_msg_store, [msg_store_dir(), MsgRefDeltaGen, - MsgRefDeltaGenInit]). + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + MsgRefDeltaGen, MsgRefDeltaGenInit]), + start_transient_msg_store(). + +start_transient_msg_store() -> + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + fun (ok) -> finished end, ok]). stop_msg_store() -> - case supervisor:terminate_child(rabbit_sup, rabbit_msg_store) of - ok -> supervisor:delete_child(rabbit_sup, rabbit_msg_store); - E -> E + case supervisor:terminate_child(rabbit_sup, ?PERSISTENT_MSG_STORE) of + ok -> supervisor:delete_child(rabbit_sup, ?PERSISTENT_MSG_STORE), + case supervisor:terminate_child(rabbit_sup, ?TRANSIENT_MSG_STORE) of + ok -> supervisor:delete_child(rabbit_sup, ?TRANSIENT_MSG_STORE); + E -> {transient, E} + end; + E -> {persistent, E} end. msg_id_bin(X) -> @@ -1012,13 +1020,14 @@ msg_id_bin(X) -> msg_store_contains(Atom, MsgIds) -> Atom = lists:foldl( - fun (MsgId, Atom1) when Atom1 =:= Atom -> - rabbit_msg_store:contains(MsgId) end, Atom, MsgIds). + fun (MsgId, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, MsgId) end, + Atom, MsgIds). msg_store_sync(MsgIds) -> Ref = make_ref(), Self = self(), - ok = rabbit_msg_store:sync(MsgIds, + ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, MsgIds, fun () -> Self ! 
{sync, Ref} end), receive {sync, Ref} -> ok @@ -1031,15 +1040,17 @@ msg_store_sync(MsgIds) -> msg_store_read(MsgIds, MSCState) -> lists:foldl( fun (MsgId, MSCStateM) -> - {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read(MsgId, MSCStateM), + {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, MsgId, MSCStateM), MSCStateN end, MSCState, MsgIds). -msg_store_write(MsgIds) -> - ok = lists:foldl( - fun (MsgId, ok) -> rabbit_msg_store:write(MsgId, MsgId) end, - ok, MsgIds). +msg_store_write(MsgIds, MSCState) -> + lists:foldl( + fun (MsgId, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId, MsgId, MSCStateN) end, + {ok, MSCState}, MsgIds). test_msg_store() -> stop_msg_store(), @@ -1049,25 +1060,27 @@ test_msg_store() -> {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), %% check we don't contain any of the msgs we're about to publish false = msg_store_contains(false, MsgIds), + MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), %% publish the first half - ok = msg_store_write(MsgIds1stHalf), + {ok, MSCState1} = msg_store_write(MsgIds1stHalf, MSCState), %% sync on the first half ok = msg_store_sync(MsgIds1stHalf), %% publish the second half - ok = msg_store_write(MsgIds2ndHalf), + {ok, MSCState2} = msg_store_write(MsgIds2ndHalf, MSCState1), %% sync on the first half again - the msg_store will be dirty, but %% we won't need the fsync ok = msg_store_sync(MsgIds1stHalf), %% check they're all in there true = msg_store_contains(true, MsgIds), %% publish the latter half twice so we hit the caching and ref count code - ok = msg_store_write(MsgIds2ndHalf), + {ok, MSCState3} = msg_store_write(MsgIds2ndHalf, MSCState2), %% check they're still all in there true = msg_store_contains(true, MsgIds), %% sync on the 2nd half, but do lots of individual syncs to try %% and cause coalescing to happen ok = lists:foldl( fun (MsgId, ok) -> rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, [MsgId], fun () -> Self ! 
{sync, MsgId} end)
           end, ok, MsgIds2ndHalf),
     lists:foldl(
@@ -1085,23 +1098,22 @@ test_msg_store() ->
     %% should hit a different code path
     ok = msg_store_sync(MsgIds1stHalf),
     %% read them all
-    MSCState = rabbit_msg_store:client_init(),
-    MSCState1 = msg_store_read(MsgIds, MSCState),
+    MSCState4 = msg_store_read(MsgIds, MSCState3),
     %% read them all again - this will hit the cache, not disk
-    MSCState2 = msg_store_read(MsgIds, MSCState1),
+    MSCState5 = msg_store_read(MsgIds, MSCState4),
     %% remove them all
-    ok = rabbit_msg_store:remove(MsgIds),
+    ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIds),
     %% check first half doesn't exist
     false = msg_store_contains(false, MsgIds1stHalf),
     %% check second half does exist
     true = msg_store_contains(true, MsgIds2ndHalf),
     %% read the second half again
-    MSCState3 = msg_store_read(MsgIds2ndHalf, MSCState2),
+    MSCState6 = msg_store_read(MsgIds2ndHalf, MSCState5),
     %% release the second half, just for fun (aka code coverage)
-    ok = rabbit_msg_store:release(MsgIds2ndHalf),
+    ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, MsgIds2ndHalf),
     %% read the second half again, just for fun (aka code coverage)
-    MSCState4 = msg_store_read(MsgIds2ndHalf, MSCState3),
-    ok = rabbit_msg_store:client_terminate(MSCState4),
+    MSCState7 = msg_store_read(MsgIds2ndHalf, MSCState6),
+    ok = rabbit_msg_store:client_terminate(MSCState7),
     %% stop and restart, preserving every other msg in 2nd half
     ok = stop_msg_store(),
     ok = start_msg_store(fun ([]) -> finished;
@@ -1114,7 +1126,7 @@ test_msg_store() ->
     %% check we have the right msgs left
     lists:foldl(
       fun (MsgId, Bool) ->
-              not(Bool = rabbit_msg_store:contains(MsgId))
+              not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, MsgId))
       end, false, MsgIds2ndHalf),
     %% restart empty
     ok = stop_msg_store(),
@@ -1122,11 +1134,12 @@ test_msg_store() ->
     %% check we don't contain any of the msgs
     false = msg_store_contains(false, MsgIds),
     %% publish the first half again
-    ok = msg_store_write(MsgIds1stHalf),
+    MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE),
+    {ok, MSCState9} = msg_store_write(MsgIds1stHalf, MSCState8),
     %% this should force some sort of sync internally otherwise misread
     ok = rabbit_msg_store:client_terminate(
-           msg_store_read(MsgIds1stHalf, rabbit_msg_store:client_init())),
-    ok = rabbit_msg_store:remove(MsgIds1stHalf),
+           msg_store_read(MsgIds1stHalf, MSCState9)),
+    ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIds1stHalf),
     %% restart empty
     ok = stop_msg_store(),
     ok = start_msg_store_empty(), %% now safe to reuse msg_ids
@@ -1134,34 +1147,37 @@ test_msg_store() ->
     BigCount = 100000,
     MsgIdsBig = [msg_id_bin(X) || X <- lists:seq(1, BigCount)],
     Payload = << 0:65536 >>,
-    ok = lists:foldl(
-           fun (MsgId, ok) ->
-                   rabbit_msg_store:write(MsgId, Payload)
-           end, ok, MsgIdsBig),
+    ok = rabbit_msg_store:client_terminate(
+           lists:foldl(
+             fun (MsgId, MSCStateN) ->
+                     {ok, MSCStateM} =
+                         rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId,
+                                                Payload, MSCStateN),
+                     MSCStateM
+             end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), MsgIdsBig)),
     %% now read them to ensure we hit the fast client-side reading
     ok = rabbit_msg_store:client_terminate(
            lists:foldl(
             fun (MsgId, MSCStateM) ->
                     {{ok, Payload}, MSCStateN} =
-                        rabbit_msg_store:read(MsgId, MSCStateM),
+                        rabbit_msg_store:read(?PERSISTENT_MSG_STORE, MsgId, MSCStateM),
                     MSCStateN
-            end, rabbit_msg_store:client_init(), MsgIdsBig)),
+            end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), MsgIdsBig)),
     %% .., then 3s by 1...
ok = lists:foldl(
            fun (MsgId, ok) ->
-                   rabbit_msg_store:remove([msg_id_bin(MsgId)])
+                   rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [msg_id_bin(MsgId)])
            end, ok, lists:seq(BigCount, 1, -3)),
     %% .., then remove 3s by 2, from the young end first. This hits
     %% GC (under 50% good data left, but no empty files. Must GC).
     ok = lists:foldl(
            fun (MsgId, ok) ->
-                   rabbit_msg_store:remove([msg_id_bin(MsgId)])
+                   rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [msg_id_bin(MsgId)])
            end, ok, lists:seq(BigCount-1, 1, -3)),
     %% .., then remove 3s by 3, from the young end first. This hits
     %% GC...
     ok = lists:foldl(
            fun (MsgId, ok) ->
-                   rabbit_msg_store:remove([msg_id_bin(MsgId)])
+                   rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [msg_id_bin(MsgId)])
            end, ok, lists:seq(BigCount-2, 1, -3)),
     %% ensure empty
     false = msg_store_contains(false, MsgIdsBig),
@@ -1184,20 +1200,25 @@ test_amqqueue(Durable) ->
               pid = none}.
 
 empty_test_queue() ->
-    ok = rabbit_queue_index:start_msg_store([]),
+    ok = start_transient_msg_store(),
+    ok = rabbit_queue_index:start_persistent_msg_store([]),
     {0, Qi1} = rabbit_queue_index:init(test_queue()),
     _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1),
     ok.
 
 queue_index_publish(SeqIds, Persistent, Qi) ->
-    lists:foldl(
-      fun (SeqId, {QiN, SeqIdsMsgIdsAcc}) ->
-              MsgId = rabbit_guid:guid(),
-              QiM = rabbit_queue_index:write_published(MsgId, SeqId, Persistent,
-                                                       QiN),
-              ok = rabbit_msg_store:write(MsgId, MsgId),
-              {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]}
-      end, {Qi, []}, SeqIds).
+    {A, B, MSCStateEnd} =
+        lists:foldl(
+          fun (SeqId, {QiN, SeqIdsMsgIdsAcc, MSCStateN}) ->
+                  MsgId = rabbit_guid:guid(),
+                  QiM = rabbit_queue_index:write_published(MsgId, SeqId, Persistent,
+                                                           QiN),
+                  {ok, MSCStateM} = rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId,
+                                                           MsgId, MSCStateN),
+                  {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc], MSCStateM}
+          end, {Qi, [], rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE)}, SeqIds),
+    ok = rabbit_msg_store:client_terminate(MSCStateEnd),
+    {A, B}.
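The reworked queue_index_publish above shows the discipline the new msg_store API imposes on callers: rabbit_msg_store:write/4 takes a client state and returns an updated one, which must be fed to the next call. A minimal sketch of that threading pattern (not part of the patch; WriteFun stands in for a closure over rabbit_msg_store:write/4):

-module(msc_thread).
-export([write_all/3]).

%% Threads the msg_store client state through a sequence of writes;
%% each step receives the state the previous write returned.
write_all(WriteFun, MsgIds, ClientState0) ->
    lists:foldl(fun (MsgId, {ok, ClientStateN}) ->
                        WriteFun(MsgId, ClientStateN)
                end, {ok, ClientState0}, MsgIds).

msg_store_write/2 above is essentially this applied to fun (Id, S) -> rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Id, Id, S) end.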
queue_index_deliver(SeqIds, Qi) ->
     lists:foldl(
@@ -1236,7 +1257,8 @@ test_queue_index() ->
     %% call terminate twice to prove it's idempotent
     _Qi5 = rabbit_queue_index:terminate(rabbit_queue_index:terminate(Qi4)),
     ok = stop_msg_store(),
-    ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]),
+    ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]),
+    ok = start_transient_msg_store(),
     %% should get length back as 0, as all the msgs were transient
     {0, Qi6} = rabbit_queue_index:init(test_queue()),
     {0, SegSize, Qi7} =
@@ -1249,7 +1271,8 @@ test_queue_index() ->
                                 lists:reverse(SeqIdsMsgIdsB)),
     _Qi11 = rabbit_queue_index:terminate(Qi10),
     ok = stop_msg_store(),
-    ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]),
+    ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]),
+    ok = start_transient_msg_store(),
     %% should get length back as 10000
     LenB = length(SeqIdsB),
     {LenB, Qi12} = rabbit_queue_index:init(test_queue()),
@@ -1266,7 +1289,8 @@ test_queue_index() ->
         rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17),
     _Qi19 = rabbit_queue_index:terminate(Qi18),
     ok = stop_msg_store(),
-    ok = rabbit_queue_index:start_msg_store([test_amqqueue(true)]),
+    ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]),
+    ok = start_transient_msg_store(),
     %% should get length back as 0 because all persistent msgs have been acked
     {0, Qi20} = rabbit_queue_index:init(test_queue()),
     _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20),
@@ -1307,7 +1331,8 @@ test_queue_index() ->
     Qi40 = queue_index_flush_journal(Qi39),
     _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40),
     ok = stop_msg_store(),
-    ok = rabbit_queue_index:start_msg_store([]),
+    ok = rabbit_queue_index:start_persistent_msg_store([]),
+    ok = start_transient_msg_store(),
     ok = stop_msg_store(),
     passed.
 
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 0043bb5f..b9714f53 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -154,7 +154,7 @@
                  rate_timestamp,
                  len,
                  on_sync,
-                 msg_store_read_state
+                 msg_store_clients
                }).
 
 -include("rabbit.hrl").
@@ -186,7 +186,7 @@
 -type(bpqueue() :: any()).
 -type(msg_id() :: binary()).
 -type(seq_id() :: non_neg_integer()).
--type(ack() :: {'ack_index_and_store', msg_id(), seq_id()}
+-type(ack() :: {'ack_index_and_store', msg_id(), seq_id(), boolean()}
              | 'ack_not_on_disk').
 
 -type(vqstate() :: #vqstate { q1 :: queue(),
@@ -210,7 +210,7 @@
               rate_timestamp :: {integer(), integer(), integer()},
               len :: non_neg_integer(),
               on_sync :: {[ack()], [msg_id()], [{pid(), any()}]},
-              msg_store_read_state :: any()
+              msg_store_clients :: {any(), any()}
             }).
 
 -spec(init/1 :: (queue_name()) -> vqstate()).
@@ -285,13 +285,15 @@ init(QueueName) ->
                rate_timestamp = Now,
                len = DeltaCount,
                on_sync = {[], [], []},
-               msg_store_read_state = rabbit_msg_store:client_init()
+               msg_store_clients = {rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE),
+                                    rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE)}
              },
     maybe_deltas_to_betas(State).
 
 terminate(State = #vqstate { index_state = IndexState,
-                             msg_store_read_state = MSCState }) ->
-    rabbit_msg_store:client_terminate(MSCState),
+                             msg_store_clients = {MSCStateP, MSCStateT} }) ->
+    rabbit_msg_store:client_terminate(MSCStateP),
+    rabbit_msg_store:client_terminate(MSCStateT),
    State #vqstate { index_state = rabbit_queue_index:terminate(IndexState) }.
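Running the persistent and transient msg_stores side by side is what forced the rabbit_sup:start_child/3 change earlier in this patch: supervisor child specs are keyed by id, and with the id defaulting to the module name a second instance of the same module cannot be registered. A sketch under hypothetical names (my_store stands in for rabbit_msg_store and is assumed to export start_link/1):

-module(two_stores_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).

start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
    %% Same module twice, distinguished only by the child id; with
    %% duplicate ids the second child would be rejected.
    {ok, {{one_for_one, 10, 10},
          [{persistent_store, {my_store, start_link, [persistent]},
            transient, 30000, worker, [my_store]},
           {transient_store, {my_store, start_link, [transient]},
            transient, 30000, worker, [my_store]}]}}.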
publish(Msg, State) ->
@@ -303,22 +305,24 @@ publish_delivered(Msg = #basic_message { guid = MsgId,
        State = #vqstate { len = 0, index_state = IndexState,
                           next_seq_id = SeqId,
                           out_counter = OutCount,
-                          in_counter = InCount}) ->
+                          in_counter = InCount,
+                          msg_store_clients = MSCState }) ->
     State1 = State #vqstate { out_counter = OutCount + 1,
                               in_counter = InCount + 1 },
     MsgStatus = #msg_status {
       msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent,
       is_delivered = true, msg_on_disk = false, index_on_disk = false },
-    MsgStatus1 = maybe_write_msg_to_disk(false, MsgStatus),
+    {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+    State2 = State1 #vqstate { msg_store_clients = MSCState1 },
     case MsgStatus1 #msg_status.msg_on_disk of
         true ->
             {#msg_status { index_on_disk = true }, IndexState1} =
                 maybe_write_index_to_disk(false, MsgStatus1, IndexState),
-            {{ack_index_and_store, MsgId, SeqId},
-             State1 #vqstate { index_state = IndexState1,
+            {{ack_index_and_store, MsgId, SeqId, IsPersistent},
+             State2 #vqstate { index_state = IndexState1,
                                next_seq_id = SeqId + 1 }};
         false ->
-            {ack_not_on_disk, State1}
+            {ack_not_on_disk, State2}
     end.
 
 set_queue_ram_duration_target(
@@ -404,9 +408,9 @@ fetch(State =
             case IndexOnDisk1 of
                 true  -> true = IsPersistent, %% ASSERTION
                          true = MsgOnDisk, %% ASSERTION
-                         {ack_index_and_store, MsgId, SeqId};
+                         {ack_index_and_store, MsgId, SeqId, IsPersistent};
                 false -> ok = case MsgOnDisk andalso not IsPersistent of
-                                  true  -> rabbit_msg_store:remove([MsgId]);
+                                  true  -> rabbit_msg_store:remove(find_msg_store(IsPersistent), [MsgId]);
                                   false -> ok
                               end,
                          ack_not_on_disk
@@ -419,19 +423,25 @@ fetch(State =
     end.
 
 ack(AckTags, State = #vqstate { index_state = IndexState }) ->
-    {MsgIds, SeqIds} =
+    {MsgIdsPersistent, MsgIdsTransient, SeqIds} =
         lists:foldl(
           fun (ack_not_on_disk, Acc) -> Acc;
-              ({ack_index_and_store, MsgId, SeqId}, {MsgIds, SeqIds}) ->
-                  {[MsgId | MsgIds], [SeqId | SeqIds]}
-          end, {[], []}, AckTags),
+              ({ack_index_and_store, MsgId, SeqId, true}, {MsgIdsP, MsgIdsT, SeqIds}) ->
+                  {[MsgId | MsgIdsP], MsgIdsT, [SeqId | SeqIds]};
+              ({ack_index_and_store, MsgId, SeqId, false}, {MsgIdsP, MsgIdsT, SeqIds}) ->
+                  {MsgIdsP, [MsgId | MsgIdsT], [SeqId | SeqIds]}
+          end, {[], [], []}, AckTags),
     IndexState1 = case SeqIds of
                       [] -> IndexState;
                       _  -> rabbit_queue_index:write_acks(SeqIds, IndexState)
                   end,
-    ok = case MsgIds of
+    ok = case MsgIdsPersistent of
              [] -> ok;
-             _  -> rabbit_msg_store:remove(MsgIds)
+             _  -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIdsPersistent)
+         end,
+    ok = case MsgIdsTransient of
+             [] -> ok;
+             _  -> rabbit_msg_store:remove(?TRANSIENT_MSG_STORE, MsgIdsTransient)
         end,
     State #vqstate { index_state = IndexState1 }.
 
@@ -453,7 +463,7 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->
 %% needs to delete everything that's been delivered and not ack'd.
 delete_and_terminate(State) ->
     {_PurgeCount, State1 = #vqstate { index_state = IndexState,
-                                      msg_store_read_state = MSCState }} =
+                                      msg_store_clients = {MSCStateP, MSCStateT} }} =
         purge(State),
     IndexState1 =
         case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(
@@ -466,7 +476,8 @@ delete_and_terminate(State) ->
                 IndexState3
         end,
     IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1),
-    rabbit_msg_store:client_terminate(MSCState),
+    rabbit_msg_store:client_terminate(MSCStateP),
+    rabbit_msg_store:client_terminate(MSCStateT),
    State1 #vqstate { index_state = IndexState4 }.
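The reworked ack/2 above makes one pass over the ack tags, splitting msg ids by the persistence flag now carried in the tag so that each msg_store sees a single remove call. The partitioning step in isolation (a sketch; the tag shapes follow the ack() type in this patch):

-module(ack_partition).
-export([partition/1]).

%% Splits ack tags into persistent msg ids, transient msg ids and seq
%% ids in a single fold.
partition(AckTags) ->
    lists:foldl(
      fun (ack_not_on_disk, Acc) ->
              Acc;
          ({ack_index_and_store, MsgId, SeqId, true}, {P, T, SeqIds}) ->
              {[MsgId | P], T, [SeqId | SeqIds]};
          ({ack_index_and_store, MsgId, SeqId, false}, {P, T, SeqIds}) ->
              {P, [MsgId | T], [SeqId | SeqIds]}
      end, {[], [], []}, AckTags).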
%% [{Msg, AckTag}]
@@ -479,45 +490,52 @@ delete_and_terminate(State) ->
 %% msg_store:release so that the cache isn't held full of msgs which
 %% are now at the tail of the queue.
 requeue(MsgsWithAckTags, State) ->
-    {SeqIds, MsgIds, State1 = #vqstate { index_state = IndexState }} =
+    {SeqIds, MsgIdsPersistent, MsgIdsTransient,
+     State1 = #vqstate { index_state = IndexState }} =
         lists:foldl(
           fun ({Msg = #basic_message { guid = MsgId }, AckTag},
-               {SeqIdsAcc, MsgIdsAcc, StateN}) ->
-                  {SeqIdsAcc1, MsgIdsAcc1, MsgOnDisk} =
+               {SeqIdsAcc, MsgIdsP, MsgIdsT, StateN}) ->
+                  {SeqIdsAcc1, MsgIdsP1, MsgIdsT1, MsgOnDisk} =
                       case AckTag of
                           ack_not_on_disk ->
-                              {SeqIdsAcc, MsgIdsAcc, false};
-                          {ack_index_and_store, MsgId, SeqId} ->
-                              {[SeqId | SeqIdsAcc], [MsgId | MsgIdsAcc], true}
+                              {SeqIdsAcc, MsgIdsP, MsgIdsT, false};
+                          {ack_index_and_store, MsgId, SeqId, true} ->
+                              {[SeqId | SeqIdsAcc], [MsgId | MsgIdsP], MsgIdsT, true};
+                          {ack_index_and_store, MsgId, SeqId, false} ->
+                              {[SeqId | SeqIdsAcc], MsgIdsP, [MsgId | MsgIdsT], true}
                       end,
                   {_SeqId, StateN1} = publish(Msg, true, MsgOnDisk, StateN),
-                  {SeqIdsAcc1, MsgIdsAcc1, StateN1}
-          end, {[], [], State}, MsgsWithAckTags),
+                  {SeqIdsAcc1, MsgIdsP1, MsgIdsT1, StateN1}
+          end, {[], [], [], State}, MsgsWithAckTags),
     IndexState1 = case SeqIds of
                       [] -> IndexState;
                       _  -> rabbit_queue_index:write_acks(SeqIds, IndexState)
                   end,
-    ok = case MsgIds of
+    ok = case MsgIdsPersistent of
+             [] -> ok;
+             _  -> rabbit_msg_store:release(?PERSISTENT_MSG_STORE, MsgIdsPersistent)
+         end,
+    ok = case MsgIdsTransient of
             [] -> ok;
-             _  -> rabbit_msg_store:release(MsgIds)
+             _  -> rabbit_msg_store:release(?TRANSIENT_MSG_STORE, MsgIdsTransient)
         end,
     State1 #vqstate { index_state = IndexState1 }.
 
 tx_publish(Msg = #basic_message { is_persistent = true, guid = MsgId },
-           State) ->
+           State = #vqstate { msg_store_clients = MSCState }) ->
     MsgStatus = #msg_status {
       msg = Msg, msg_id = MsgId, seq_id = undefined, is_persistent = true,
       is_delivered = false, msg_on_disk = false, index_on_disk = false },
-    #msg_status { msg_on_disk = true } =
-        maybe_write_msg_to_disk(false, MsgStatus),
-    State;
+    {#msg_status { msg_on_disk = true }, MSCState1} =
+        maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+    State #vqstate { msg_store_clients = MSCState1 };
 tx_publish(_Msg, State) ->
     State.
 
 tx_rollback(Pubs, State) ->
     ok = case persistent_msg_ids(Pubs) of
              [] -> ok;
-             PP -> rabbit_msg_store:remove(PP)
+             PP -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, PP)
         end,
     State.
 
@@ -528,6 +546,7 @@ tx_commit(Pubs, AckTags, From, State) ->
         PersistentMsgIds ->
             Self = self(),
             ok = rabbit_msg_store:sync(
+                   ?PERSISTENT_MSG_STORE,
                    PersistentMsgIds,
                    fun () -> ok = rabbit_amqqueue:tx_commit_msg_store_callback(
                                     Self, Pubs, AckTags, From)
@@ -712,11 +731,15 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) ->
    end.
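tx_commit above only blocks when there is nothing to sync: with persistent publishes it hands a continuation to rabbit_msg_store:sync and returns, letting the store run the commit once the (possibly coalesced) fsync completes. The control flow reduced to a skeleton, with SyncFun and CommitFun as stand-ins for the msg_store call and the tx_commit_msg_store_callback round trip:

-module(commit_via_sync).
-export([commit/3]).

%% Returns true if the commit ran inline, false if it was deferred to
%% the sync callback.
commit([], _SyncFun, CommitFun) ->
    CommitFun(),
    true;
commit(PersistentMsgIds, SyncFun, CommitFun) ->
    ok = SyncFun(PersistentMsgIds, CommitFun),
    false.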
remove_queue_entries(Fold, Q, IndexState) ->
-    {Count, MsgIds, SeqIds, IndexState1} =
-        Fold(fun remove_queue_entries1/2, {0, [], [], IndexState}, Q),
-    ok = case MsgIds of
+    {Count, MsgIdsPersistent, MsgIdsTransient, SeqIds, IndexState1} =
+        Fold(fun remove_queue_entries1/2, {0, [], [], [], IndexState}, Q),
+    ok = case MsgIdsPersistent of
+             [] -> ok;
+             _  -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIdsPersistent)
+         end,
+    ok = case MsgIdsTransient of
             [] -> ok;
-             _  -> rabbit_msg_store:remove(MsgIds)
+             _  -> rabbit_msg_store:remove(?TRANSIENT_MSG_STORE, MsgIdsTransient)
         end,
     IndexState2 =
         case SeqIds of
@@ -728,12 +751,14 @@ remove_queue_entries(Fold, Q, IndexState) ->
 remove_queue_entries1(
   #msg_status { msg_id = MsgId, seq_id = SeqId,
                 is_delivered = IsDelivered, msg_on_disk = MsgOnDisk,
-                index_on_disk = IndexOnDisk },
-  {CountN, MsgIdsAcc, SeqIdsAcc, IndexStateN}) ->
-    MsgIdsAcc1 = case MsgOnDisk of
-                     true  -> [MsgId | MsgIdsAcc];
-                     false -> MsgIdsAcc
-                 end,
+                index_on_disk = IndexOnDisk, is_persistent = IsPersistent },
+  {CountN, MsgIdsP, MsgIdsT, SeqIdsAcc, IndexStateN}) ->
+    {MsgIdsP1, MsgIdsT1} =
+        case {MsgOnDisk, IsPersistent} of
+            {true, true}  -> {[MsgId | MsgIdsP], MsgIdsT};
+            {true, false} -> {MsgIdsP, [MsgId | MsgIdsT]};
+            {false, _}    -> {MsgIdsP, MsgIdsT}
+        end,
     SeqIdsAcc1 = case IndexOnDisk of
                      true  -> [SeqId | SeqIdsAcc];
                      false -> SeqIdsAcc
@@ -743,13 +768,13 @@ remove_queue_entries1(
                                           SeqId, IndexStateN);
                       false -> IndexStateN
                   end,
-    {CountN + 1, MsgIdsAcc1, SeqIdsAcc1, IndexStateN1}.
+    {CountN + 1, MsgIdsP1, MsgIdsT1, SeqIdsAcc1, IndexStateN1}.
 
 fetch_from_q3_or_delta(State = #vqstate {
                          q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount },
                          q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount,
                          ram_index_count = RamIndexCount,
-                         msg_store_read_state = MSCState }) ->
+                         msg_store_clients = MSCState }) ->
     case bpqueue:out(Q3) of
         {empty, _Q3} ->
             0 = DeltaCount, %% ASSERTION
@@ -761,7 +786,7 @@ fetch_from_q3_or_delta(State = #vqstate {
                          is_persistent = IsPersistent }}, Q3a} ->
             {{ok, Msg = #basic_message { is_persistent = IsPersistent,
                                          guid = MsgId }}, MSCState1} =
-                rabbit_msg_store:read(MsgId, MSCState),
+                read_from_msg_store(MSCState, MsgId, IsPersistent),
             Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4),
             RamIndexCount1 = case IndexOnDisk of
                                  true  -> RamIndexCount;
@@ -771,7 +796,7 @@ fetch_from_q3_or_delta(State = #vqstate {
             State1 = State #vqstate { q3 = Q3a, q4 = Q4a,
                                       ram_msg_count = RamMsgCount + 1,
                                       ram_index_count = RamIndexCount1,
-                                      msg_store_read_state = MSCState1 },
+                                      msg_store_clients = MSCState1 },
             State2 =
                 case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of
                     {true, true} ->
@@ -857,19 +882,22 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId },
                      in_counter = InCount + 1 })}.
publish(msg, MsgStatus, State = #vqstate { index_state = IndexState,
-                                           ram_msg_count = RamMsgCount }) ->
-    MsgStatus1 = maybe_write_msg_to_disk(false, MsgStatus),
+                                           ram_msg_count = RamMsgCount,
+                                           msg_store_clients = MSCState }) ->
+    {MsgStatus1, MSCState1} =
+        maybe_write_msg_to_disk(false, MsgStatus, MSCState),
     {MsgStatus2, IndexState1} =
         maybe_write_index_to_disk(false, MsgStatus1, IndexState),
     State1 = State #vqstate { ram_msg_count = RamMsgCount + 1,
-                              index_state = IndexState1 },
+                              index_state = IndexState1,
+                              msg_store_clients = MSCState1 },
     store_alpha_entry(MsgStatus2, State1);
 
-publish(index, MsgStatus, State =
-        #vqstate { index_state = IndexState, q1 = Q1,
-                   ram_index_count = RamIndexCount }) ->
-    MsgStatus1 = #msg_status { msg_on_disk = true } =
-        maybe_write_msg_to_disk(true, MsgStatus),
+publish(index, MsgStatus, State = #vqstate { index_state = IndexState, q1 = Q1,
+                                             ram_index_count = RamIndexCount,
+                                             msg_store_clients = MSCState }) ->
+    {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} =
+        maybe_write_msg_to_disk(true, MsgStatus, MSCState),
     ForceIndex = should_force_index_to_disk(State),
     {MsgStatus2, IndexState1} =
         maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState),
@@ -878,15 +906,16 @@ publish(index, MsgStatus, State =
                          false -> RamIndexCount + 1
                      end,
     State1 = State #vqstate { index_state = IndexState1,
-                              ram_index_count = RamIndexCount1 },
+                              ram_index_count = RamIndexCount1,
+                              msg_store_clients = MSCState1 },
     true = queue:is_empty(Q1), %% ASSERTION
     store_beta_entry(MsgStatus2, State1);
 
 publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State =
-        #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2,
-                   delta = Delta }) ->
-    MsgStatus1 = #msg_status { msg_on_disk = true } =
-        maybe_write_msg_to_disk(true, MsgStatus),
+        #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2,
+                   delta = Delta, msg_store_clients = MSCState }) ->
+    {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} =
+        maybe_write_msg_to_disk(true, MsgStatus, MSCState),
     {#msg_status { index_on_disk = true }, IndexState1} =
         maybe_write_index_to_disk(true, MsgStatus1, IndexState),
     true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION
@@ -898,7 +927,8 @@ publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State =
     Delta1 = #delta { start_seq_id = DeltaSeqId,
                       count = 1,
                       end_seq_id = SeqId + 1 },
     State #vqstate { index_state = IndexState1,
-                     delta = combine_deltas(Delta, Delta1) }.
+                     delta = combine_deltas(Delta, Delta1),
+                     msg_store_clients = MSCState1 }.
 
 store_alpha_entry(MsgStatus, State =
                   #vqstate { q1 = Q1, q2 = Q2,
@@ -925,17 +955,42 @@ store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true,
             State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) }
     end.
 
+find_msg_store(true)  -> ?PERSISTENT_MSG_STORE;
+find_msg_store(false) -> ?TRANSIENT_MSG_STORE.
+
+read_from_msg_store({MSCStateP, MSCStateT}, MsgId, true) ->
+    {Res, MSCStateP1} =
+        rabbit_msg_store:read(?PERSISTENT_MSG_STORE, MsgId, MSCStateP),
+    {Res, {MSCStateP1, MSCStateT}};
+read_from_msg_store({MSCStateP, MSCStateT}, MsgId, false) ->
+    {Res, MSCStateT1} =
+        rabbit_msg_store:read(?TRANSIENT_MSG_STORE, MsgId, MSCStateT),
+    {Res, {MSCStateP, MSCStateT1}}.
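read_from_msg_store above is an instance of a small reusable pattern: the two msg_store client states travel as a pair, and an operation updates only the side matching the message's persistence. Generalized over the operation (with_store is a hypothetical name, not in the patch):

-module(store_pair).
-export([with_store/3]).

%% Applies Fun to the persistent or transient client state, selected
%% by the boolean flag, and rebuilds the pair around the updated half.
with_store(Fun, {MSCStateP, MSCStateT}, true) ->
    {Res, MSCStateP1} = Fun(MSCStateP),
    {Res, {MSCStateP1, MSCStateT}};
with_store(Fun, {MSCStateP, MSCStateT}, false) ->
    {Res, MSCStateT1} = Fun(MSCStateT),
    {Res, {MSCStateP, MSCStateT1}}.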
+
 maybe_write_msg_to_disk(_Force, MsgStatus =
-                        #msg_status { msg_on_disk = true }) ->
-    MsgStatus;
+                        #msg_status { msg_on_disk = true }, MSCState) ->
+    {MsgStatus, MSCState};
 maybe_write_msg_to_disk(Force, MsgStatus =
                         #msg_status { msg = Msg, msg_id = MsgId,
-                                      is_persistent = IsPersistent })
+                                      is_persistent = IsPersistent },
+                        {MSCStateP, MSCStateT})
   when Force orelse IsPersistent ->
-    ok = rabbit_msg_store:write(MsgId, ensure_binary_properties(Msg)),
-    MsgStatus #msg_status { msg_on_disk = true };
-maybe_write_msg_to_disk(_Force, MsgStatus) ->
-    MsgStatus.
+    MSCState1 =
+        case IsPersistent of
+            true ->
+                {ok, MSCStateP1} = rabbit_msg_store:write(
+                                     ?PERSISTENT_MSG_STORE, MsgId,
+                                     ensure_binary_properties(Msg), MSCStateP),
+                {MSCStateP1, MSCStateT};
+            false ->
+                {ok, MSCStateT1} = rabbit_msg_store:write(
+                                     ?TRANSIENT_MSG_STORE, MsgId,
+                                     ensure_binary_properties(Msg), MSCStateT),
+                {MSCStateP, MSCStateT1}
+        end,
+    {MsgStatus #msg_status { msg_on_disk = true }, MSCState1};
+maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) ->
+    {MsgStatus, MSCState}.
 
 maybe_write_index_to_disk(_Force, MsgStatus =
                           #msg_status { index_on_disk = true }, IndexState) ->
@@ -1082,11 +1137,12 @@ maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State =
 maybe_push_alphas_to_betas(
   Generator, Consumer, Q, State =
   #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount,
-             index_state = IndexState }) ->
+             index_state = IndexState, msg_store_clients = MSCState }) ->
     case Generator(Q) of
         {empty, _Q} -> State;
         {{value, MsgStatus}, Qa} ->
-            MsgStatus1 = maybe_write_msg_to_disk(true, MsgStatus),
+            {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(true, MsgStatus,
+                                                              MSCState),
             ForceIndex = should_force_index_to_disk(State),
             {MsgStatus2, IndexState1} =
                 maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState),
@@ -1096,7 +1152,8 @@ maybe_push_alphas_to_betas(
                              end,
             State1 = State #vqstate { ram_msg_count = RamMsgCount - 1,
                                       ram_index_count = RamIndexCount1,
-                                      index_state = IndexState1 },
+                                      index_state = IndexState1,
+                                      msg_store_clients = MSCState1 },
             maybe_push_alphas_to_betas(Generator, Consumer, Qa,
                                        Consumer(MsgStatus2, Qa, State1))
     end.
-- 
cgit v1.2.1


From a66cd1f54a2a1ae8f3f58d2142788faf72db2a3b Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 2 Apr 2010 02:39:15 +0100
Subject: Support async job submission

---
 src/worker_pool.erl        | 27 ++++++++++++++++++++++-----
 src/worker_pool_worker.erl | 11 ++++++++++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/worker_pool.erl b/src/worker_pool.erl
index b883d4f0..fcacdd59 100644
--- a/src/worker_pool.erl
+++ b/src/worker_pool.erl
@@ -40,12 +40,10 @@
 %%
 %% 1. Allow priorities (basically, change the pending queue to a
 %% priority_queue).
-%%
-%% 2. Allow the submission to the pool_worker to be async.
 
 -behaviour(gen_server2).
 
--export([start_link/0, submit/1, idle/1]).
+-export([start_link/0, submit/1, submit_async/1, idle/1]).
 
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
@@ -56,6 +54,7 @@
 
 -spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
 -spec(submit/1 :: (fun (() -> A) | {atom(), atom(), [any()]}) -> A).
+-spec(submit_async/1 :: (fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok').
 
 -endif.
 
@@ -80,6 +79,9 @@ submit(Fun) ->
             worker_pool_worker:submit(Pid, Fun)
     end.
 
+submit_async(Fun) ->
+    gen_server2:cast(?SERVER, {run_async, Fun}).
+
 idle(WId) ->
    gen_server2:cast(?SERVER, {idle, WId}).
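The new submit_async/1 returns as soon as the job is queued, so a caller that needs the result has to arrange its own reply channel. A hypothetical usage sketch (heavy_work/0 is an illustrative stand-in job):

-module(pool_async_demo).
-export([run/0]).

run() ->
    Self = self(),
    %% submit_async/1 returns 'ok' immediately; the fun runs later on
    %% some pool worker, so the result must come back as a message.
    ok = worker_pool:submit_async(fun () -> Self ! {done, heavy_work()} end),
    receive {done, Result} -> Result
    after 5000 -> exit(timeout)
    end.

heavy_work() -> lists:sum(lists:seq(1, 1000000)).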
@@ -93,7 +95,8 @@ handle_call(next_free, From, State = #state { available = Avail,
                                               pending = Pending }) ->
     case queue:out(Avail) of
         {empty, _Avail} ->
-            {noreply, State #state { pending = queue:in(From, Pending) }};
+            {noreply,
+             State #state { pending = queue:in({next_free, From}, Pending) }};
         {{value, WId}, Avail1} ->
             {reply, get_worker_pid(WId), State #state { available = Avail1 }}
     end;
@@ -106,11 +109,25 @@ handle_cast({idle, WId}, State = #state { available = Avail,
     {noreply,
      case queue:out(Pending) of
          {empty, _Pending} ->
              State #state { available = queue:in(WId, Avail) };
-         {{value, From}, Pending1} ->
+         {{value, {next_free, From}}, Pending1} ->
              gen_server2:reply(From, get_worker_pid(WId)),
+             State #state { pending = Pending1 };
+         {{value, {run_async, Fun}}, Pending1} ->
+             worker_pool_worker:submit_async(get_worker_pid(WId), Fun),
              State #state { pending = Pending1 }
      end};
 
+handle_cast({run_async, Fun}, State = #state { available = Avail,
+                                               pending = Pending }) ->
+    {noreply,
+     case queue:out(Avail) of
+         {empty, _Avail} ->
+             State #state { pending = queue:in({run_async, Fun}, Pending)};
+         {{value, WId}, Avail1} ->
+             worker_pool_worker:submit_async(get_worker_pid(WId), Fun),
+             State #state { available = Avail1 }
+     end};
+
 handle_cast(Msg, State) ->
     {stop, {unexpected_cast, Msg}, State}.
 
diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl
index fc3ce371..4defc5ba 100644
--- a/src/worker_pool_worker.erl
+++ b/src/worker_pool_worker.erl
@@ -33,7 +33,7 @@
 
 -behaviour(gen_server2).
 
--export([start_link/1, submit/2, run/1]).
+-export([start_link/1, submit/2, submit_async/2, run/1]).
 
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
          terminate/2, code_change/3]).
@@ -44,6 +44,7 @@
 
 -spec(start_link/1 :: (any()) -> {'ok', pid()} | 'ignore' | {'error', any()}).
 -spec(submit/2 :: (pid(), fun (() -> A) | {atom(), atom(), [any()]}) -> A).
+-spec(submit_async/2 :: (pid(), fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok').
 
 -endif.
 
@@ -60,6 +61,9 @@ start_link(WId) ->
 submit(Pid, Fun) ->
     gen_server2:call(Pid, {submit, Fun}, infinity).
 
+submit_async(Pid, Fun) ->
+    gen_server2:cast(Pid, {submit_async, Fun}).
+
 init([WId]) ->
     ok = worker_pool:idle(WId),
     put(worker_pool_worker, true),
@@ -74,6 +78,11 @@ handle_call({submit, Fun}, From, WId) ->
 handle_call(Msg, _From, State) ->
     {stop, {unexpected_call, Msg}, State}.
 
+handle_cast({submit_async, Fun}, WId) ->
+    run(Fun),
+    ok = worker_pool:idle(WId),
+    {noreply, WId};
+
 handle_cast(Msg, State) ->
     {stop, {unexpected_cast, Msg}, State}.
-- 
cgit v1.2.1


From 7b413e40817ed436b02aaf83f0ff9380e1902f0b Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 2 Apr 2010 03:14:35 +0100
Subject: Next step on making startup faster is to allow the scanning of the queue indices to occur in parallel (per queue). Also, queue creation can take some substantial time due to queue_index:init. Therefore, stagger the startup of a queue so that this potentially expensive step (a) doesn't get done at all if the queue already exists etc., and (b) doesn't block amqqueue_process:init from returning. Thus on startup now not only do we do the seeding of the msg_store in parallel (per queue), but the durable queues that come up can also do the bulk of their work in parallel, thus speeding recovery substantially.
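The pending queue in the pool change above has to remember why each entry is waiting, hence the {next_free, From} and {run_async, Fun} tags: a worker going idle either answers a blocked synchronous caller or is handed a stored async job. The dispatch rule in miniature (a sketch; reply/2 and submit_async/2 stand in for gen_server2:reply/2 and worker_pool_worker:submit_async/2):

-module(pending_dispatch).
-export([on_idle/2]).

on_idle(WPid, Pending) ->
    case queue:out(Pending) of
        {empty, _Pending} ->
            {still_idle, Pending};
        {{value, {next_free, From}}, Pending1} ->
            reply(From, WPid),
            {dispatched, Pending1};
        {{value, {run_async, Fun}}, Pending1} ->
            submit_async(WPid, Fun),
            {dispatched, Pending1}
    end.

reply(From, WPid) -> io:format("handing worker ~p to caller ~p~n", [WPid, From]).
submit_async(WPid, Fun) -> io:format("running ~p on worker ~p~n", [Fun, WPid]).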
---
 src/rabbit_amqqueue.erl         | 50 +++++++++++++++++++++++-----------------
 src/rabbit_amqqueue_process.erl | 49 +++++++++++++++++++++++++++------------
 src/rabbit_misc.erl             | 24 +++++++++++++++++++
 src/rabbit_msg_store.erl        |  7 +++++-
 src/rabbit_queue_index.erl      | 51 +++++++++++++++++++++++++++------------
 src/rabbit_tests.erl            |  1 +
 6 files changed, 130 insertions(+), 52 deletions(-)

diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index f0540c93..b6e92e06 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -128,6 +128,7 @@
 %%----------------------------------------------------------------------------
 
 start() ->
+    ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()),
     ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store,
                                 [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(),
                                  fun (ok) -> finished end, ok]),
@@ -152,26 +153,32 @@ find_durable_queues() ->
       end).
 
 recover_durable_queues(DurableQueues) ->
-    lists:foldl(
-      fun (RecoveredQ, Acc) ->
-              Q = start_queue_process(RecoveredQ),
-              %% We need to catch the case where a client connected to
-              %% another node has deleted the queue (and possibly
-              %% re-created it).
-              case rabbit_misc:execute_mnesia_transaction(
-                     fun () ->
-                             case mnesia:match_object(
-                                    rabbit_durable_queue, RecoveredQ, read) of
-                                 [_] -> ok = store_queue(Q),
-                                        true;
-                                 []  -> false
-                             end
-                     end) of
-                  true  -> [Q|Acc];
-                  false -> exit(Q#amqqueue.pid, shutdown),
-                           Acc
-              end
-      end, [], DurableQueues).
+    Qs = lists:foldl(
+           fun (RecoveredQ, Acc) ->
+                   Q = start_queue_process(RecoveredQ),
+                   %% We need to catch the case where a client
+                   %% connected to another node has deleted the queue
+                   %% (and possibly re-created it).
+                   case rabbit_misc:execute_mnesia_transaction(
+                          fun () ->
+                                  case mnesia:match_object(
+                                         rabbit_durable_queue, RecoveredQ,
+                                         read) of
+                                      [_] -> ok = store_queue(Q),
+                                             true;
+                                      []  -> false
+                                  end
+                          end) of
+                       true ->
+                           ok = gen_server2:cast(Q#amqqueue.pid,
+                                                 init_variable_queue),
+                           [Q|Acc];
+                       false -> exit(Q#amqqueue.pid, shutdown),
+                                Acc
+                   end
+           end, [], DurableQueues),
+    [ok = gen_server2:call(Q#amqqueue.pid, sync, infinity) || Q <- Qs],
+    Qs.
 
 declare(QueueName, Durable, AutoDelete, Args) ->
     Q = start_queue_process(#amqqueue{name = QueueName,
@@ -202,7 +209,8 @@ internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) ->
             end) of
         not_found -> exit(Q#amqqueue.pid, shutdown),
                      rabbit_misc:not_found(QueueName);
-        Q -> Q;
+        Q -> ok = gen_server2:cast(Q#amqqueue.pid, init_variable_queue),
+             Q;
         ExistingQ -> exit(Q#amqqueue.pid, shutdown),
                      ExistingQ
     end.
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 41435c08..e6c8d238 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -105,19 +105,18 @@ info_keys() -> ?INFO_KEYS.
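recover_durable_queues above shows the staggering at work: the expensive variable-queue init is kicked off on every recovered queue with a cast, and only afterwards does recovery call each queue in turn, so the inits proceed concurrently while the calls act as a completion barrier. The shape of that pattern, assuming a server that replies ok to sync once the earlier cast has been processed:

-module(init_barrier).
-export([kick_and_wait/1]).

%% Casts never block, so every queue starts initialising at once; the
%% subsequent calls return only after each mailbox has drained past
%% the init message.
kick_and_wait(QPids) ->
    [ok = gen_server2:cast(QPid, init_variable_queue) || QPid <- QPids],
    [ok = gen_server2:call(QPid, sync, infinity) || QPid <- QPids],
    ok.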
%%----------------------------------------------------------------------------
 
-init(Q = #amqqueue { name = QName }) ->
+init(Q) ->
     ?LOGDEBUG("Queue starting - ~p~n", [Q]),
     process_flag(trap_exit, true),
     ok = file_handle_cache:register_callback(
            rabbit_amqqueue, set_maximum_since_use, [self()]),
     ok = rabbit_memory_monitor:register
            (self(), {rabbit_amqqueue, set_queue_duration, [self()]}),
-    VQS = rabbit_variable_queue:init(QName),
     {ok, #q{q = Q,
             owner = none,
             exclusive_consumer = none,
             has_had_consumers = false,
-            variable_queue_state = VQS,
+            variable_queue_state = undefined,
             next_msg_id = 1,
             active_consumers = queue:new(),
             blocked_consumers = queue:new(),
@@ -127,25 +126,37 @@ init(Q = #amqqueue { name = QName }) ->
 
 terminate(shutdown, #q{variable_queue_state = VQS}) ->
     ok = rabbit_memory_monitor:deregister(self()),
-    _VQS = rabbit_variable_queue:terminate(VQS);
+    case VQS of
+        undefined -> ok;
+        _ -> rabbit_variable_queue:terminate(VQS)
+    end;
 terminate({shutdown, _}, #q{variable_queue_state = VQS}) ->
     ok = rabbit_memory_monitor:deregister(self()),
-    _VQS = rabbit_variable_queue:terminate(VQS);
+    case VQS of
+        undefined -> ok;
+        _ -> rabbit_variable_queue:terminate(VQS)
+    end;
 terminate(_Reason, State = #q{variable_queue_state = VQS}) ->
     ok = rabbit_memory_monitor:deregister(self()),
     %% FIXME: How do we cancel active subscriptions?
     %% Ensure that any persisted tx messages are removed.
     %% TODO: wait for all in flight tx_commits to complete
-    VQS1 = rabbit_variable_queue:tx_rollback(
-             lists:concat([PM || #tx { pending_messages = PM } <-
-                                     all_tx_record()]), VQS),
-    %% Delete from disk first. If we crash at this point, when a
-    %% durable queue, we will be recreated at startup, possibly with
-    %% partial content. The alternative is much worse however - if we
-    %% called internal_delete first, we would then have a race between
-    %% the disk delete and a new queue with the same name being
-    %% created and published to.
-    _VQS = rabbit_variable_queue:delete_and_terminate(VQS1),
+    case VQS of
+        undefined ->
+            ok;
+        _ ->
+            VQS1 = rabbit_variable_queue:tx_rollback(
+                     lists:concat([PM || #tx { pending_messages = PM } <-
+                                             all_tx_record()]), VQS),
+            %% Delete from disk first. If we crash at this point, when
+            %% a durable queue, we will be recreated at startup,
+            %% possibly with partial content. The alternative is much
+            %% worse however - if we called internal_delete first, we
+            %% would then have a race between the disk delete and a
+            %% new queue with the same name being created and
+            %% published to.
+            rabbit_variable_queue:delete_and_terminate(VQS1)
+    end,
     ok = rabbit_amqqueue:internal_delete(qname(State)).
 
 code_change(_OldVsn, State, _Extra) ->
@@ -610,6 +621,9 @@ i(Item, _) ->
 
 %---------------------------------------------------------------------------
 
+handle_call(sync, _From, State) ->
+    reply(ok, State);
+
 handle_call(info, _From, State) ->
     reply(infos(?INFO_KEYS, State), State);
 
@@ -815,6 +829,11 @@ handle_call({claim_queue, ReaderPid}, _From,
             reply(locked, State)
     end.
 
+handle_cast(init_variable_queue, #q{variable_queue_state = undefined,
+                                    q = #amqqueue{name = QName}} = State) ->
+    noreply(
+      State #q { variable_queue_state = rabbit_variable_queue:init(QName) });
+
 handle_cast({deliver, Txn, Message, ChPid}, State) ->
    %% Asynchronous, non-"mandatory", non-"immediate" deliver mode.
{_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State),
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index 81cecb38..3bc35ca2 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -59,6 +59,7 @@
 -export([sort_field_table/1]).
 -export([pid_to_string/1, string_to_pid/1]).
 -export([version_compare/2, version_compare/3]).
+-export([recursive_delete/1]).
 
 -import(mnesia).
 -import(lists).
@@ -133,6 +134,7 @@
 -spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()).
 -spec(pid_to_string/1 :: (pid()) -> string()).
 -spec(string_to_pid/1 :: (string()) -> pid()).
+-spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}).
 
 -endif.
 
@@ -601,3 +603,25 @@ version_compare(A, B) ->
        ANum < BNum -> lt;
        ANum > BNum -> gt
     end.
+
+recursive_delete(Path) ->
+    case filelib:is_dir(Path) of
+        false ->
+            case file:delete(Path) of
+                ok              -> ok;
+                %% Path doesn't exist anyway
+                {error, enoent} -> ok
+            end;
+        true ->
+            case file:list_dir(Path) of
+                {ok, FileNames} ->
+                    lists:foldl(
+                      fun (FileName, ok) ->
+                              recursive_delete(filename:join(Path, FileName));
+                          (_FileName, Error) ->
+                              Error
+                      end, ok, FileNames);
+                {error, Error} ->
+                    {error, {Path, Error}}
+            end
+    end.
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index a33b1a34..5610b35e 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -34,7 +34,7 @@
 -behaviour(gen_server2).
 
 -export([start_link/4, write/4, read/3, contains/2, remove/2, release/2,
-         sync/3, client_init/1, client_terminate/1]).
+         sync/3, client_init/1, client_terminate/1, clean/2]).
 
 -export([sync/1, gc_done/4, set_maximum_since_use/2]). %% internal
 
@@ -113,6 +113,7 @@
 -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok').
 -spec(client_init/1 :: (server()) -> client_msstate()).
 -spec(client_terminate/1 :: (client_msstate()) -> 'ok').
+-spec(clean/2 :: (atom(), file_path()) -> 'ok').
 
 -endif.
 
@@ -340,6 +341,10 @@ client_terminate(CState) ->
     close_all_handles(CState),
     ok.
 
+clean(Server, BaseDir) ->
+    Dir = filename:join(BaseDir, atom_to_list(Server)),
+    ok = rabbit_misc:recursive_delete(Dir).
+
 %%----------------------------------------------------------------------------
 %% Client-side-only helpers
 %%----------------------------------------------------------------------------
diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index c0c1b40b..935f2754 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -37,6 +37,8 @@
          find_lowest_seq_id_seg_and_next_seq_id/1,
          start_persistent_msg_store/1]).
 
+-export([queue_index_walker_reader/3]). %% for internal use only
+
 -define(CLEAN_FILENAME, "clean.dot").
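A hypothetical exercise of the new helper (the path is illustrative). Two properties of the version above are worth noting: it deletes files but leaves the emptied directories themselves in place, and a file:delete error other than enoent escapes as a case_clause rather than an error tuple.

-module(recursive_delete_demo).
-export([run/0]).

run() ->
    File = filename:join(["/tmp", "rd_demo", "a", "b", "file"]),
    ok = filelib:ensure_dir(File),          %% creates /tmp/rd_demo/a/b
    ok = file:write_file(File, <<"x">>),
    ok = rabbit_misc:recursive_delete("/tmp/rd_demo"),
    false = filelib:is_regular(File),       %% the file is gone...
    true = filelib:is_dir("/tmp/rd_demo"),  %% ...the directory remains
    ok.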
%%----------------------------------------------------------------------------
 
@@ -419,30 +421,49 @@ start_persistent_msg_store(DurableQueues) ->
 %% Msg Store Startup Delta Function
 %%----------------------------------------------------------------------------
 
-queue_index_walker([]) ->
-    finished;
-queue_index_walker([QueueName|QueueNames]) ->
+queue_index_walker(DurableQueues) when is_list(DurableQueues) ->
+    queue_index_walker({DurableQueues, sets:new()});
+
+queue_index_walker({[], Kids}) ->
+    case sets:size(Kids) of
+        0 -> finished;
+        _ -> receive
+                 {found, MsgId, Count} ->
+                     {MsgId, Count, {[], Kids}};
+                 {finished, Child} ->
+                     queue_index_walker({[], sets:del_element(Child, Kids)})
+             end
+    end;
+queue_index_walker({[QueueName | QueueNames], Kids}) ->
+    Child = make_ref(),
+    ok = worker_pool:submit_async({?MODULE, queue_index_walker_reader,
+                                   [QueueName, self(), Child]}),
+    queue_index_walker({QueueNames, sets:add_element(Child, Kids)}).
+
+queue_index_walker_reader(QueueName, Parent, Guid) ->
     State = blank_state(QueueName),
     State1 = load_journal(State),
     SegNums = all_segment_nums(State1),
-    queue_index_walker({SegNums, State1, QueueNames});
+    queue_index_walker_reader(Parent, Guid, State1, SegNums).
 
-queue_index_walker({[], State, QueueNames}) ->
+queue_index_walker_reader(Parent, Guid, State, []) ->
     _State = terminate(false, State),
-    queue_index_walker(QueueNames);
-queue_index_walker({[Seg | SegNums], State, QueueNames}) ->
+    Parent ! {finished, Guid};
+queue_index_walker_reader(Parent, Guid, State, [Seg | SegNums]) ->
     SeqId = reconstruct_seq_id(Seg, 0),
     {Messages, State1} = read_segment_entries(SeqId, State),
-    queue_index_walker({Messages, State1, SegNums, QueueNames});
+    queue_index_walker_reader(Parent, Guid, SegNums, State1, Messages).
 
-queue_index_walker({[], State, SegNums, QueueNames}) ->
-    queue_index_walker({SegNums, State, QueueNames});
-queue_index_walker({[{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs],
-                    State, SegNums, QueueNames}) ->
+queue_index_walker_reader(Parent, Guid, SegNums, State, []) ->
+    queue_index_walker_reader(Parent, Guid, State, SegNums);
+queue_index_walker_reader(
+  Parent, Guid, SegNums, State,
+  [{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs]) ->
     case IsPersistent of
-        true  -> {MsgId, 1, {Msgs, State, SegNums, QueueNames}};
-        false -> queue_index_walker({Msgs, State, SegNums, QueueNames})
-    end.
+        true  -> Parent ! {found, MsgId, 1};
+        false -> ok
+    end,
+    queue_index_walker_reader(Parent, Guid, SegNums, State, Msgs).
 
 %%----------------------------------------------------------------------------
 %% Minors
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 3ccb83b6..474afbca 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1001,6 +1001,7 @@ start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) ->
     start_transient_msg_store().
 
 start_transient_msg_store() ->
+    ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()),
     ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store,
                                 [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(),
                                  fun (ok) -> finished end, ok]).
-- 
cgit v1.2.1


From bbefae1a2cd1519323f644288b6f931078880b71 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 2 Apr 2010 11:34:43 +0100
Subject: The 2-part init of the queue must be complete before the queue is committed into mnesia. Furthermore, it must be a call, not a cast, otherwise it could be overtaken (though it returns first, and inits second).
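queue_index_walker is now a scatter/gather: one async pool job per queue, each tagged with a fresh ref, with the parent folding in {found, ...} results until every child has sent {finished, Ref}. The same skeleton detached from the queue index (a sketch, not the patch's code; it relies on worker_pool:submit_async/1 from earlier in this series):

-module(scatter_gather).
-export([run/2]).

%% Runs Fun(Input) on pool workers and collects the results.
run(Fun, Inputs) ->
    Parent = self(),
    Kids = lists:foldl(
             fun (Input, Acc) ->
                     Child = make_ref(),
                     ok = worker_pool:submit_async(
                            fun () ->
                                    Parent ! {result, Child, Fun(Input)},
                                    Parent ! {finished, Child}
                            end),
                     sets:add_element(Child, Acc)
             end, sets:new(), Inputs),
    gather(Kids, []).

gather(Kids, Results) ->
    case sets:size(Kids) of
        0 -> Results;
        _ -> receive
                 {result, _Child, Result} ->
                     gather(Kids, [Result | Results]);
                 {finished, Child} ->
                     gather(sets:del_element(Child, Kids), Results)
             end
    end.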
Note that this means on a collision, the queue may get > 1 init calls, hence the need for idempotency, and the 2nd call may be delayed while the first init completes. Also note that the queue index init alters disk content. Thus initing the same queue with disk content concurrently will lead to unexpected results. However, this can't occur because durable queue recovery is the only time we are initing queues with disk content, and there will be no collisions at that point, so this is safe. In normal queue declaration collisions, there will be no disk content anyway.
---
 src/rabbit_amqqueue.erl         | 11 +++++++----
 src/rabbit_amqqueue_process.erl | 15 ++++++++++-----
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index b6e92e06..c14a28fe 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -170,8 +170,9 @@ recover_durable_queues(DurableQueues) ->
                                   end
                           end) of
                        true ->
-                           ok = gen_server2:cast(Q#amqqueue.pid,
-                                                 init_variable_queue),
+                           ok = gen_server2:call(Q#amqqueue.pid,
+                                                 init_variable_queue,
+                                                 infinity),
                            [Q|Acc];
                        false -> exit(Q#amqqueue.pid, shutdown),
                                 Acc
@@ -200,6 +201,9 @@ internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) ->
                              true  -> add_default_binding(Q);
                              false -> ok
                          end,
+                         ok = gen_server2:call(
+                                Q#amqqueue.pid,
+                                init_variable_queue, infinity),
                          Q;
                      [_] -> not_found %% existing Q on stopped node
                  end;
@@ -209,8 +213,7 @@ internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) ->
             end) of
         not_found -> exit(Q#amqqueue.pid, shutdown),
                      rabbit_misc:not_found(QueueName);
-        Q -> ok = gen_server2:cast(Q#amqqueue.pid, init_variable_queue),
-             Q;
+        Q -> Q;
         ExistingQ -> exit(Q#amqqueue.pid, shutdown),
                      ExistingQ
     end.
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index e6c8d238..1394f9db 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -621,6 +621,16 @@ i(Item, _) ->
 
 %---------------------------------------------------------------------------
 
+handle_call(init_variable_queue, From, State =
+            #q{variable_queue_state = undefined,
+               q = #amqqueue{name = QName}}) ->
+    gen_server2:reply(From, ok),
+    noreply(
+      State #q { variable_queue_state = rabbit_variable_queue:init(QName) });
+
+handle_call(init_variable_queue, _From, State) ->
+    reply(ok, State);
+
 handle_call(sync, _From, State) ->
     reply(ok, State);
 
@@ -829,11 +839,6 @@ handle_call({claim_queue, ReaderPid}, _From,
             reply(locked, State)
     end.
 
-handle_cast(init_variable_queue, #q{variable_queue_state = undefined,
-                                    q = #amqqueue{name = QName}} = State) ->
-    noreply(
-      State #q { variable_queue_state = rabbit_variable_queue:init(QName) });
-
 handle_cast({deliver, Txn, Message, ChPid}, State) ->
     %% Asynchronous, non-"mandatory", non-"immediate" deliver mode.
     {_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State),
-- 
cgit v1.2.1


From 824722919f2c9e1efe1cdb7d0822e606b2c56b77 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sat, 3 Apr 2010 20:56:46 +0100
Subject: Introduced rabbit_misc:dict_cons/3 which ends up being used in 3 places. Also fixed a bug which I'd sleepily introduced in vq:requeue where a msg_store:release had accidentally become a msg_store:remove (no idea how the tests managed to pass well enough after that to convince me to commit - certainly had the tests failing today due to that one). Finally, persistent msgs in a non-durable queue should be sent to the transient msg_store, not the persistent msg_store.
Thus they will survive a crash of the queue, but not a restart of the server.
---
 src/rabbit_amqqueue_process.erl |  10 +-
 src/rabbit_channel.erl          |   5 +-
 src/rabbit_misc.erl             |   6 +-
 src/rabbit_router.erl           |   4 +-
 src/rabbit_tests.erl            |   2 +-
 src/rabbit_variable_queue.erl   | 288 +++++++++++++++++++++-------------------
 6 files changed, 166 insertions(+), 149 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 1394f9db..c9add5b2 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -623,10 +623,14 @@ i(Item, _) ->
 
 handle_call(init_variable_queue, From, State =
             #q{variable_queue_state = undefined,
-               q = #amqqueue{name = QName}}) ->
+               q = #amqqueue{name = QName, durable = IsDurable}}) ->
     gen_server2:reply(From, ok),
-    noreply(
-      State #q { variable_queue_state = rabbit_variable_queue:init(QName) });
+    PersistentStore = case IsDurable of
+                          true  -> ?PERSISTENT_MSG_STORE;
+                          false -> ?TRANSIENT_MSG_STORE
+                      end,
+    noreply(State #q { variable_queue_state =
+                           rabbit_variable_queue:init(QName, PersistentStore) });
 
 handle_call(init_variable_queue, _From, State) ->
     reply(ok, State);
 
diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl
index be120c2e..c8733ed1 100644
--- a/src/rabbit_channel.erl
+++ b/src/rabbit_channel.erl
@@ -970,10 +970,7 @@ fold_per_queue(F, Acc0, UAQ) ->
             %% lists:reverse in handle_message({recover, true},
             %% ...). However, it is significantly slower when
             %% going beyond a few thousand elements.
-            dict:update(QPid,
-                        fun (MsgIds) -> [MsgId | MsgIds] end,
-                        [MsgId],
-                        D)
+            rabbit_misc:dict_cons(QPid, MsgId, D)
             end, dict:new(), UAQ),
     dict:fold(fun (QPid, MsgIds, Acc) -> F(QPid, MsgIds, Acc) end,
               Acc0, D).
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index 3bc35ca2..cd2e7fbc 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -59,7 +59,7 @@
 -export([sort_field_table/1]).
 -export([pid_to_string/1, string_to_pid/1]).
 -export([version_compare/2, version_compare/3]).
--export([recursive_delete/1]).
+-export([recursive_delete/1, dict_cons/3]).
 
 -import(mnesia).
 -import(lists).
@@ -135,6 +135,7 @@
 -spec(pid_to_string/1 :: (pid()) -> string()).
 -spec(string_to_pid/1 :: (string()) -> pid()).
 -spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}).
+-spec(dict_cons/3 :: (any(), any(), dict()) -> dict()).
 
 -endif.
 
@@ -625,3 +626,6 @@ recursive_delete(Path) ->
                     {error, {Path, Error}}
             end
     end.
+
+dict_cons(Key, Value, Dict) ->
+    dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict).
diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl
index 884ea4ab..96337b42 100644
--- a/src/rabbit_router.erl
+++ b/src/rabbit_router.erl
@@ -78,9 +78,7 @@ deliver(QPids, Delivery) ->
           dict:to_list(
             lists:foldl(
               fun (QPid, D) ->
-                      dict:update(node(QPid),
-                                  fun (QPids1) -> [QPid | QPids1] end,
-                                  [QPid], D)
+                      rabbit_misc:dict_cons(node(QPid), QPid, D)
               end, dict:new(), QPids)),
          Delivery).
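dict_cons/3 is a one-line but widely applicable helper: prepend a value onto the list stored under a key, creating the entry on first use. The rabbit_router change above is the canonical shape; here it is in isolation:

-module(dict_cons_demo).
-export([group_by_node/1]).

%% Groups pids by the node they live on, e.g.
%% group_by_node([self()]) =:= [{node(), [self()]}].
group_by_node(Pids) ->
    dict:to_list(
      lists:foldl(fun (Pid, D) ->
                          rabbit_misc:dict_cons(node(Pid), Pid, D)
                  end, dict:new(), Pids)).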
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 474afbca..75c66693 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1363,7 +1363,7 @@ assert_prop(List, Prop, Value) ->
 fresh_variable_queue() ->
     stop_msg_store(),
     ok = empty_test_queue(),
-    VQ = rabbit_variable_queue:init(test_queue()),
+    VQ = rabbit_variable_queue:init(test_queue(), ?PERSISTENT_MSG_STORE),
     S0 = rabbit_variable_queue:status(VQ),
     assert_prop(S0, len, 0),
     assert_prop(S0, q1, 0),
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index b9714f53..37c6b22e 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -31,7 +31,7 @@
 
 -module(rabbit_variable_queue).
 
--export([init/1, terminate/1, publish/2, publish_delivered/2,
+-export([init/2, terminate/1, publish/2, publish_delivered/2,
          set_queue_ram_duration_target/2, remeasure_rates/1,
          ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1,
          delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2,
@@ -154,7 +154,8 @@
                  rate_timestamp,
                  len,
                  on_sync,
-                 msg_store_clients
+                 msg_store_clients,
+                 persistent_store
                }).
 
 -include("rabbit.hrl").
@@ -186,7 +187,7 @@
 -type(bpqueue() :: any()).
 -type(msg_id() :: binary()).
 -type(seq_id() :: non_neg_integer()).
--type(ack() :: {'ack_index_and_store', msg_id(), seq_id(), boolean()}
+-type(ack() :: {'ack_index_and_store', msg_id(), seq_id(), atom() | pid()}
              | 'ack_not_on_disk').
 
 -type(vqstate() :: #vqstate { q1 :: queue(),
@@ -210,10 +211,11 @@
               rate_timestamp :: {integer(), integer(), integer()},
               len :: non_neg_integer(),
               on_sync :: {[ack()], [msg_id()], [{pid(), any()}]},
-              msg_store_clients :: {any(), any()}
+              msg_store_clients :: {any(), any()},
+              persistent_store :: pid() | atom()
             }).
 
--spec(init/1 :: (queue_name()) -> vqstate()).
+-spec(init/2 :: (queue_name(), pid() | atom()) -> vqstate()).
 -spec(terminate/1 :: (vqstate()) -> vqstate()).
 -spec(publish/2 :: (basic_message(), vqstate()) ->
                        {seq_id(), vqstate()}).
@@ -253,7 +255,7 @@
 %% Public API
 %%----------------------------------------------------------------------------
 
-init(QueueName) ->
+init(QueueName, PersistentStore) ->
     {DeltaCount, IndexState} =
         rabbit_queue_index:init(QueueName),
     {DeltaSeqId, NextSeqId, IndexState1} =
@@ -285,9 +287,10 @@ init(QueueName) ->
                rate_timestamp = Now,
                len = DeltaCount,
                on_sync = {[], [], []},
-               msg_store_clients = {rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE),
-                                    rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE)}
-             },
+               msg_store_clients = {rabbit_msg_store:client_init(PersistentStore),
+                                    rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE)},
+               persistent_store = PersistentStore
+             },
    maybe_deltas_to_betas(State).
terminate(State = #vqstate { index_state = IndexState,
@@ -306,19 +309,22 @@ publish_delivered(Msg = #basic_message { guid = MsgId,
                           next_seq_id = SeqId,
                           out_counter = OutCount,
                           in_counter = InCount,
-                          msg_store_clients = MSCState }) ->
+                          msg_store_clients = MSCState,
+                          persistent_store = PersistentStore }) ->
     State1 = State #vqstate { out_counter = OutCount + 1,
                               in_counter = InCount + 1 },
     MsgStatus = #msg_status {
       msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent,
       is_delivered = true, msg_on_disk = false, index_on_disk = false },
-    {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+    {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false,
+                                                      MsgStatus, MSCState),
     State2 = State1 #vqstate { msg_store_clients = MSCState1 },
     case MsgStatus1 #msg_status.msg_on_disk of
         true ->
             {#msg_status { index_on_disk = true }, IndexState1} =
                 maybe_write_index_to_disk(false, MsgStatus1, IndexState),
-            {{ack_index_and_store, MsgId, SeqId, IsPersistent},
+            {{ack_index_and_store, MsgId, SeqId,
+              find_msg_store(IsPersistent, PersistentStore)},
              State2 #vqstate { index_state = IndexState1,
                                next_seq_id = SeqId + 1 }};
         false ->
@@ -378,7 +384,8 @@ ram_duration(#vqstate { avg_egress_rate = AvgEgressRate,
 fetch(State =
       #vqstate { q4 = Q4,
                  ram_msg_count = RamMsgCount, out_counter = OutCount,
-                 index_state = IndexState, len = Len }) ->
+                 index_state = IndexState, len = Len,
+                 persistent_store = PersistentStore }) ->
     case queue:out(Q4) of
         {empty, _Q4} ->
             fetch_from_q3_or_delta(State);
@@ -387,7 +394,7 @@ fetch(State =
                          is_persistent = IsPersistent, is_delivered = IsDelivered,
                          msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }},
          Q4a} ->
-            {IndexState1, IndexOnDisk1} =
+            {IndexState1, IsPersistent} =
                 case IndexOnDisk of
                     true ->
                         IndexState2 =
@@ -404,13 +411,15 @@ fetch(State =
                     false ->
                         {IndexState, false}
                 end,
+            MsgStore = find_msg_store(IsPersistent, PersistentStore),
             AckTag =
-                case IndexOnDisk1 of
-                    true  -> true = IsPersistent, %% ASSERTION
-                             true = MsgOnDisk, %% ASSERTION
-                             {ack_index_and_store, MsgId, SeqId, IsPersistent};
-                    false -> ok = case MsgOnDisk andalso not IsPersistent of
-                                      true  -> rabbit_msg_store:remove(find_msg_store(IsPersistent), [MsgId]);
+                case IsPersistent of
+                    true  -> true = MsgOnDisk, %% ASSERTION
+                             {ack_index_and_store, MsgId, SeqId, MsgStore};
+                    false -> ok = case MsgOnDisk of
+                                      true ->
+                                          rabbit_msg_store:remove(
+                                            MsgStore, [MsgId]);
                                      false -> ok
                                  end,
                             ack_not_on_disk
@@ -423,26 +432,19 @@ fetch(State =
    end.
ack(AckTags, State = #vqstate { index_state = IndexState }) ->
-    {MsgIdsPersistent, MsgIdsTransient, SeqIds} =
+    {MsgIdsByStore, SeqIds} =
         lists:foldl(
           fun (ack_not_on_disk, Acc) -> Acc;
-              ({ack_index_and_store, MsgId, SeqId, true}, {MsgIdsP, MsgIdsT, SeqIds}) ->
-                  {[MsgId | MsgIdsP], MsgIdsT, [SeqId | SeqIds]};
-              ({ack_index_and_store, MsgId, SeqId, false}, {MsgIdsP, MsgIdsT, SeqIds}) ->
-                  {MsgIdsP, [MsgId | MsgIdsT], [SeqId | SeqIds]}
-          end, {[], [], []}, AckTags),
+              ({ack_index_and_store, MsgId, SeqId, MsgStore}, {Dict, SeqIds}) ->
+                  {rabbit_misc:dict_cons(MsgStore, MsgId, Dict), [SeqId | SeqIds]}
+          end, {dict:new(), []}, AckTags),
     IndexState1 = case SeqIds of
                       [] -> IndexState;
                       _  -> rabbit_queue_index:write_acks(SeqIds, IndexState)
                   end,
-    ok = case MsgIdsPersistent of
-             [] -> ok;
-             _  -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIdsPersistent)
-         end,
-    ok = case MsgIdsTransient of
-             [] -> ok;
-             _  -> rabbit_msg_store:remove(?TRANSIENT_MSG_STORE, MsgIdsTransient)
-         end,
+    ok = dict:fold(fun (MsgStore, MsgIds, ok) ->
+                           rabbit_msg_store:remove(MsgStore, MsgIds)
+                   end, ok, MsgIdsByStore),
     State #vqstate { index_state = IndexState1 }.
 
 len(#vqstate { len = Len }) ->
@@ -451,9 +453,11 @@ len(#vqstate { len = Len }) ->
 is_empty(State) ->
     0 == len(State).
 
-purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->
+purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len,
+                         persistent_store = PersistentStore }) ->
     {Q4Count, IndexState1} =
-        remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState),
+        remove_queue_entries(PersistentStore, fun rabbit_misc:queue_fold/3,
+                             Q4, IndexState),
     {Len, State1} =
         purge1(Q4Count, State #vqstate { index_state = IndexState1,
                                          q4 = queue:new() }),
@@ -463,7 +467,8 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->
 %% needs to delete everything that's been delivered and not ack'd.
 delete_and_terminate(State) ->
     {_PurgeCount, State1 = #vqstate { index_state = IndexState,
-                                      msg_store_clients = {MSCStateP, MSCStateT} }} =
+                                      msg_store_clients = {MSCStateP, MSCStateT},
+                                      persistent_store = PersistentStore }} =
         purge(State),
     IndexState1 =
         case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(
@@ -472,7 +477,8 @@ delete_and_terminate(State) ->
                 IndexState2;
             {DeltaSeqId, NextSeqId, IndexState2} ->
                 {_DeleteCount, IndexState3} =
-                    delete1(NextSeqId, 0, DeltaSeqId, IndexState2),
+                    delete1(PersistentStore, NextSeqId, 0, DeltaSeqId,
+                            IndexState2),
                 IndexState3
         end,
     IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1),
@@ -490,64 +496,59 @@ delete_and_terminate(State) ->
 %% msg_store:release so that the cache isn't held full of msgs which
 %% are now at the tail of the queue.
requeue(MsgsWithAckTags, State) ->
-    {SeqIds, MsgIdsPersistent, MsgIdsTransient,
+    {SeqIds, MsgIdsByStore,
     State1 = #vqstate { index_state = IndexState }} =
         lists:foldl(
           fun ({Msg = #basic_message { guid = MsgId }, AckTag},
-               {SeqIdsAcc, MsgIdsP, MsgIdsT, StateN}) ->
-                  {SeqIdsAcc1, MsgIdsP1, MsgIdsT1, MsgOnDisk} =
+               {SeqIdsAcc, Dict, StateN}) ->
+                  {SeqIdsAcc1, Dict1, MsgOnDisk} =
                       case AckTag of
                           ack_not_on_disk ->
-                              {SeqIdsAcc, MsgIdsP, MsgIdsT, false};
-                          {ack_index_and_store, MsgId, SeqId, true} ->
-                              {[SeqId | SeqIdsAcc], [MsgId | MsgIdsP], MsgIdsT, true};
-                          {ack_index_and_store, MsgId, SeqId, false} ->
-                              {[SeqId | SeqIdsAcc], MsgIdsP, [MsgId | MsgIdsT], true}
+                              {SeqIdsAcc, Dict, false};
+                          {ack_index_and_store, MsgId, SeqId, MsgStore} ->
+                              {[SeqId | SeqIdsAcc],
+                               rabbit_misc:dict_cons(MsgStore, MsgId, Dict),
+                               true}
                       end,
                   {_SeqId, StateN1} = publish(Msg, true, MsgOnDisk, StateN),
-                  {SeqIdsAcc1, MsgIdsP1, MsgIdsT1, StateN1}
-          end, {[], [], [], State}, MsgsWithAckTags),
+                  {SeqIdsAcc1, Dict1, StateN1}
+          end, {[], dict:new(), State}, MsgsWithAckTags),
     IndexState1 = case SeqIds of
                       [] -> IndexState;
                       _  -> rabbit_queue_index:write_acks(SeqIds, IndexState)
                   end,
-    ok = case MsgIdsPersistent of
-             [] -> ok;
-             _  -> rabbit_msg_store:release(?PERSISTENT_MSG_STORE, MsgIdsPersistent)
-         end,
-    ok = case MsgIdsTransient of
-             [] -> ok;
-             _  -> rabbit_msg_store:release(?TRANSIENT_MSG_STORE, MsgIdsTransient)
-         end,
+    ok = dict:fold(fun (MsgStore, MsgIds, ok) ->
+                           rabbit_msg_store:release(MsgStore, MsgIds)
+                   end, ok, MsgIdsByStore),
     State1 #vqstate { index_state = IndexState1 }.
 
 tx_publish(Msg = #basic_message { is_persistent = true, guid = MsgId },
-           State = #vqstate { msg_store_clients = MSCState }) ->
+           State = #vqstate { msg_store_clients = MSCState,
+                              persistent_store = PersistentStore }) ->
     MsgStatus = #msg_status {
       msg = Msg, msg_id = MsgId, seq_id = undefined, is_persistent = true,
       is_delivered = false, msg_on_disk = false, index_on_disk = false },
     {#msg_status { msg_on_disk = true }, MSCState1} =
-        maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+        maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState),
     State #vqstate { msg_store_clients = MSCState1 };
 tx_publish(_Msg, State) ->
     State.
 
-tx_rollback(Pubs, State) ->
+tx_rollback(Pubs, State = #vqstate { persistent_store = PersistentStore }) ->
     ok = case persistent_msg_ids(Pubs) of
             [] -> ok;
-             PP -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, PP)
+             PP -> rabbit_msg_store:remove(PersistentStore, PP)
         end,
    State.
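With ack tags now carrying the store itself, requeue and ack no longer hard-code two store names: they bucket msg ids with rabbit_misc:dict_cons and finish with one fold that makes a single msg_store call per store. That closing fold, generalized over the operation (remove on ack, release on requeue):

-module(per_store).
-export([apply_per_store/2]).

%% Fun is e.g. fun rabbit_msg_store:remove/2 or
%% fun rabbit_msg_store:release/2; both return 'ok'.
apply_per_store(Fun, MsgIdsByStore) ->
    ok = dict:fold(fun (MsgStore, MsgIds, ok) ->
                           ok = Fun(MsgStore, MsgIds)
                   end, ok, MsgIdsByStore).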
-tx_commit(Pubs, AckTags, From, State) -> +tx_commit(Pubs, AckTags, From, State = #vqstate { persistent_store = PersistentStore }) -> case persistent_msg_ids(Pubs) of [] -> {true, tx_commit_from_msg_store(Pubs, AckTags, From, State)}; PersistentMsgIds -> Self = self(), ok = rabbit_msg_store:sync( - ?PERSISTENT_MSG_STORE, - PersistentMsgIds, + PersistentStore, PersistentMsgIds, fun () -> ok = rabbit_amqqueue:tx_commit_msg_store_callback( Self, Pubs, AckTags, From) end), @@ -696,51 +697,53 @@ should_force_index_to_disk(State = %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -delete1(NextSeqId, Count, DeltaSeqId, IndexState) +delete1(_PersistentStore, NextSeqId, Count, DeltaSeqId, IndexState) when DeltaSeqId >= NextSeqId -> {Count, IndexState}; -delete1(NextSeqId, Count, DeltaSeqId, IndexState) -> +delete1(PersistentStore, NextSeqId, Count, DeltaSeqId, IndexState) -> Delta1SeqId = DeltaSeqId + rabbit_queue_index:segment_size(), case rabbit_queue_index:read_segment_entries(DeltaSeqId, IndexState) of {[], IndexState1} -> - delete1(NextSeqId, Count, Delta1SeqId, IndexState1); + delete1(PersistentStore, NextSeqId, Count, Delta1SeqId, + IndexState1); {List, IndexState1} -> Q = betas_from_segment_entries(List, Delta1SeqId), {QCount, IndexState2} = - remove_queue_entries(fun beta_fold_no_index_on_disk/3, - Q, IndexState1), - delete1(NextSeqId, Count + QCount, Delta1SeqId, IndexState2) + remove_queue_entries( + PersistentStore, fun beta_fold_no_index_on_disk/3, + Q, IndexState1), + delete1(PersistentStore, NextSeqId, Count + QCount, Delta1SeqId, + IndexState2) end. -purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> +purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, + persistent_store = PersistentStore }) -> case bpqueue:is_empty(Q3) of true -> {Q1Count, IndexState1} = - remove_queue_entries(fun rabbit_misc:queue_fold/3, - State #vqstate.q1, IndexState), + remove_queue_entries( + PersistentStore, fun rabbit_misc:queue_fold/3, + State #vqstate.q1, IndexState), {Count + Q1Count, State #vqstate { q1 = queue:new(), index_state = IndexState1 }}; false -> {Q3Count, IndexState1} = - remove_queue_entries(fun beta_fold_no_index_on_disk/3, - Q3, IndexState), + remove_queue_entries( + PersistentStore, fun beta_fold_no_index_on_disk/3, + Q3, IndexState), purge1(Count + Q3Count, maybe_deltas_to_betas( State #vqstate { index_state = IndexState1, q3 = bpqueue:new() })) end. 
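delete1/5 above also shows the index-walking idiom this module leans on: advance a sequence id in segment_size() steps until it passes NextSeqId, threading the state through each step. A stripped-down sketch of just that loop shape (names invented for illustration):

    %% Visit [From, To) in fixed-size windows, threading an accumulator.
    walk_segments(From, To, _Size, Acc) when From >= To ->
        Acc;
    walk_segments(From, To, Size, Acc) ->
        walk_segments(From + Size, To, Size, visit(From, Acc)).

    visit(_WindowStart, Acc) -> Acc + 1. %% stand-in for the per-segment work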
-remove_queue_entries(Fold, Q, IndexState) -> - {Count, MsgIdsPersistent, MsgIdsTransient, SeqIds, IndexState1} = - Fold(fun remove_queue_entries1/2, {0, [], [], [], IndexState}, Q), - ok = case MsgIdsPersistent of - [] -> ok; - _ -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIdsPersistent) - end, - ok = case MsgIdsTransient of - [] -> ok; - _ -> rabbit_msg_store:remove(?TRANSIENT_MSG_STORE, MsgIdsTransient) - end, +remove_queue_entries(PersistentStore, Fold, Q, IndexState) -> + {_PersistentStore, Count, MsgIdsByStore, SeqIds, IndexState1} = + Fold(fun remove_queue_entries1/2, + {PersistentStore, 0, dict:new(), [], IndexState}, Q), + ok = dict:fold(fun (MsgStore, MsgIds, ok) -> + rabbit_msg_store:remove(MsgStore, MsgIds) + end, ok, MsgIdsByStore), IndexState2 = case SeqIds of [] -> IndexState1; @@ -752,12 +755,15 @@ remove_queue_entries1( #msg_status { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {CountN, MsgIdsP, MsgIdsT, SeqIdsAcc, IndexStateN}) -> - {MsgIdsP1, MsgIdsT1} = + {PersistentStore, CountN, MsgIdsByStore, SeqIdsAcc, IndexStateN}) -> + MsgIdsByStore1 = case {MsgOnDisk, IsPersistent} of - {true, true} -> {[MsgId | MsgIdsP], MsgIdsT}; - {true, false} -> {MsgIdsP, [MsgId | MsgIdsT]}; - {false, _} -> {MsgIdsP, MsgIdsT} + {true, true} -> + rabbit_misc:dict_cons(PersistentStore, MsgId, MsgIdsByStore); + {true, false} -> + rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, MsgId, MsgIdsByStore); + {false, _} -> + MsgIdsByStore end, SeqIdsAcc1 = case IndexOnDisk of true -> [SeqId | SeqIdsAcc]; @@ -768,13 +774,14 @@ remove_queue_entries1( SeqId, IndexStateN); false -> IndexStateN end, - {CountN + 1, MsgIdsP1, MsgIdsT1, SeqIdsAcc1, IndexStateN1}. + {PersistentStore, CountN + 1, MsgIdsByStore1, SeqIdsAcc1, IndexStateN1}. fetch_from_q3_or_delta(State = #vqstate { q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - msg_store_clients = MSCState }) -> + msg_store_clients = MSCState, + persistent_store = PersistentStore }) -> case bpqueue:out(Q3) of {empty, _Q3} -> 0 = DeltaCount, %% ASSERTION @@ -786,7 +793,8 @@ fetch_from_q3_or_delta(State = #vqstate { is_persistent = IsPersistent }}, Q3a} -> {{ok, Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }}, MSCState1} = - read_from_msg_store(MSCState, MsgId, IsPersistent), + read_from_msg_store( + PersistentStore, MSCState, IsPersistent, MsgId), Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), RamIndexCount1 = case IndexOnDisk of true -> RamIndexCount; @@ -881,11 +889,12 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, in_counter = InCount + 1 })}. 
-publish(msg, MsgStatus, State = #vqstate { index_state = IndexState, - ram_msg_count = RamMsgCount, - msg_store_clients = MSCState }) -> +publish(msg, MsgStatus, #vqstate { + index_state = IndexState, ram_msg_count = RamMsgCount, + msg_store_clients = MSCState, + persistent_store = PersistentStore } = State) -> {MsgStatus1, MSCState1} = - maybe_write_msg_to_disk(false, MsgStatus, MSCState), + maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(false, MsgStatus1, IndexState), State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, @@ -893,11 +902,12 @@ publish(msg, MsgStatus, State = #vqstate { index_state = IndexState, msg_store_clients = MSCState1 }, store_alpha_entry(MsgStatus2, State1); -publish(index, MsgStatus, State = #vqstate { index_state = IndexState, q1 = Q1, - ram_index_count = RamIndexCount, - msg_store_clients = MSCState }) -> +publish(index, MsgStatus, #vqstate { + index_state = IndexState, q1 = Q1, + ram_index_count = RamIndexCount, msg_store_clients = MSCState, + persistent_store = PersistentStore } = State) -> {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(true, MsgStatus, MSCState), + maybe_write_msg_to_disk(PersistentStore, true, MsgStatus, MSCState), ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), @@ -913,9 +923,10 @@ publish(index, MsgStatus, State = #vqstate { index_state = IndexState, q1 = Q1, publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, - delta = Delta, msg_store_clients = MSCState }) -> + delta = Delta, msg_store_clients = MSCState, + persistent_store = PersistentStore }) -> {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(true, MsgStatus, MSCState), + maybe_write_msg_to_disk(PersistentStore, true, MsgStatus, MSCState), {#msg_status { index_on_disk = true }, IndexState1} = maybe_write_index_to_disk(true, MsgStatus1, IndexState), true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION @@ -955,41 +966,42 @@ store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) } end. -find_msg_store(true) -> ?PERSISTENT_MSG_STORE; -find_msg_store(false) -> ?TRANSIENT_MSG_STORE. - -read_from_msg_store({MSCStateP, MSCStateT}, MsgId, true) -> - {Res, MSCStateP1} = - rabbit_msg_store:read(?PERSISTENT_MSG_STORE, MsgId, MSCStateP), - {Res, {MSCStateP1, MSCStateT}}; -read_from_msg_store({MSCStateP, MSCStateT}, MsgId, false) -> - {Res, MSCStateT1} = - rabbit_msg_store:read(?TRANSIENT_MSG_STORE, MsgId, MSCStateT), - {Res, {MSCStateP, MSCStateT1}}. - -maybe_write_msg_to_disk(_Force, MsgStatus = +find_msg_store(true, PersistentStore) -> PersistentStore; +find_msg_store(false, _PersistentStore) -> ?TRANSIENT_MSG_STORE. + +with_msg_store_state(PersistentStore, {MSCStateP, MSCStateT}, true, + Fun) -> + {Result, MSCStateP1} = Fun(PersistentStore, MSCStateP), + {Result, {MSCStateP1, MSCStateT}}; +with_msg_store_state(_PersistentStore, {MSCStateP, MSCStateT}, false, + Fun) -> + {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), + {Result, {MSCStateP, MSCStateT1}}. 
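The with_msg_store_state/4 helper above is the heart of this refactor: one selector owns the pattern-match on the {persistent, transient} client pair, so callers stop duplicating it. Restated generically (a hedged sketch; with_half/3 is illustrative, not from the patch):

    %% Run a callback against one half of a two-element state, keeping
    %% the other half untouched; the atom picks the half.
    with_half({Left, Right}, left, Fun) ->
        {Res, Left1} = Fun(Left),
        {Res, {Left1, Right}};
    with_half({Left, Right}, right, Fun) ->
        {Res, Right1} = Fun(Right),
        {Res, {Left, Right1}}.

read_from_msg_store and maybe_write_msg_to_disk, just below, are then written once over the selector instead of repeating the case expression per store.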
+ +read_from_msg_store(PersistentStore, MSCState, IsPersistent, MsgId) -> + with_msg_store_state( + PersistentStore, MSCState, IsPersistent, + fun (MsgStore, MSCState1) -> + rabbit_msg_store:read(MsgStore, MsgId, MSCState1) + end). + +maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus = #msg_status { msg_on_disk = true }, MSCState) -> {MsgStatus, MSCState}; -maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, - is_persistent = IsPersistent }, - {MSCStateP, MSCStateT}) +maybe_write_msg_to_disk(PersistentStore, Force, + MsgStatus = #msg_status { + msg = Msg, msg_id = MsgId, + is_persistent = IsPersistent }, MSCState) when Force orelse IsPersistent -> - MSCState1 = - case IsPersistent of - true -> - {ok, MSCStateP1} = rabbit_msg_store:write( - ?PERSISTENT_MSG_STORE, MsgId, - ensure_binary_properties(Msg), MSCStateP), - {MSCStateP1, MSCStateT}; - false -> - {ok, MSCStateT1} = rabbit_msg_store:write( - ?TRANSIENT_MSG_STORE, MsgId, - ensure_binary_properties(Msg), MSCStateT), - {MSCStateP, MSCStateT1} - end, + {ok, MSCState1} = + with_msg_store_state( + PersistentStore, MSCState, IsPersistent, + fun (MsgStore, MSCState2) -> + rabbit_msg_store:write( + MsgStore, MsgId, ensure_binary_properties(Msg), MSCState2) + end), {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; -maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> +maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus, MSCState) -> {MsgStatus, MSCState}. maybe_write_index_to_disk(_Force, MsgStatus = @@ -1137,12 +1149,14 @@ maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = maybe_push_alphas_to_betas( Generator, Consumer, Q, State = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - index_state = IndexState, msg_store_clients = MSCState }) -> + index_state = IndexState, msg_store_clients = MSCState, + persistent_store = PersistentStore }) -> case Generator(Q) of {empty, _Q} -> State; {{value, MsgStatus}, Qa} -> - {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(true, MsgStatus, - MSCState), + {MsgStatus1, MSCState1} = + maybe_write_msg_to_disk( + PersistentStore, true, MsgStatus, MSCState), ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), -- cgit v1.2.1 From ec801cd45f0c7514ce812f6809d82933e45e8e4a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 4 Apr 2010 01:39:10 +0100 Subject: Cosmetic simplification of logic --- src/rabbit_variable_queue.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 37c6b22e..70ebd074 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -403,12 +403,12 @@ fetch(State = SeqId, IndexState); true -> IndexState end, - case IsPersistent of - true -> {IndexState2, true}; - false -> {rabbit_queue_index:write_acks( - [SeqId], IndexState2), false} - end; - false -> + {case IsPersistent of + true -> IndexState2; + false -> rabbit_queue_index:write_acks( + [SeqId], IndexState2) + end, IsPersistent}; + false -> %% If index isn't on disk, we can't be persistent {IndexState, false} end, MsgStore = find_msg_store(IsPersistent, PersistentStore), -- cgit v1.2.1 From 678bc0c7a82c751596bf837a6e3b94f1d502c613 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 4 Apr 2010 01:40:53 +0100 Subject: Mistake in opening files leading to process dictionary being wrongly populated and updated when file opening fails 
(eg enoent) --- src/file_handle_cache.erl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 520be0ce..c43695fb 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -230,26 +230,26 @@ open(Path, Mode, Options) -> File1 = #file { reader_count = RCount, has_writer = HasWriter } = case get({Path1, fhc_file}) of File = #file {} -> File; - undefined -> File = #file { reader_count = 0, - has_writer = false }, - put({Path1, fhc_file}, File), - File + undefined -> #file { reader_count = 0, + has_writer = false } end, IsWriter = is_writer(Mode), case IsWriter andalso HasWriter of true -> {error, writer_exists}; - false -> RCount1 = case is_reader(Mode) of - true -> RCount + 1; - false -> RCount - end, - HasWriter1 = HasWriter orelse IsWriter, - put({Path1, fhc_file}, - File1 #file { reader_count = RCount1, - has_writer = HasWriter1}), - Ref = make_ref(), + false -> Ref = make_ref(), case open1(Path1, Mode, Options, Ref, bof, new) of - {ok, _Handle} -> {ok, Ref}; - Error -> Error + {ok, _Handle} -> + RCount1 = case is_reader(Mode) of + true -> RCount + 1; + false -> RCount + end, + HasWriter1 = HasWriter orelse IsWriter, + put({Path1, fhc_file}, + File1 #file { reader_count = RCount1, + has_writer = HasWriter1}), + {ok, Ref}; + Error -> + Error end end end. -- cgit v1.2.1 From a88dfdb59514682bcc86169ed91fa59abbf462a4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 4 Apr 2010 03:23:02 +0100 Subject: If we submit to the workers jobs which use the fhc, the workers may receive messages from the fhc, thus need to be able to process them. Also, that then requires that the fhc is started before the workers --- src/rabbit.erl | 12 ++++++------ src/worker_pool_worker.erl | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 387b3256..de2fec57 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -53,6 +53,12 @@ [{mfa, {rabbit_mnesia, init, []}}, {enables, external_infrastructure}]}). +-rabbit_boot_step({file_handle_cache, + [{description, "file handle cache server"}, + {mfa, {rabbit_sup, start_restartable_child, + [file_handle_cache]}}, + {enables, worker_pool}]}). + -rabbit_boot_step({worker_pool, [{description, "worker pool"}, {mfa, {rabbit_sup, start_child, [worker_pool_sup]}}, @@ -81,12 +87,6 @@ {enables, kernel_ready}, {requires, external_infrastructure}]}). --rabbit_boot_step({file_handle_cache, - [{description, "file handle cache server"}, - {mfa, {rabbit_sup, start_restartable_child, - [file_handle_cache]}}, - {enables, kernel_ready}]}). - -rabbit_boot_step({kernel_ready, [{description, "kernel ready"}, {requires, external_infrastructure}]}). diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl index 4defc5ba..5a6ccb16 100644 --- a/src/worker_pool_worker.erl +++ b/src/worker_pool_worker.erl @@ -35,6 +35,8 @@ -export([start_link/1, submit/2, submit_async/2, run/1]). +-export([set_maximum_since_use/2]). + -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -64,7 +66,14 @@ submit(Pid, Fun) -> submit_async(Pid, Fun) -> gen_server2:cast(Pid, {submit_async, Fun}). +set_maximum_since_use(Pid, Age) -> + gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}). 
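To make the constraint in the commit message concrete: any job given to the pool may route file operations through the file handle cache, and the fhc server may pcast {set_maximum_since_use, Age} back at whichever worker process is executing it; hence the new handle_cast clause, and hence the fhc having to be running before the pool starts. A hedged sketch of such a job (scan_via_pool/1 is illustrative, not part of the patch, and assumes worker_pool:submit/1 accepts a fun):

    %% The worker that runs this fun is the process the fhc calls back.
    scan_via_pool(Path) ->
        worker_pool:submit(
          fun () ->
                  {ok, Ref} = file_handle_cache:open(
                                Path, [read, raw, binary], []),
                  Res = file_handle_cache:read(Ref, 1024),
                  ok = file_handle_cache:close(Ref),
                  Res
          end).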
+ +%%---------------------------------------------------------------------------- + init([WId]) -> + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), ok = worker_pool:idle(WId), put(worker_pool_worker, true), {ok, WId, hibernate, @@ -81,7 +90,11 @@ handle_call(Msg, _From, State) -> handle_cast({submit_async, Fun}, WId) -> run(Fun), ok = worker_pool:idle(WId), - {noreply, WId}; + {noreply, WId, hibernate}; + +handle_cast({set_maximum_since_use, Age}, WId) -> + ok = file_handle_cache:set_maximum_since_use(Age), + {noreply, WId, hibernate}; handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}. -- cgit v1.2.1 From 3d328954ec28eaa3ca56e3bc311c3438ea8d3d10 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 4 Apr 2010 03:55:55 +0100 Subject: Abstract out the "farming out work to the worker_pool and gathering it back in" pattern (gatherer.erl), and then make use of it when scanning queue indices and msg store files. Note the gatherer's exit signal was being caught in the handle_info of msg_store because trap_exits was on, hence moving that to later on in the msg_store init. --- src/gatherer.erl | 142 +++++++++++++++++++++++++++++++++++++++++++++ src/rabbit_msg_store.erl | 75 +++++++++++++++--------- src/rabbit_queue_index.erl | 51 ++++++++-------- 3 files changed, 213 insertions(+), 55 deletions(-) create mode 100644 src/gatherer.erl diff --git a/src/gatherer.erl b/src/gatherer.erl new file mode 100644 index 00000000..8c44388c --- /dev/null +++ b/src/gatherer.erl @@ -0,0 +1,142 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(gatherer). + +-behaviour(gen_server2). + +-export([start_link/0, wait_on/2, produce/2, finished/2, fetch/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(wait_on/2 :: (pid(), any()) -> 'ok'). +-spec(produce/2 :: (pid(), any()) -> 'ok'). +-spec(finished/2 :: (pid(), any()) -> 'ok'). +-spec(fetch/1 :: (pid()) -> {'value', any()} | 'finished'). + +-endif. 
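The intended call pattern, assembled from the API above: register a token with wait_on/2 before forking each job, have the job produce/2 its results and then mark itself finished/2, and drain with fetch/1 until it returns finished. A hedged usage sketch (produce_one/3 would need exporting; do_work/1 is hypothetical; the MFA form of worker_pool:submit_async/1 mirrors how the diffs below use it):

    %% Fan items out to the pool, stream results back in completion
    %% order, and return them all once every token has finished.
    gather_all(Items) ->
        {ok, Gatherer} = gatherer:start_link(),
        lists:foreach(
          fun (Item) ->
                  Token = make_ref(),
                  ok = gatherer:wait_on(Gatherer, Token),
                  ok = worker_pool:submit_async(
                         {?MODULE, produce_one, [Gatherer, Token, Item]})
          end, Items),
        drain(Gatherer, []).

    produce_one(Gatherer, Token, Item) ->
        ok = gatherer:produce(Gatherer, do_work(Item)),
        ok = gatherer:finished(Gatherer, Token).

    drain(Gatherer, Acc) ->
        case gatherer:fetch(Gatherer) of
            finished   -> lists:reverse(Acc);
            {value, V} -> drain(Gatherer, [V | Acc])
        end.

    do_work(Item) -> Item. %% stand-in for the real per-item job

fetch/1 blocks while tokens remain outstanding and no results are queued, and the gatherer stops normally once everything waited on has finished and been fetched.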
+ +%%---------------------------------------------------------------------------- + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +%%---------------------------------------------------------------------------- + +-record(gstate, { waiting_on, results, blocking }). + +%%---------------------------------------------------------------------------- + +wait_on(Pid, Token) -> + gen_server2:call(Pid, {wait_on, Token}, infinity). + +produce(Pid, Result) -> + gen_server2:cast(Pid, {produce, Result}). + +finished(Pid, Token) -> + gen_server2:call(Pid, {finished, Token}, infinity). + +fetch(Pid) -> + gen_server2:call(Pid, fetch, infinity). + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). + +init([]) -> + {ok, #gstate { waiting_on = sets:new(), results = queue:new(), + blocking = queue:new() }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call({wait_on, Token}, _From, State = #gstate { waiting_on = Tokens }) -> + {reply, ok, State #gstate { waiting_on = sets:add_element(Token, Tokens) }, + hibernate}; + +handle_call({finished, Token}, _From, + State = #gstate { waiting_on = Tokens, results = Results, + blocking = Blocking }) -> + Tokens1 = sets:del_element(Token, Tokens), + State1 = State #gstate { waiting_on = Tokens1 }, + case 0 =:= sets:size(Tokens1) andalso queue:is_empty(Results) andalso + not queue:is_empty(Blocking) of + true -> {stop, normal, ok, State1}; + false -> {reply, ok, State1, hibernate} + end; + +handle_call(fetch, From, State = + #gstate { blocking = Blocking, results = Results, + waiting_on = Tokens }) -> + case queue:out(Results) of + {empty, _Results} -> + case sets:size(Tokens) of + 0 -> {stop, normal, finished, State}; + _ -> {noreply, + State #gstate { blocking = queue:in(From, Blocking) }, + hibernate} + end; + {{value, Result}, Results1} -> + {reply, {value, Result}, State #gstate { results = Results1 }, + hibernate} + end; + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, State}. + +handle_cast({produce, Result}, State = #gstate { blocking = Blocking, + results = Results }) -> + {noreply, case queue:out(Blocking) of + {empty, _Blocking} -> + State #gstate { results = queue:in(Result, Results) }; + {{value, Blocked}, Blocking1} -> + gen_server2:reply(Blocked, {value, Result}), + State #gstate { blocking = Blocking1 } + end, hibernate}; + +handle_cast(Msg, State) -> + {stop, {unexpected_cast, Msg}, State}. + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Reason, State = #gstate { blocking = Blocking } ) -> + [gen_server2:reply(Blocked, finished) + || Blocked <- queue:to_list(Blocking) ], + State. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 5610b35e..82a9ddd7 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -36,7 +36,8 @@ -export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, sync/3, client_init/1, client_terminate/1, clean/2]). --export([sync/1, gc_done/4, set_maximum_since_use/2]). %% internal +-export([sync/1, gc_done/4, set_maximum_since_use/2, + build_index_worker/6]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). 
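The trap_exit remark in the commit message deserves a line of explanation: while the flag is off, a linked process exiting with reason normal is silently discarded; with it on, that exit arrives as an {'EXIT', Pid, normal} message, which in a gen_server lands in handle_info. The gatherer is linked to the msg_store and exits normally once drained, so the flag now goes on only after that point. A minimal illustration (not from the patch; it ignores the benign race where the exit signal is still in flight):

    demo_init_order() ->
        {ok, Gatherer} = gatherer:start_link(),
        %% Nothing was waited on, so fetch stops the gatherer at once;
        %% its normal exit is discarded because we are not yet trapping.
        finished = gatherer:fetch(Gatherer),
        process_flag(trap_exit, true),
        ok.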
@@ -467,8 +468,6 @@ close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = %%---------------------------------------------------------------------------- init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> - process_flag(trap_exit, true), - ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), @@ -527,6 +526,8 @@ init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), + process_flag(trap_exit, true), + {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, FileSummaryEts), @@ -1182,23 +1183,44 @@ find_contiguous_block_prefix([{MsgId, TotalSize, ExpectedOffset} | Tail], find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. -build_index([], State) -> - build_index(undefined, [State #msstate.current_file], State); build_index(Files, State) -> - {Offset, State1} = build_index(undefined, Files, State), - {Offset, lists:foldl(fun delete_file_if_empty/2, State1, Files)}. - -build_index(Left, [], State = #msstate { file_summary_ets = FileSummaryEts }) -> - ok = index_delete_by_file(undefined, State), - Offset = case ets:lookup(FileSummaryEts, Left) of - [] -> 0; - [#file_summary { file_size = FileSize }] -> FileSize - end, - {Offset, State #msstate { current_file = Left }}; -build_index(Left, [File|Files], - State = #msstate { dir = Dir, sum_valid_data = SumValid, - sum_file_size = SumFileSize, - file_summary_ets = FileSummaryEts }) -> + {ok, Pid} = gatherer:start_link(), + case Files of + [] -> build_index(Pid, undefined, [State #msstate.current_file], State); + _ -> {Offset, State1} = build_index(Pid, undefined, Files, State), + {Offset, lists:foldl(fun delete_file_if_empty/2, State1, Files)} + end. + +build_index(Gatherer, Left, [], + State = #msstate { file_summary_ets = FileSummaryEts, + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> + case gatherer:fetch(Gatherer) of + finished -> + ok = index_delete_by_file(undefined, State), + Offset = case ets:lookup(FileSummaryEts, Left) of + [] -> 0; + [#file_summary { file_size = FileSize }] -> FileSize + end, + {Offset, State #msstate { current_file = Left }}; + {value, FileSummary = + #file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize }} -> + true = ets:insert_new(FileSummaryEts, FileSummary), + build_index(Gatherer, Left, [], + State #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize }) + end; +build_index(Gatherer, Left, [File|Files], State) -> + Child = make_ref(), + ok = gatherer:wait_on(Gatherer, Child), + ok = worker_pool:submit_async({?MODULE, build_index_worker, + [Gatherer, Child, State, Left, File, Files]}), + build_index(Gatherer, File, Files, State). 
+ +build_index_worker( + Gatherer, Guid, State = #msstate { dir = Dir }, Left, File, Files) -> {ok, Messages, FileSize} = rabbit_msg_store_misc:scan_file_for_valid_messages( Dir, rabbit_msg_store_misc:filenum_to_name(File)), @@ -1231,15 +1253,12 @@ build_index(Left, [File|Files], end}; [F|_] -> {F, FileSize} end, - true = - ets:insert_new(FileSummaryEts, #file_summary { - file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, locked = false, - left = Left, right = Right, file_size = FileSize1, - readers = 0 }), - build_index(File, Files, - State #msstate { sum_valid_data = SumValid + ValidTotalSize, - sum_file_size = SumFileSize + FileSize1 }). + ok = gatherer:produce(Gatherer, #file_summary { + file = File, valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, locked = false, + left = Left, right = Right, file_size = FileSize1, + readers = 0 }), + ok = gatherer:finished(Gatherer, Guid). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 935f2754..2a94adf7 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -422,48 +422,45 @@ start_persistent_msg_store(DurableQueues) -> %%---------------------------------------------------------------------------- queue_index_walker(DurableQueues) when is_list(DurableQueues) -> - queue_index_walker({DurableQueues, sets:new()}); - -queue_index_walker({[], Kids}) -> - case sets:size(Kids) of - 0 -> finished; - _ -> receive - {found, MsgId, Count} -> - {MsgId, Count, {[], Kids}}; - {finished, Child} -> - queue_index_walker({[], sets:del_element(Child, Kids)}) - end + {ok, Pid} = gatherer:start_link(), + queue_index_walker({DurableQueues, Pid}); + +queue_index_walker({[], Gatherer}) -> + case gatherer:fetch(Gatherer) of + finished -> finished; + {value, {MsgId, Count}} -> {MsgId, Count, {[], Gatherer}} end; -queue_index_walker({[QueueName | QueueNames], Kids}) -> +queue_index_walker({[QueueName | QueueNames], Gatherer}) -> Child = make_ref(), + ok = gatherer:wait_on(Gatherer, Child), ok = worker_pool:submit_async({?MODULE, queue_index_walker_reader, - [QueueName, self(), Child]}), - queue_index_walker({QueueNames, sets:add_element(Child, Kids)}). + [QueueName, Gatherer, Child]}), + queue_index_walker({QueueNames, Gatherer}). -queue_index_walker_reader(QueueName, Parent, Guid) -> +queue_index_walker_reader(QueueName, Gatherer, Guid) -> State = blank_state(QueueName), State1 = load_journal(State), SegNums = all_segment_nums(State1), - queue_index_walker_reader(Parent, Guid, State1, SegNums). + queue_index_walker_reader(Gatherer, Guid, State1, SegNums). -queue_index_walker_reader(Parent, Guid, State, []) -> +queue_index_walker_reader(Gatherer, Guid, State, []) -> _State = terminate(false, State), - Parent ! {finished, Guid}; -queue_index_walker_reader(Parent, Guid, State, [Seg | SegNums]) -> + ok = gatherer:finished(Gatherer, Guid); +queue_index_walker_reader(Gatherer, Guid, State, [Seg | SegNums]) -> SeqId = reconstruct_seq_id(Seg, 0), {Messages, State1} = read_segment_entries(SeqId, State), - queue_index_walker_reader(Parent, Guid, SegNums, State1, Messages). + State2 = queue_index_walker_reader1(Gatherer, State1, Messages), + queue_index_walker_reader(Gatherer, Guid, State2, SegNums). 
-queue_index_walker_reader(Parent, Guid, SegNums, State, []) -> - queue_index_walker_reader(Parent, Guid, State, SegNums); -queue_index_walker_reader( - Parent, Guid, SegNums, State, - [{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs]) -> +queue_index_walker_reader1(_Gatherer, State, []) -> + State; +queue_index_walker_reader1( + Gatherer, State, [{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs]) -> case IsPersistent of - true -> Parent ! {found, MsgId, 1}; + true -> gatherer:produce(Gatherer, {MsgId, 1}); false -> ok end, - queue_index_walker_reader(Parent, Guid, SegNums, State, Msgs). + queue_index_walker_reader1(Gatherer, State, Msgs). %%---------------------------------------------------------------------------- %% Minors -- cgit v1.2.1 From b95eea8c9f8fa6cdebbb1c2b4c732f0f751a570f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 4 Apr 2010 15:05:56 +0100 Subject: Altered API of rabbit_msg_store_index so that terminate is meant to save out state too, and init can be asked to attempt to recover previously saved index --- include/rabbit_msg_store_index.hrl | 3 ++- src/rabbit_msg_store.erl | 2 +- src/rabbit_msg_store_ets_index.erl | 44 ++++++++++++++++++++++++-------------- src/rabbit_msg_store_index.erl | 2 +- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl index 9ae65507..db7e3b9f 100644 --- a/include/rabbit_msg_store_index.hrl +++ b/include/rabbit_msg_store_index.hrl @@ -40,7 +40,8 @@ -type(fieldpos() :: non_neg_integer()). -type(fieldvalue() :: any()). --spec(init/1 :: (dir()) -> index_state()). +-spec(init/2 :: (('fresh'|'recover'), dir()) -> + {'fresh'|'recovered', index_state()}). -spec(lookup/2 :: (msg_id(), index_state()) -> ('not_found' | keyvalue())). -spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). -spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 82a9ddd7..3b3df720 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -477,7 +477,7 @@ init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, IndexModule} = application:get_env(msg_store_index_module), rabbit_log:info("Using ~p to provide index for message store~n", [IndexModule]), - IndexState = IndexModule:init(Dir), + {fresh, IndexState} = IndexModule:init(fresh, Dir), InitFile = 0, FileSummaryEts = ets:new(rabbit_msg_store_file_summary, diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index fe921e92..f30934c5 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -33,42 +33,54 @@ -behaviour(rabbit_msg_store_index). --export([init/1, lookup/2, insert/2, update/2, update_fields/3, delete/2, +-export([init/2, lookup/2, insert/2, update/2, update_fields/3, delete/2, delete_by_file/2, terminate/1]). -define(MSG_LOC_NAME, rabbit_msg_store_ets_index). +-define(FILENAME, msg_store_index.ets). -include("rabbit_msg_store_index.hrl"). -init(_Dir) -> - ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]). +-record(state, { table, dir }).
-lookup(Key, MsgLocations) -> - case ets:lookup(MsgLocations, Key) of +init(fresh, Dir) -> + file:delete(filename:join(Dir, ?FILENAME)), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]), + {fresh, #state { table = Tid, dir = Dir }}; +init(recover, Dir) -> + case ets:file2tab(filename:join(Dir, ?FILENAME)) of + {ok, Tid} -> {recovered, #state { table = Tid, dir = Dir }}; + {error, _} -> init(fresh, Dir) + end. + +lookup(Key, State) -> + case ets:lookup(State #state.table, Key) of [] -> not_found; [Entry] -> Entry end. -insert(Obj, MsgLocations) -> - true = ets:insert_new(MsgLocations, Obj), +insert(Obj, State) -> + true = ets:insert_new(State #state.table, Obj), ok. -update(Obj, MsgLocations) -> - true = ets:insert(MsgLocations, Obj), +update(Obj, State) -> + true = ets:insert(State #state.table, Obj), ok. -update_fields(Key, Updates, MsgLocations) -> - true = ets:update_element(MsgLocations, Key, Updates), +update_fields(Key, Updates, State) -> + true = ets:update_element(State #state.table, Key, Updates), ok. -delete(Key, MsgLocations) -> - true = ets:delete(MsgLocations, Key), +delete(Key, State) -> + true = ets:delete(State #state.table, Key), ok. -delete_by_file(File, MsgLocations) -> +delete_by_file(File, State) -> MatchHead = #msg_location { file = File, _ = '_' }, - ets:select_delete(MsgLocations, [{MatchHead, [], [true]}]), + ets:select_delete(State #state.table, [{MatchHead, [], [true]}]), ok. -terminate(MsgLocations) -> +terminate(#state { table = MsgLocations, dir = Dir }) -> + ok = ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), + [{extended_info, [object_count]}]), ets:delete(MsgLocations). diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl index 42e06719..2c9de3fd 100644 --- a/src/rabbit_msg_store_index.erl +++ b/src/rabbit_msg_store_index.erl @@ -34,7 +34,7 @@ -export([behaviour_info/1]). behaviour_info(callbacks) -> - [{init, 1}, + [{init, 2}, {lookup, 2}, {insert, 2}, {update, 2}, -- cgit v1.2.1 From 0fd12a6b11e56dec662fb5d6a298ed1cc8e47235 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Apr 2010 16:36:22 +0100 Subject: The msg_store now avoids building the index and scanning files iff it was shut down cleanly, and all the clients that it previously knew about were also shut down cleanly and found on startup.
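The mechanism, as the diffs below spell out, is a recovery-terms file: on clean shutdown the store writes out its client references and index module; on startup it reads the file back, deletes it (so a later crash cannot pass for a clean stop), and only trusts the saved index and file summary when the recorded terms match what it expects. A condensed paraphrase of that handshake (names follow the patch, flow abbreviated):

    %% On clean shutdown: persist what restart will want to verify.
    save_clean(Dir, ClientRefs, IndexModule) ->
        rabbit_misc:write_term_file(
          filename:join(Dir, "clean.dot"),
          [{client_refs, ClientRefs}, {index_module, IndexModule}]).

    %% On startup: a readable terms file we can delete counts as clean;
    %% anything else forces the slow rebuild-and-rescan path.
    detect_clean(Dir) ->
        Path = filename:join(Dir, "clean.dot"),
        case rabbit_misc:read_term_file(Path) of
            {ok, Terms} -> case file:delete(Path) of
                               ok         -> {true, Terms};
                               {error, E} -> {false, E}
                           end;
            {error, E}  -> {false, E}
        end.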
--- src/rabbit_amqqueue.erl | 7 +- src/rabbit_msg_store.erl | 149 ++++++++++++++++++++++++++++++------- src/rabbit_msg_store_ets_index.erl | 8 +- src/rabbit_queue_index.erl | 78 +++++++++++++------ src/rabbit_tests.erl | 55 ++++++++------ src/rabbit_variable_queue.erl | 34 +++++---- 6 files changed, 237 insertions(+), 94 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index c14a28fe..d23cbd19 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -129,9 +129,10 @@ start() -> ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), - ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, - [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), - fun (ok) -> finished end, ok]), + ok = rabbit_sup:start_child( + ?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, + fun (ok) -> finished end, ok]), DurableQueues = find_durable_queues(), ok = rabbit_queue_index:start_persistent_msg_store(DurableQueues), {ok,_} = supervisor:start_child( diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 3b3df720..418b5d58 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,8 +33,9 @@ -behaviour(gen_server2). --export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, - sync/3, client_init/1, client_terminate/1, clean/2]). +-export([start_link/5, write/4, read/3, contains/2, remove/2, release/2, + sync/3, client_init/2, client_terminate/1, delete_client/2, clean/2, + successfully_recovered_state/1]). -export([sync/1, gc_done/4, set_maximum_since_use/2, build_index_worker/6]). %% internal @@ -42,9 +43,10 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). --define(SYNC_INTERVAL, 5). %% milliseconds - --define(GEOMETRIC_P, 0.3). %% parameter to geometric distribution rng +-define(SYNC_INTERVAL, 5). %% milliseconds +-define(GEOMETRIC_P, 0.3). %% parameter to geometric distribution rng +-define(CLEAN_FILENAME, "clean.dot"). +-define(FILE_SUMMARY_FILENAME, "file_summary.ets"). %%---------------------------------------------------------------------------- @@ -66,7 +68,9 @@ file_handles_ets, %% tid of the shared file handles table file_summary_ets, %% tid of the file summary table dedup_cache_ets, %% tid of dedup cache table - cur_file_cache_ets %% tid of current file cache table + cur_file_cache_ets, %% tid of current file cache table + client_refs, %% set of references of all registered clients + recovered_state %% boolean: did we recover state? }). -record(client_msstate, @@ -98,8 +102,8 @@ dedup_cache_ets :: tid(), cur_file_cache_ets :: tid() }). --spec(start_link/4 :: - (atom(), file_path(), +-spec(start_link/5 :: + (atom(), file_path(), [binary()] | 'undefined', (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(write/4 :: (server(), msg_id(), msg(), client_msstate()) -> @@ -112,9 +116,11 @@ -spec(sync/3 :: (server(), [msg_id()], fun (() -> any())) -> 'ok'). -spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> 'ok'). -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). --spec(client_init/1 :: (server()) -> client_msstate()). +-spec(client_init/2 :: (server(), binary()) -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). +-spec(delete_client/2 :: (server(), binary()) -> 'ok'). -spec(clean/2 :: (atom(), file_path()) -> 'ok'). 
+-spec(successfully_recovered_state/1 :: (server()) -> boolean()). -endif. @@ -278,9 +284,9 @@ %% public API %%---------------------------------------------------------------------------- -start_link(Server, Dir, MsgRefDeltaGen, MsgRefDeltaGenInit) -> +start_link(Server, Dir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit) -> gen_server2:start_link({local, Server}, ?MODULE, - [Server, Dir, MsgRefDeltaGen, MsgRefDeltaGenInit], + [Server, Dir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). write(Server, MsgId, Msg, CState = @@ -326,9 +332,10 @@ gc_done(Server, Reclaimed, Source, Destination) -> set_maximum_since_use(Server, Age) -> gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}). -client_init(Server) -> +client_init(Server, Ref) -> {IState, IModule, Dir, FileHandlesEts, FileSummaryEts, DedupCacheEts, - CurFileCacheEts} = gen_server2:call(Server, new_client_state, infinity), + CurFileCacheEts} = gen_server2:call(Server, {new_client_state, Ref}, + infinity), #client_msstate { file_handle_cache = dict:new(), index_state = IState, index_module = IModule, @@ -342,6 +349,12 @@ client_terminate(CState) -> close_all_handles(CState), ok. +delete_client(Server, Ref) -> + ok = gen_server2:call(Server, {delete_client, Ref}, infinity). + +successfully_recovered_state(Server) -> + gen_server2:call(Server, successfully_recovered_state, infinity). + clean(Server, BaseDir) -> Dir = filename:join(BaseDir, atom_to_list(Server)), ok = rabbit_misc:recursive_delete(Dir). @@ -467,7 +480,7 @@ close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = %% gen_server callbacks %%---------------------------------------------------------------------------- -init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> +init([Server, BaseDir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), @@ -477,12 +490,32 @@ init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, IndexModule} = application:get_env(msg_store_index_module), rabbit_log:info("Using ~p to provide index for message store~n", [IndexModule]), - {fresh, IndexState} = IndexModule:init(fresh, Dir), + + {Recovered, IndexState, ClientRefs1} = + case detect_clean_shutdown(Dir) of + {false, _Error} -> + {fresh, IndexState1} = IndexModule:init(fresh, Dir), + {false, IndexState1, sets:new()}; + {true, Terms} -> + case undefined /= ClientRefs andalso lists:sort(ClientRefs) == + lists:sort(proplists:get_value(client_refs, Terms, [])) + andalso proplists:get_value(index_module, Terms) == + IndexModule of + true -> + case IndexModule:init(recover, Dir) of + {fresh, IndexState1} -> + {false, IndexState1, sets:new()}; + {recovered, IndexState1} -> + {true, IndexState1, sets:from_list(ClientRefs)} + end; + false -> + {fresh, IndexState1} = IndexModule:init(fresh, Dir), + {false, IndexState1, sets:new()} + end + end, InitFile = 0, - FileSummaryEts = ets:new(rabbit_msg_store_file_summary, - [ordered_set, public, - {keypos, #file_summary.file}]), + {Recovered1, FileSummaryEts} = recover_file_summary(Recovered, Dir), DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, [ordered_set, public]), @@ -504,20 +537,23 @@ init([Server, BaseDir, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts, - cur_file_cache_ets = CurFileCacheEts + cur_file_cache_ets = 
CurFileCacheEts, + client_refs = ClientRefs1, + recovered_state = Recovered }, - ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), + ok = count_msg_refs(Recovered, MsgRefDeltaGen, MsgRefDeltaGenInit, State), FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames), + %% There should be no more tmp files now, so go ahead and load the %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], {Offset, State1 = #msstate { current_file = CurFile }} = - build_index(Files, State), + build_index(Recovered1, Files, State), %% read is only needed so that we can seek {ok, FileHdl} = rabbit_msg_store_misc:open_file( @@ -543,15 +579,25 @@ handle_call({contains, MsgId}, From, State) -> State1 = contains_message(MsgId, From, State), noreply(State1); -handle_call(new_client_state, _From, +handle_call({new_client_state, CRef}, _From, State = #msstate { index_state = IndexState, dir = Dir, index_module = IndexModule, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts, - cur_file_cache_ets = CurFileCacheEts }) -> + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs }) -> reply({IndexState, IndexModule, Dir, FileHandlesEts, FileSummaryEts, - DedupCacheEts, CurFileCacheEts}, State). + DedupCacheEts, CurFileCacheEts}, + State #msstate { client_refs = sets:add_element(CRef, ClientRefs) }); + +handle_call(successfully_recovered_state, _From, State) -> + reply(State #msstate.recovered_state, State); + +handle_call({delete_client, CRef}, _From, + State = #msstate { client_refs = ClientRefs }) -> + reply(ok, + State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }). handle_cast({write, MsgId, Msg}, State = #msstate { current_file_handle = CurHdl, @@ -680,7 +726,9 @@ terminate(_Reason, State = #msstate { index_state = IndexState, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts, - cur_file_cache_ets = CurFileCacheEts }) -> + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs, + dir = Dir }) -> %% stop the gc first, otherwise it could be working and we pull %% out the ets tables from under it. ok = rabbit_msg_store_gc:stop(GCPid), @@ -691,11 +739,13 @@ terminate(_Reason, State = #msstate { index_state = IndexState, State2 end, State3 = close_all_handles(State1), - ets:delete(FileSummaryEts), + store_file_summary(FileSummaryEts, Dir), ets:delete(DedupCacheEts), ets:delete(FileHandlesEts), ets:delete(CurFileCacheEts), IndexModule:terminate(IndexState), + store_clean_shutdown([{client_refs, sets:to_list(ClientRefs)}, + {index_module, IndexModule}], Dir), State3 #msstate { index_state = undefined, current_file_handle = undefined }. @@ -957,6 +1007,35 @@ get_read_handle(FileNum, FHC, Dir) -> {Hdl, dict:store(FileNum, Hdl, FHC) } end. +detect_clean_shutdown(Dir) -> + Path = filename:join(Dir, ?CLEAN_FILENAME), + case rabbit_misc:read_term_file(Path) of + {ok, Terms} -> case file:delete(Path) of + ok -> {true, Terms}; + {error, Error} -> {false, Error} + end; + {error, Error} -> {false, Error} + end. + +store_clean_shutdown(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). 
+ +recover_file_summary(false, _Dir) -> + {false, ets:new(rabbit_msg_store_file_summary, + [ordered_set, public, {keypos, #file_summary.file}])}; +recover_file_summary(true, Dir) -> + Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {true, Tid}; + {error, _} -> recover_file_summary(false, Dir) + end. + +store_file_summary(Tid, Dir) -> + ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), + [{extended_info, [object_count]}]), + ets:delete(Tid). + %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- @@ -1034,6 +1113,11 @@ index_delete_by_file(File, #msstate { index_module = Index, %% recovery %%---------------------------------------------------------------------------- +count_msg_refs(false, Gen, Seed, State) -> + count_msg_refs(Gen, Seed, State); +count_msg_refs(true, _Gen, _Seed, _State) -> + ok. + count_msg_refs(Gen, Seed, State) -> case Gen(Seed) of finished -> ok; @@ -1183,7 +1267,18 @@ find_contiguous_block_prefix([{MsgId, TotalSize, ExpectedOffset} | Tail], find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> {ExpectedOffset, MsgIds}. -build_index(Files, State) -> +build_index(true, _Files, State = + #msstate { file_summary_ets = FileSummaryEts }) -> + ets:foldl( + fun (#file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize, file = File }, + {_Offset, State1 = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize }}) -> + {FileSize, State1 #msstate { sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize, + current_file = File }} + end, {0, State}, FileSummaryEts); +build_index(false, Files, State) -> {ok, Pid} = gatherer:start_link(), case Files of [] -> build_index(Pid, undefined, [State #msstate.current_file], State); diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index f30934c5..d46212ba 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -37,7 +37,7 @@ delete_by_file/2, terminate/1]). -define(MSG_LOC_NAME, rabbit_msg_store_ets_index). --define(FILENAME, msg_store_index.ets). +-define(FILENAME, "msg_store_index.ets"). -include("rabbit_msg_store_index.hrl"). @@ -48,8 +48,10 @@ init(fresh, Dir) -> Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]), {fresh, #state { table = Tid, dir = Dir }}; init(recover, Dir) -> - case ets:file2tab(filename:join(Dir, ?FILENAME)) of - {ok, Tid} -> {recovered, #state { table = Tid, dir = Dir }}; + Path = filename:join(Dir, ?FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {recovered, #state { table = Tid, dir = Dir }}; {error, _} -> init(fresh, Dir) end. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 2a94adf7..f37d7019 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,7 +31,7 @@ -module(rabbit_queue_index). --export([init/1, terminate/1, terminate_and_erase/1, write_published/4, +-export([init/1, terminate/2, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, @@ -195,8 +195,9 @@ dirty_count :: integer() }). --spec(init/1 :: (queue_name()) -> {non_neg_integer(), qistate()}). 
--spec(terminate/1 :: (qistate()) -> qistate()). +-spec(init/1 :: (queue_name()) -> + {non_neg_integer(), binary(), binary(), qistate()}). +-spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) -> qistate()). @@ -221,6 +222,19 @@ init(Name) -> State = blank_state(Name), + {PRef, TRef} = case read_shutdown_terms(State #qistate.dir) of + {error, _} -> + {rabbit_guid:guid(), rabbit_guid:guid()}; + {ok, Terms} -> + case [persistent_ref, transient_ref] -- + proplists:get_keys(Terms) of + [] -> + {proplists:get_value(persistent_ref, Terms), + proplists:get_value(transient_ref, Terms)}; + _ -> + {rabbit_guid:guid(), rabbit_guid:guid()} + end + end, %% 1. Load the journal completely. This will also load segments %% which have entries in the journal and remove duplicates. %% The counts will correctly reflect the combination of the @@ -263,7 +277,8 @@ init(Name) -> {segment_store(Segment2, Segments2), CountAcc + PubCount1 - AckCount1, DCountAcc1} end, {Segments, 0, DCount}, AllSegs), - {Count, State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. + {Count, PRef, TRef, + State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> {Segment, 0}; @@ -276,11 +291,11 @@ maybe_add_to_journal(false, _, del, RelSeq, Segment) -> maybe_add_to_journal(false, _, _Del, RelSeq, Segment) -> {add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)), 2}. -terminate(State) -> - terminate(true, State). +terminate(Terms, State) -> + terminate(true, Terms, State). terminate_and_erase(State) -> - State1 = terminate(State), + State1 = terminate(false, [], State), ok = delete_queue_directory(State1 #qistate.dir), State1. @@ -397,20 +412,33 @@ start_persistent_msg_store(DurableQueues) -> [] end, DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), - {DurableQueueNames, TransientDirs} = + {DurableQueueNames, TransientDirs, DurableRefs} = lists:foldl( - fun (QueueDir, {DurableAcc, TransientAcc}) -> + fun (QueueDir, {DurableAcc, TransientAcc, RefsAcc}) -> case sets:is_element(QueueDir, DurableDirectories) of true -> + RefsAcc1 = + case read_shutdown_terms( + filename:join(QueuesDir, QueueDir)) of + {error, _} -> + RefsAcc; + {ok, Terms} -> + case proplists:get_value( + persistent_ref, Terms) of + undefined -> RefsAcc; + Ref -> [Ref | RefsAcc] + end + end, {[dict:fetch(QueueDir, DurableDict) | DurableAcc], - TransientAcc}; + TransientAcc, RefsAcc1}; false -> - {DurableAcc, [QueueDir | TransientAcc]} + {DurableAcc, [QueueDir | TransientAcc], RefsAcc} end - end, {[], []}, Directories), - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), - fun queue_index_walker/1, DurableQueueNames]), + end, {[], [], []}, Directories), + ok = rabbit_sup:start_child( + ?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), DurableRefs, + fun queue_index_walker/1, DurableQueueNames]), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), ok = delete_queue_directory(Dir) @@ -444,7 +472,7 @@ queue_index_walker_reader(QueueName, Gatherer, Guid) -> queue_index_walker_reader(Gatherer, Guid, State1, SegNums). 
queue_index_walker_reader(Gatherer, Guid, State, []) -> - _State = terminate(false, State), + _State = terminate(false, [], State), ok = gatherer:finished(Gatherer, Guid); queue_index_walker_reader(Gatherer, Guid, State, [Seg | SegNums]) -> SeqId = reconstruct_seq_id(Seg, 0), @@ -518,11 +546,11 @@ detect_clean_shutdown(Dir) -> {error, enoent} -> false end. -store_clean_shutdown(Dir) -> - {ok, Hdl} = file_handle_cache:open(filename:join(Dir, ?CLEAN_FILENAME), - [write, raw, binary], - [{write_buffer, unbuffered}]), - ok = file_handle_cache:close(Hdl). +read_shutdown_terms(Dir) -> + rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)). + +store_clean_shutdown(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). queue_name_to_dir_name(Name = #resource { kind = queue }) -> Bin = term_to_binary(Name), @@ -646,7 +674,9 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> end, Hdl. -terminate(StoreShutdown, State = +terminate(_StoreShutdown, _Terms, State = #qistate { segments = undefined }) -> + State; +terminate(StoreShutdown, Terms, State = #qistate { journal_handle = JournalHdl, dir = Dir, segments = Segments }) -> ok = case JournalHdl of @@ -660,10 +690,10 @@ terminate(StoreShutdown, State = file_handle_cache:close(Hdl) end, ok, Segments), case StoreShutdown of - true -> store_clean_shutdown(Dir); + true -> store_clean_shutdown(Terms, Dir); false -> ok end, - State #qistate { journal_handle = undefined, segments = segments_new() }. + State #qistate { journal_handle = undefined, segments = undefined }. %%---------------------------------------------------------------------------- %% Majors diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 75c66693..22473594 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -995,16 +995,18 @@ start_msg_store_empty() -> start_msg_store(fun (ok) -> finished end, ok). start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), - MsgRefDeltaGen, MsgRefDeltaGenInit]), + ok = rabbit_sup:start_child( + ?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), undefined, + MsgRefDeltaGen, MsgRefDeltaGenInit]), start_transient_msg_store(). start_transient_msg_store() -> ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), - ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, - [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), - fun (ok) -> finished end, ok]). + ok = rabbit_sup:start_child( + ?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, + fun (ok) -> finished end, ok]). 
stop_msg_store() -> case supervisor:terminate_child(rabbit_sup, ?PERSISTENT_MSG_STORE) of @@ -1061,7 +1063,8 @@ test_msg_store() -> {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), %% check we don't contain any of the msgs we're about to publish false = msg_store_contains(false, MsgIds), - MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), + Ref = rabbit_guid:guid(), + MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), %% publish the first half {ok, MSCState1} = msg_store_write(MsgIds1stHalf, MSCState), %% sync on the first half @@ -1135,7 +1138,7 @@ test_msg_store() -> %% check we don't contain any of the msgs false = msg_store_contains(false, MsgIds), %% publish the first half again - MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), + MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), {ok, MSCState9} = msg_store_write(MsgIds1stHalf, MSCState8), %% this should force some sort of sync internally otherwise misread ok = rabbit_msg_store:client_terminate( @@ -1154,7 +1157,7 @@ test_msg_store() -> {ok, MSCStateM} = rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId, Payload, MSCStateN), MSCStateM - end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), MsgIdsBig)), + end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), MsgIdsBig)), %% now read them to ensure we hit the fast client-side reading ok = rabbit_msg_store:client_terminate( lists:foldl( @@ -1162,7 +1165,7 @@ test_msg_store() -> {{ok, Payload}, MSCStateN} = rabbit_msg_store:read(?PERSISTENT_MSG_STORE, MsgId, MSCStateM), MSCStateN - end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE), MsgIdsBig)), + end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), MsgIdsBig)), %% .., then 3s by 1... ok = lists:foldl( fun (MsgId, ok) -> @@ -1203,21 +1206,27 @@ test_amqqueue(Durable) -> empty_test_queue() -> ok = start_transient_msg_store(), ok = rabbit_queue_index:start_persistent_msg_store([]), - {0, Qi1} = rabbit_queue_index:init(test_queue()), + {0, _PRef, _TRef, Qi1} = rabbit_queue_index:init(test_queue()), _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), ok. queue_index_publish(SeqIds, Persistent, Qi) -> + Ref = rabbit_guid:guid(), + MsgStore = case Persistent of + true -> ?PERSISTENT_MSG_STORE; + false -> ?TRANSIENT_MSG_STORE + end, {A, B, MSCStateEnd} = lists:foldl( fun (SeqId, {QiN, SeqIdsMsgIdsAcc, MSCStateN}) -> MsgId = rabbit_guid:guid(), QiM = rabbit_queue_index:write_published(MsgId, SeqId, Persistent, QiN), - {ok, MSCStateM} = rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId, + {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, MsgId, MsgId, MSCStateN), {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc], MSCStateM} - end, {Qi, [], rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE)}, SeqIds), + end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), + ok = rabbit_msg_store:delete_client(MsgStore, Ref), ok = rabbit_msg_store:client_terminate(MSCStateEnd), {A, B}. 
@@ -1246,7 +1255,7 @@ test_queue_index() -> ok = empty_test_queue(), SeqIdsA = lists:seq(0,9999), SeqIdsB = lists:seq(10000,19999), - {0, Qi0} = rabbit_queue_index:init(test_queue()), + {0, _PRef, _TRef, Qi0} = rabbit_queue_index:init(test_queue()), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), @@ -1256,12 +1265,12 @@ test_queue_index() -> ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsMsgIdsA)), %% call terminate twice to prove it's idempotent - _Qi5 = rabbit_queue_index:terminate(rabbit_queue_index:terminate(Qi4)), + _Qi5 = rabbit_queue_index:terminate([], rabbit_queue_index:terminate([], Qi4)), ok = stop_msg_store(), ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), ok = start_transient_msg_store(), %% should get length back as 0, as all the msgs were transient - {0, Qi6} = rabbit_queue_index:init(test_queue()), + {0, _PRef1, _TRef1, Qi6} = rabbit_queue_index:init(test_queue()), {0, SegSize, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), @@ -1270,13 +1279,13 @@ test_queue_index() -> {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsMsgIdsB)), - _Qi11 = rabbit_queue_index:terminate(Qi10), + _Qi11 = rabbit_queue_index:terminate([], Qi10), ok = stop_msg_store(), ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), ok = start_transient_msg_store(), %% should get length back as 10000 LenB = length(SeqIdsB), - {LenB, Qi12} = rabbit_queue_index:init(test_queue()), + {LenB, _PRef2, _TRef2, Qi12} = rabbit_queue_index:init(test_queue()), {0, TwoSegs, Qi13} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), @@ -1288,12 +1297,12 @@ test_queue_index() -> %% Everything will have gone now because #pubs == #acks {0, 0, Qi18} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), - _Qi19 = rabbit_queue_index:terminate(Qi18), + _Qi19 = rabbit_queue_index:terminate([], Qi18), ok = stop_msg_store(), ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), ok = start_transient_msg_store(), %% should get length back as 0 because all persistent msgs have been acked - {0, Qi20} = rabbit_queue_index:init(test_queue()), + {0, _PRef3, _TRef3, Qi20} = rabbit_queue_index:init(test_queue()), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1302,7 +1311,7 @@ test_queue_index() -> %% First, partials: %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - {0, Qi22} = rabbit_queue_index:init(test_queue()), + {0, _PRef4, _TRef4, Qi22} = rabbit_queue_index:init(test_queue()), {Qi23, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = queue_index_deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), @@ -1313,7 +1322,7 @@ test_queue_index() -> ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment - {0, Qi29} = rabbit_queue_index:init(test_queue()), + {0, _PRef5, _TRef5, Qi29} = rabbit_queue_index:init(test_queue()), {Qi30, _SeqIdsMsgIdsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = queue_index_deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi31), @@ -1325,7 
+1334,7 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), - {0, Qi36} = rabbit_queue_index:init(test_queue()), + {0, _PRef6, _TRef6, Qi36} = rabbit_queue_index:init(test_queue()), {Qi37, _SeqIdsMsgIdsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = queue_index_deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 70ebd074..03db8510 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -211,7 +211,7 @@ rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), on_sync :: {[ack()], [msg_id()], [{pid(), any()}]}, - msg_store_clients :: {any(), any()}, + msg_store_clients :: {{any(), binary()}, {any(), binary()}}, persistent_store :: pid() | atom() }). @@ -256,7 +256,7 @@ %%---------------------------------------------------------------------------- init(QueueName, PersistentStore) -> - {DeltaCount, IndexState} = + {DeltaCount, PRef, TRef, IndexState} = rabbit_queue_index:init(QueueName), {DeltaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), @@ -287,17 +287,20 @@ init(QueueName, PersistentStore) -> rate_timestamp = Now, len = DeltaCount, on_sync = {[], [], []}, - msg_store_clients = {rabbit_msg_store:client_init(PersistentStore), - rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE)}, + msg_store_clients = { + {rabbit_msg_store:client_init(PersistentStore, PRef), PRef}, + {rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), TRef}}, persistent_store = PersistentStore }, maybe_deltas_to_betas(State). -terminate(State = #vqstate { index_state = IndexState, - msg_store_clients = {MSCStateP, MSCStateT} }) -> +terminate(State = #vqstate { + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} }) -> rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), - State #vqstate { index_state = rabbit_queue_index:terminate(IndexState) }. + Terms = [{persistent_ref, PRef}, {transient_ref, TRef}], + State #vqstate { index_state = rabbit_queue_index:terminate(Terms, IndexState) }. publish(Msg, State) -> State1 = limit_ram_index(State), @@ -466,9 +469,10 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len, %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. delete_and_terminate(State) -> - {_PurgeCount, State1 = #vqstate { index_state = IndexState, - msg_store_clients = {MSCStateP, MSCStateT}, - persistent_store = PersistentStore }} = + {_PurgeCount, State1 = #vqstate { + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, + persistent_store = PersistentStore }} = purge(State), IndexState1 = case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( @@ -482,6 +486,8 @@ delete_and_terminate(State) -> IndexState3 end, IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1), + rabbit_msg_store:delete_client(PersistentStore, PRef), + rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), State1 #vqstate { index_state = IndexState4 }. 
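The two shutdown paths treat the client refs differently, which is why they are now threaded through msg_store_clients. A sketch of the contrast (hypothetical helper; the call sequences match terminate/1 and delete_and_terminate/1 above):

shutdown_client(keep_ref, _Store, {MSCState, _Ref}) ->
    %% plain terminate: the ref is saved in the shutdown terms, so the
    %% same client can be resurrected on restart
    ok = rabbit_msg_store:client_terminate(MSCState);
shutdown_client(drop_ref, Store, {MSCState, Ref}) ->
    %% delete_and_terminate: the queue is gone for good, so the store
    %% must forget the ref as well
    ok = rabbit_msg_store:delete_client(Store, Ref),
    ok = rabbit_msg_store:client_terminate(MSCState).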
@@ -969,14 +975,14 @@ store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, find_msg_store(true, PersistentStore) -> PersistentStore; find_msg_store(false, _PersistentStore) -> ?TRANSIENT_MSG_STORE. -with_msg_store_state(PersistentStore, {MSCStateP, MSCStateT}, true, +with_msg_store_state(PersistentStore, {{MSCStateP, PRef}, MSCStateT}, true, Fun) -> {Result, MSCStateP1} = Fun(PersistentStore, MSCStateP), - {Result, {MSCStateP1, MSCStateT}}; -with_msg_store_state(_PersistentStore, {MSCStateP, MSCStateT}, false, + {Result, {{MSCStateP1, PRef}, MSCStateT}}; +with_msg_store_state(_PersistentStore, {MSCStateP, {MSCStateT, TRef}}, false, Fun) -> {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), - {Result, {MSCStateP, MSCStateT1}}. + {Result, {MSCStateP, {MSCStateT1, TRef}}}. read_from_msg_store(PersistentStore, MSCState, IsPersistent, MsgId) -> with_msg_store_state( -- cgit v1.2.1 From 04b083837f56308a10ecbb3b91de5cf77d0d1a7a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Apr 2010 20:58:48 +0100 Subject: Given a clean shutdown, near instantaneous startup, regardless of queue length. Note most expensive element in startup is loading in the msg_store index. Also note for some unexplained reason, this currently doesn't work with toke - the toke plugin will need reworking to become available to both msg_stores simultaneously. --- src/rabbit_msg_store.erl | 11 +++- src/rabbit_queue_index.erl | 90 +++++++++++++++------------- src/rabbit_tests.erl | 16 ++--- src/rabbit_variable_queue.erl | 135 ++++++++++++++++++++++++++++-------------- 4 files changed, 155 insertions(+), 97 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 418b5d58..1455b456 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -481,6 +481,8 @@ close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = %%---------------------------------------------------------------------------- init([Server, BaseDir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> + process_flag(trap_exit, true), + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), @@ -562,8 +564,6 @@ init([Server, BaseDir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), - process_flag(trap_exit, true), - {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, FileSummaryEts), @@ -716,7 +716,8 @@ handle_cast({set_maximum_since_use, Age}, State) -> handle_info(timeout, State) -> noreply(internal_sync(State)); -handle_info({'EXIT', _Pid, Reason}, State) -> +handle_info({'EXIT', Pid, Reason}, State) -> + io:format("~p EXIT! ~p ~p ~p~n", [self(), Reason, Pid, State]), {stop, Reason, State}. terminate(_Reason, State = #msstate { index_state = IndexState, @@ -1292,6 +1293,10 @@ build_index(Gatherer, Left, [], sum_file_size = SumFileSize }) -> case gatherer:fetch(Gatherer) of finished -> + unlink(Gatherer), + receive {'EXIT', Gatherer, _} -> ok + after 0 -> ok + end, ok = index_delete_by_file(undefined, State), Offset = case ets:lookup(FileSummaryEts, Left) of [] -> 0; diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f37d7019..7227481d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,7 +31,7 @@ -module(rabbit_queue_index). 
--export([init/1, terminate/2, terminate_and_erase/1, write_published/4, +-export([init/2, terminate/2, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, @@ -190,13 +190,13 @@ -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). -type(qistate() :: #qistate { dir :: file_path(), - segments :: seg_dict(), + segments :: 'undefined' | seg_dict(), journal_handle :: hdl(), dirty_count :: integer() }). --spec(init/1 :: (queue_name()) -> - {non_neg_integer(), binary(), binary(), qistate()}). +-spec(init/2 :: (queue_name(), boolean()) -> + {'undefined' | non_neg_integer(), binary(), binary(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). -spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) @@ -220,21 +220,22 @@ %% Public API %%---------------------------------------------------------------------------- -init(Name) -> +init(Name, MsgStoreRecovered) -> State = blank_state(Name), - {PRef, TRef} = case read_shutdown_terms(State #qistate.dir) of - {error, _} -> - {rabbit_guid:guid(), rabbit_guid:guid()}; - {ok, Terms} -> - case [persistent_ref, transient_ref] -- - proplists:get_keys(Terms) of - [] -> - {proplists:get_value(persistent_ref, Terms), - proplists:get_value(transient_ref, Terms)}; - _ -> - {rabbit_guid:guid(), rabbit_guid:guid()} - end - end, + {PRef, TRef, Terms} = + case read_shutdown_terms(State #qistate.dir) of + {error, _} -> + {rabbit_guid:guid(), rabbit_guid:guid(), []}; + {ok, Terms1} -> + case [persistent_ref, transient_ref] -- + proplists:get_keys(Terms1) of + [] -> + {proplists:get_value(persistent_ref, Terms1), + proplists:get_value(transient_ref, Terms1), Terms1}; + _ -> + {rabbit_guid:guid(), rabbit_guid:guid(), []} + end + end, %% 1. Load the journal completely. This will also load segments %% which have entries in the journal and remove duplicates. %% The counts will correctly reflect the combination of the @@ -249,35 +250,40 @@ init(Name) -> %% acks only go to the RAM journal as it doesn't matter if we %% lose them. Also mark delivered if not clean shutdown. Also %% find the number of unacked messages. - AllSegs = all_segment_nums(State2), + AllSegs = CleanShutdown = detect_clean_shutdown(Dir), %% We know the journal is empty here, so we don't need to combine %% with the journal, and we don't need to worry about messages %% that have been acked. 
{Segments1, Count, DCount1} = - lists:foldl( - fun (Seg, {Segments2, CountAcc, DCountAcc}) -> - Segment = segment_find_or_new(Seg, Dir, Segments2), - {SegEntries, PubCount, AckCount, Segment1} = - load_segment(false, Segment), - {Segment2 = #segment { pubs = PubCount1, acks = AckCount1 }, - DCountAcc1} = - array:sparse_foldl( - fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, - {Segment3, DCountAcc2}) -> - {Segment4, DCountDelta} = - maybe_add_to_journal( - rabbit_msg_store:contains( - ?PERSISTENT_MSG_STORE, MsgId), - CleanShutdown, Del, RelSeq, Segment3), - {Segment4, DCountAcc2 + DCountDelta} - end, {Segment1 #segment { pubs = PubCount, - acks = AckCount }, DCountAcc}, - SegEntries), - {segment_store(Segment2, Segments2), - CountAcc + PubCount1 - AckCount1, DCountAcc1} - end, {Segments, 0, DCount}, AllSegs), - {Count, PRef, TRef, + case CleanShutdown andalso MsgStoreRecovered of + false -> + lists:foldl( + fun (Seg, {Segments2, CountAcc, DCountAcc}) -> + Segment = segment_find_or_new(Seg, Dir, Segments2), + {SegEntries, PubCount, AckCount, Segment1} = + load_segment(false, Segment), + {Segment2 = #segment { pubs = PubCount1, acks = AckCount1 }, + DCountAcc1} = + array:sparse_foldl( + fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, + {Segment3, DCountAcc2}) -> + {Segment4, DCountDelta} = + maybe_add_to_journal( + rabbit_msg_store:contains( + ?PERSISTENT_MSG_STORE, MsgId), + CleanShutdown, Del, RelSeq, Segment3), + {Segment4, DCountAcc2 + DCountDelta} + end, {Segment1 #segment { pubs = PubCount, + acks = AckCount }, DCountAcc}, + SegEntries), + {segment_store(Segment2, Segments2), + CountAcc + PubCount1 - AckCount1, DCountAcc1} + end, {Segments, 0, DCount}, all_segment_nums(State2)); + true -> + {Segments, undefined, DCount} + end, + {Count, PRef, TRef, Terms, State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 22473594..788aeedd 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1206,7 +1206,7 @@ test_amqqueue(Durable) -> empty_test_queue() -> ok = start_transient_msg_store(), ok = rabbit_queue_index:start_persistent_msg_store([]), - {0, _PRef, _TRef, Qi1} = rabbit_queue_index:init(test_queue()), + {0, _PRef, _TRef, _Terms, Qi1} = rabbit_queue_index:init(test_queue(), false), _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), ok. 
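A sketch of the new init contract as a caller sees it (hypothetical wrapper): on the clean fast path the returned count is 'undefined', and the caller falls back to the persistent_count saved in the shutdown terms, exactly as rabbit_variable_queue does below.

recover_index(QName, MsgStoreRecovered) ->
    {Count, PRef, TRef, Terms, Qi} =
        rabbit_queue_index:init(QName, MsgStoreRecovered),
    {proplists:get_value(persistent_count, Terms, Count), PRef, TRef, Qi}.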
@@ -1255,7 +1255,7 @@ test_queue_index() -> ok = empty_test_queue(), SeqIdsA = lists:seq(0,9999), SeqIdsB = lists:seq(10000,19999), - {0, _PRef, _TRef, Qi0} = rabbit_queue_index:init(test_queue()), + {0, _PRef, _TRef, _Terms, Qi0} = rabbit_queue_index:init(test_queue(), false), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), @@ -1270,7 +1270,7 @@ test_queue_index() -> ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), ok = start_transient_msg_store(), %% should get length back as 0, as all the msgs were transient - {0, _PRef1, _TRef1, Qi6} = rabbit_queue_index:init(test_queue()), + {0, _PRef1, _TRef1, _Terms1, Qi6} = rabbit_queue_index:init(test_queue(), false), {0, SegSize, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), @@ -1285,7 +1285,7 @@ test_queue_index() -> ok = start_transient_msg_store(), %% should get length back as 10000 LenB = length(SeqIdsB), - {LenB, _PRef2, _TRef2, Qi12} = rabbit_queue_index:init(test_queue()), + {LenB, _PRef2, _TRef2, _Terms2, Qi12} = rabbit_queue_index:init(test_queue(), false), {0, TwoSegs, Qi13} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), @@ -1302,7 +1302,7 @@ test_queue_index() -> ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), ok = start_transient_msg_store(), %% should get length back as 0 because all persistent msgs have been acked - {0, _PRef3, _TRef3, Qi20} = rabbit_queue_index:init(test_queue()), + {0, _PRef3, _TRef3, _Terms3, Qi20} = rabbit_queue_index:init(test_queue(), false), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1311,7 +1311,7 @@ test_queue_index() -> %% First, partials: %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - {0, _PRef4, _TRef4, Qi22} = rabbit_queue_index:init(test_queue()), + {0, _PRef4, _TRef4, _Terms4, Qi22} = rabbit_queue_index:init(test_queue(), false), {Qi23, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = queue_index_deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), @@ -1322,7 +1322,7 @@ test_queue_index() -> ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment - {0, _PRef5, _TRef5, Qi29} = rabbit_queue_index:init(test_queue()), + {0, _PRef5, _TRef5, _Terms5, Qi29} = rabbit_queue_index:init(test_queue(), false), {Qi30, _SeqIdsMsgIdsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = queue_index_deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi31), @@ -1334,7 +1334,7 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), - {0, _PRef6, _TRef6, Qi36} = rabbit_queue_index:init(test_queue()), + {0, _PRef6, _TRef6, _Terms6, Qi36} = rabbit_queue_index:init(test_queue(), false), {Qi37, _SeqIdsMsgIdsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = queue_index_deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 03db8510..56a79f47 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -155,7 +155,9 @@ len, on_sync, msg_store_clients, - persistent_store + 
persistent_store, + persistent_count, + transient_threshold }). -include("rabbit.hrl"). @@ -212,7 +214,9 @@ len :: non_neg_integer(), on_sync :: {[ack()], [msg_id()], [{pid(), any()}]}, msg_store_clients :: {{any(), binary()}, {any(), binary()}}, - persistent_store :: pid() | atom() + persistent_store :: pid() | atom(), + persistent_count :: non_neg_integer(), + transient_threshold :: non_neg_integer() }). -spec(init/2 :: (queue_name(), pid() | atom()) -> vqstate()). @@ -256,14 +260,18 @@ %%---------------------------------------------------------------------------- init(QueueName, PersistentStore) -> - {DeltaCount, PRef, TRef, IndexState} = - rabbit_queue_index:init(QueueName), + MsgStoreRecovered = + rabbit_msg_store:successfully_recovered_state(PersistentStore), + {DeltaCount, PRef, TRef, Terms, IndexState} = + rabbit_queue_index:init(QueueName, MsgStoreRecovered), {DeltaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), - Delta = case DeltaCount of + + DeltaCount1 = proplists:get_value(persistent_count, Terms, DeltaCount), + Delta = case DeltaCount1 of 0 -> ?BLANK_DELTA; _ -> #delta { start_seq_id = DeltaSeqId, - count = DeltaCount, + count = DeltaCount1, end_seq_id = NextSeqId } end, Now = now(), @@ -282,24 +290,28 @@ init(QueueName, PersistentStore) -> in_counter = 0, egress_rate = {Now, 0}, avg_egress_rate = 0, - ingress_rate = {Now, DeltaCount}, + ingress_rate = {Now, DeltaCount1}, avg_ingress_rate = 0, rate_timestamp = Now, - len = DeltaCount, + len = DeltaCount1, on_sync = {[], [], []}, msg_store_clients = { {rabbit_msg_store:client_init(PersistentStore, PRef), PRef}, {rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), TRef}}, - persistent_store = PersistentStore + persistent_store = PersistentStore, + persistent_count = DeltaCount1, + transient_threshold = NextSeqId }, maybe_deltas_to_betas(State). terminate(State = #vqstate { + persistent_count = PCount, index_state = IndexState, msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} }) -> rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), - Terms = [{persistent_ref, PRef}, {transient_ref, TRef}], + Terms = [{persistent_ref, PRef}, {transient_ref, TRef}, + {persistent_count, PCount}], State #vqstate { index_state = rabbit_queue_index:terminate(Terms, IndexState) }. 
publish(Msg, State) -> @@ -313,7 +325,8 @@ publish_delivered(Msg = #basic_message { guid = MsgId, out_counter = OutCount, in_counter = InCount, msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> + persistent_store = PersistentStore, + persistent_count = PCount }) -> State1 = State #vqstate { out_counter = OutCount + 1, in_counter = InCount + 1 }, MsgStatus = #msg_status { @@ -321,7 +334,11 @@ publish_delivered(Msg = #basic_message { guid = MsgId, is_delivered = true, msg_on_disk = false, index_on_disk = false }, {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), - State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + State2 = State1 #vqstate { msg_store_clients = MSCState1, + persistent_count = PCount + case IsPersistent of + true -> 1; + false -> 0 + end }, case MsgStatus1 #msg_status.msg_on_disk of true -> {#msg_status { index_on_disk = true }, IndexState1} = @@ -422,7 +439,7 @@ fetch(State = false -> ok = case MsgOnDisk of true -> rabbit_msg_store:remove( - MsgStore, [MsgId]); + MsgStore, [MsgId]); false -> ok end, ack_not_on_disk @@ -434,7 +451,9 @@ fetch(State = index_state = IndexState1, len = Len1 }} end. -ack(AckTags, State = #vqstate { index_state = IndexState }) -> +ack(AckTags, State = #vqstate { index_state = IndexState, + persistent_count = PCount, + persistent_store = PersistentStore }) -> {MsgIdsByStore, SeqIds} = lists:foldl( fun (ack_not_on_disk, Acc) -> Acc; @@ -448,7 +467,11 @@ ack(AckTags, State = #vqstate { index_state = IndexState }) -> ok = dict:fold(fun (MsgStore, MsgIds, ok) -> rabbit_msg_store:remove(MsgStore, MsgIds) end, ok, MsgIdsByStore), - State #vqstate { index_state = IndexState1 }. + PCount1 = PCount - case dict:find(PersistentStore, MsgIdsByStore) of + error -> 0; + {ok, MsgIds} -> length(MsgIds) + end, + State #vqstate { index_state = IndexState1, persistent_count = PCount1 }. len(#vqstate { len = Len }) -> Len. @@ -464,7 +487,8 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len, {Len, State1} = purge1(Q4Count, State #vqstate { index_state = IndexState1, q4 = queue:new() }), - {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0 }}. + {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0, + persistent_count = 0 }}. %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. @@ -472,7 +496,8 @@ delete_and_terminate(State) -> {_PurgeCount, State1 = #vqstate { index_state = IndexState, msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, - persistent_store = PersistentStore }} = + persistent_store = PersistentStore, + transient_threshold = TransientThreshold }} = purge(State), IndexState1 = case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( @@ -481,8 +506,8 @@ delete_and_terminate(State) -> IndexState2; {DeltaSeqId, NextSeqId, IndexState2} -> {_DeleteCount, IndexState3} = - delete1(PersistentStore, NextSeqId, 0, DeltaSeqId, - IndexState2), + delete1(PersistentStore, TransientThreshold, NextSeqId, 0, + DeltaSeqId, IndexState2), IndexState3 end, IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1), @@ -503,7 +528,9 @@ delete_and_terminate(State) -> %% are now at the tail of the queue. 
requeue(MsgsWithAckTags, State) -> {SeqIds, MsgIdsByStore, - State1 = #vqstate { index_state = IndexState }} = + State1 = #vqstate { index_state = IndexState, + persistent_count = PCount, + persistent_store = PersistentStore }} = lists:foldl( fun ({Msg = #basic_message { guid = MsgId }, AckTag}, {SeqIdsAcc, Dict, StateN}) -> @@ -519,14 +546,20 @@ requeue(MsgsWithAckTags, State) -> {_SeqId, StateN1} = publish(Msg, true, MsgOnDisk, StateN), {SeqIdsAcc1, Dict1, StateN1} end, {[], dict:new(), State}, MsgsWithAckTags), - IndexState1 = case SeqIds of - [] -> IndexState; - _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) - end, + IndexState1 = + case SeqIds of + [] -> IndexState; + _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) + end, ok = dict:fold(fun (MsgStore, MsgIds, ok) -> rabbit_msg_store:release(MsgStore, MsgIds) end, ok, MsgIdsByStore), - State1 #vqstate { index_state = IndexState1 }. + PCount1 = PCount - case dict:find(PersistentStore, MsgIdsByStore) of + error -> 0; + {ok, MsgIds} -> length(MsgIds) + end, + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1 }. tx_publish(Msg = #basic_message { is_persistent = true, guid = MsgId }, State = #vqstate { msg_store_clients = MSCState, @@ -633,7 +666,7 @@ persistent_msg_ids(Pubs) -> [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, Obj #basic_message.is_persistent]. -betas_from_segment_entries(List, SeqIdLimit) -> +betas_from_segment_entries(List, SeqIdLimit, TransientThreshold) -> bpqueue:from_list([{true, [#msg_status { msg = undefined, msg_id = MsgId, @@ -644,7 +677,8 @@ betas_from_segment_entries(List, SeqIdLimit) -> index_on_disk = true } || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, - SeqId < SeqIdLimit ]}]). + SeqId < SeqIdLimit, + (IsPersistent orelse SeqId >= TransientThreshold)]}]). read_index_segment(SeqId, IndexState) -> SeqId1 = SeqId + rabbit_queue_index:segment_size(), @@ -703,23 +737,25 @@ should_force_index_to_disk(State = %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -delete1(_PersistentStore, NextSeqId, Count, DeltaSeqId, IndexState) - when DeltaSeqId >= NextSeqId -> +delete1(_PersistentStore, _TransientThreshold, NextSeqId, Count, DeltaSeqId, + IndexState) when DeltaSeqId >= NextSeqId -> {Count, IndexState}; -delete1(PersistentStore, NextSeqId, Count, DeltaSeqId, IndexState) -> +delete1(PersistentStore, TransientThreshold, NextSeqId, Count, DeltaSeqId, + IndexState) -> Delta1SeqId = DeltaSeqId + rabbit_queue_index:segment_size(), case rabbit_queue_index:read_segment_entries(DeltaSeqId, IndexState) of {[], IndexState1} -> - delete1(PersistentStore, NextSeqId, Count, Delta1SeqId, - IndexState1); + delete1(PersistentStore, TransientThreshold, NextSeqId, Count, + Delta1SeqId, IndexState1); {List, IndexState1} -> - Q = betas_from_segment_entries(List, Delta1SeqId), + Q = betas_from_segment_entries(List, Delta1SeqId, + TransientThreshold), {QCount, IndexState2} = remove_queue_entries( PersistentStore, fun beta_fold_no_index_on_disk/3, Q, IndexState1), - delete1(PersistentStore, NextSeqId, Count + QCount, Delta1SeqId, - IndexState2) + delete1(PersistentStore, TransientThreshold, NextSeqId, + Count + QCount, Delta1SeqId, IndexState2) end. 
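The per-entry rule buried in the list comprehension above reads, in isolation (hypothetical helper; TransientThreshold is the next_seq_id recorded for the new incarnation):

survives_recovery(SeqId, IsPersistent, TransientThreshold) ->
    %% transient entries from the previous incarnation sit below the
    %% threshold and are dead; persistent entries are always kept
    IsPersistent orelse SeqId >= TransientThreshold.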
purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, @@ -886,14 +922,20 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, publish(Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, IsDelivered, MsgOnDisk, State = - #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount }) -> + #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount, + persistent_count = PCount }) -> MsgStatus = #msg_status { msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = false }, + PCount1 = PCount + case IsPersistent of + true -> 1; + false -> 0 + end, {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, - in_counter = InCount + 1 })}. + in_counter = InCount + 1, + persistent_count = PCount1 })}. publish(msg, MsgStatus, #vqstate { index_state = IndexState, ram_msg_count = RamMsgCount, @@ -1097,7 +1139,8 @@ maybe_deltas_to_betas( target_ram_msg_count = TargetRamMsgCount, delta = #delta { start_seq_id = DeltaSeqId, count = DeltaCount, - end_seq_id = DeltaSeqIdEnd }}) -> + end_seq_id = DeltaSeqIdEnd }, + transient_threshold = TransientThreshold}) -> case (not bpqueue:is_empty(Q3)) andalso (0 == TargetRamMsgCount) of true -> State; @@ -1110,7 +1153,7 @@ maybe_deltas_to_betas( State1 = State #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because of acks. But %% it can't be [] - Q3a = betas_from_segment_entries(List, DeltaSeqIdEnd), + Q3a = betas_from_segment_entries(List, DeltaSeqIdEnd, TransientThreshold), Q3b = bpqueue:join(Q3, Q3a), case DeltaCount - bpqueue:len(Q3a) of 0 -> @@ -1120,11 +1163,15 @@ maybe_deltas_to_betas( q2 = bpqueue:new(), q3 = bpqueue:join(Q3b, Q2) }; N when N > 0 -> - State1 #vqstate { - q3 = Q3b, - delta = #delta { start_seq_id = Delta1SeqId, - count = N, - end_seq_id = DeltaSeqIdEnd } } + State2 = State1 #vqstate { + q3 = Q3b, + delta = #delta { start_seq_id = Delta1SeqId, + count = N, + end_seq_id = DeltaSeqIdEnd } }, + case N == DeltaCount of + true -> maybe_deltas_to_betas(State2); + false -> State2 + end end end. -- cgit v1.2.1 From 24bbd03cd3b704861ebec8a25c54b40da9e44ddd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Apr 2010 21:29:29 +0100 Subject: Forgot to remove a printf --- src/rabbit_msg_store.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 1455b456..8ea2344c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -716,8 +716,7 @@ handle_cast({set_maximum_since_use, Age}, State) -> handle_info(timeout, State) -> noreply(internal_sync(State)); -handle_info({'EXIT', Pid, Reason}, State) -> - io:format("~p EXIT! ~p ~p ~p~n", [self(), Reason, Pid, State]), +handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. 
terminate(_Reason, State = #msstate { index_state = IndexState, -- cgit v1.2.1 From 67651cbb3a2505de7fbbf57d4420813f2bfa95ed Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Apr 2010 23:07:12 +0100 Subject: Corrected comment --- src/rabbit_variable_queue.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 56a79f47..8585e139 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1151,8 +1151,9 @@ maybe_deltas_to_betas( {List, IndexState1, Delta1SeqId} = read_index_segment(DeltaSeqId, IndexState), State1 = State #vqstate { index_state = IndexState1 }, - %% length(List) may be < segment_size because of acks. But - %% it can't be [] + %% length(List) may be < segment_size because of acks. It + %% could be [] if we ignored every message in the segment + %% due to it being transient and below the threshold Q3a = betas_from_segment_entries(List, DeltaSeqIdEnd, TransientThreshold), Q3b = bpqueue:join(Q3, Q3a), case DeltaCount - bpqueue:len(Q3a) of -- cgit v1.2.1 From 2ea5b9b110cabf8313f96fb4519c63b26d57d057 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 5 Apr 2010 23:08:27 +0100 Subject: The walker is run within the msg_store process, which traps exits. Thus we need to be careful to unlink and to ensure we have received any EXIT message generated --- src/rabbit_queue_index.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 7227481d..8ad55583 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -461,7 +461,11 @@ queue_index_walker(DurableQueues) when is_list(DurableQueues) -> queue_index_walker({[], Gatherer}) -> case gatherer:fetch(Gatherer) of - finished -> finished; + finished -> unlink(Gatherer), + receive {'EXIT', Gatherer, _} -> ok + after 0 -> ok + end, + finished; {value, {MsgId, Count}} -> {MsgId, Count, {[], Gatherer}} end; queue_index_walker({[QueueName | QueueNames], Gatherer}) -> -- cgit v1.2.1 From ea06be89924c6955fbe698b6719f72b595675f9f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 6 Apr 2010 00:18:05 +0100 Subject: Fixed a leak - if we have many queue index segments which were written to disk but contained only references to transient messages, then on restart they would never be removed. Unfortunately, this does potentially introduce further work on startup (the queue will try to ensure that it loads the persistent contents of the first segment which contains a persistent message, which can involve scanning through a lot of segments). However, this is actually pretty quick. The only way to fix this would be to keep per-segment counters of persistent messages and ensure those are written to disk too, but the extra code cost and complexity may make this just not worth it. --- src/rabbit_queue_index.erl | 1 - src/rabbit_variable_queue.erl | 125 ++++++++++++++++++++++++++---------------- 2 files changed, 77 insertions(+), 49 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 8ad55583..6ab370b2 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -250,7 +250,6 @@ init(Name, MsgStoreRecovered) -> %% acks only go to the RAM journal as it doesn't matter if we %% lose them. Also mark delivered if not clean shutdown. Also %% find the number of unacked messages.
- AllSegs = CleanShutdown = detect_clean_shutdown(Dir), %% We know the journal is empty here, so we don't need to combine %% with the journal, and we don't need to worry about messages diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8585e139..4a4ba999 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -254,6 +254,9 @@ -define(BLANK_DELTA, #delta { start_seq_id = undefined, count = 0, end_seq_id = undefined }). +-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, + count = 0, + end_seq_id = Z }). %%---------------------------------------------------------------------------- %% Public API @@ -268,11 +271,11 @@ init(QueueName, PersistentStore) -> rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), DeltaCount1 = proplists:get_value(persistent_count, Terms, DeltaCount), - Delta = case DeltaCount1 of - 0 -> ?BLANK_DELTA; - _ -> #delta { start_seq_id = DeltaSeqId, - count = DeltaCount1, - end_seq_id = NextSeqId } + Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of + true -> ?BLANK_DELTA; + false -> #delta { start_seq_id = DeltaSeqId, + count = DeltaCount1, + end_seq_id = NextSeqId } end, Now = now(), State = @@ -666,19 +669,39 @@ persistent_msg_ids(Pubs) -> [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, Obj #basic_message.is_persistent]. -betas_from_segment_entries(List, SeqIdLimit, TransientThreshold) -> - bpqueue:from_list([{true, - [#msg_status { msg = undefined, - msg_id = MsgId, - seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = true, - index_on_disk = true - } - || {MsgId, SeqId, IsPersistent, IsDelivered} <- List, - SeqId < SeqIdLimit, - (IsPersistent orelse SeqId >= TransientThreshold)]}]). +betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> + {Filtered, IndexState1} = + lists:foldr( + fun ({MsgId, SeqId, IsPersistent, IsDelivered}, + {FilteredAcc, IndexStateAcc}) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> + IndexStateAcc1 = + case IsDelivered of + false -> rabbit_queue_index:write_delivered( + SeqId, IndexStateAcc); + true -> IndexStateAcc + end, + {FilteredAcc, rabbit_queue_index:write_acks( + [SeqId], IndexStateAcc1)}; + false -> + case SeqId < SeqIdLimit of + true -> + {[#msg_status { msg = undefined, + msg_id = MsgId, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + } | FilteredAcc], + IndexStateAcc}; + false -> + {FilteredAcc, IndexStateAcc} + end + end + end, {[], IndexState}, List), + {bpqueue:from_list([{true, Filtered}]), IndexState1}. read_index_segment(SeqId, IndexState) -> SeqId1 = SeqId + rabbit_queue_index:segment_size(), @@ -693,10 +716,10 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> rabbit_binary_generator:ensure_content_encoded(Content)) }. 
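Distilled, the foldr above makes a three-way decision per segment entry; a sketch of just that decision (hypothetical helper):

classify(SeqId, IsPersistent, TransientThreshold, SeqIdLimit) ->
    case SeqId < TransientThreshold andalso not IsPersistent of
        true                          -> ack_in_index; %% del+ack journalled
        false when SeqId < SeqIdLimit -> keep_as_beta;
        false                         -> ignore
    end.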
%% the first arg is the older delta -combine_deltas(#delta { count = 0 }, #delta { count = 0 }) -> +combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) -> ?BLANK_DELTA; -combine_deltas(#delta { count = 0 }, #delta { } = B) -> B; -combine_deltas(#delta { } = A, #delta { count = 0 }) -> A; +combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { } = B) -> B; +combine_deltas(#delta { } = A, ?BLANK_DELTA_PATTERN(Y)) -> A; combine_deltas(#delta { start_seq_id = SeqIdLow, count = CountLow}, #delta { start_seq_id = SeqIdHigh, count = CountHigh, end_seq_id = SeqIdEnd }) -> @@ -748,14 +771,15 @@ delete1(PersistentStore, TransientThreshold, NextSeqId, Count, DeltaSeqId, delete1(PersistentStore, TransientThreshold, NextSeqId, Count, Delta1SeqId, IndexState1); {List, IndexState1} -> - Q = betas_from_segment_entries(List, Delta1SeqId, - TransientThreshold), - {QCount, IndexState2} = + {Q, IndexState2} = + betas_from_segment_entries( + List, Delta1SeqId, TransientThreshold, IndexState1), + {QCount, IndexState3} = remove_queue_entries( PersistentStore, fun beta_fold_no_index_on_disk/3, - Q, IndexState1), + Q, IndexState2), delete1(PersistentStore, TransientThreshold, NextSeqId, - Count + QCount, Delta1SeqId, IndexState2) + Count + QCount, Delta1SeqId, IndexState3) end. purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, @@ -1132,14 +1156,14 @@ limit_ram_index(MapFoldFilterFun, Q, Reduction, State = {Qa, Reduction1, State #vqstate { index_state = IndexState1, ram_index_count = RamIndexCount1 }}. -maybe_deltas_to_betas(State = #vqstate { delta = #delta { count = 0 } }) -> +maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; maybe_deltas_to_betas( State = #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, target_ram_msg_count = TargetRamMsgCount, - delta = #delta { start_seq_id = DeltaSeqId, - count = DeltaCount, - end_seq_id = DeltaSeqIdEnd }, + delta = Delta = #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + end_seq_id = DeltaSeqIdEnd }, transient_threshold = TransientThreshold}) -> case (not bpqueue:is_empty(Q3)) andalso (0 == TargetRamMsgCount) of true -> @@ -1150,28 +1174,33 @@ maybe_deltas_to_betas( %% really be holding all the betas in memory. {List, IndexState1, Delta1SeqId} = read_index_segment(DeltaSeqId, IndexState), - State1 = State #vqstate { index_state = IndexState1 }, %% length(List) may be < segment_size because of acks. 
It %% could be [] if we ignored every message in the segment %% due to it being transient and below the threshold - Q3a = betas_from_segment_entries(List, DeltaSeqIdEnd, TransientThreshold), - Q3b = bpqueue:join(Q3, Q3a), - case DeltaCount - bpqueue:len(Q3a) of + {Q3a, IndexState2} = + betas_from_segment_entries( + List, DeltaSeqIdEnd, TransientThreshold, IndexState1), + State1 = State #vqstate { index_state = IndexState2 }, + case bpqueue:len(Q3a) of 0 -> - %% delta is now empty, but it wasn't before, so - %% can now join q2 onto q3 - State1 #vqstate { delta = ?BLANK_DELTA, - q2 = bpqueue:new(), - q3 = bpqueue:join(Q3b, Q2) }; - N when N > 0 -> - State2 = State1 #vqstate { - q3 = Q3b, - delta = #delta { start_seq_id = Delta1SeqId, - count = N, - end_seq_id = DeltaSeqIdEnd } }, - case N == DeltaCount of - true -> maybe_deltas_to_betas(State2); - false -> State2 + maybe_deltas_to_betas( + State #vqstate { + delta = Delta #delta { start_seq_id = Delta1SeqId }}); + _ -> + Q3b = bpqueue:join(Q3, Q3a), + case DeltaCount - bpqueue:len(Q3a) of + 0 -> + %% delta is now empty, but it wasn't + %% before, so can now join q2 onto q3 + State1 #vqstate { delta = ?BLANK_DELTA, + q2 = bpqueue:new(), + q3 = bpqueue:join(Q3b, Q2) }; + N when N > 0 -> + State1 #vqstate { + q3 = Q3b, + delta = #delta { start_seq_id = Delta1SeqId, + count = N, + end_seq_id = DeltaSeqIdEnd } } end end end. -- cgit v1.2.1 From 291b2c524c80a6be3ef867b20d668d4c6cbf3a32 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 6 Apr 2010 13:49:24 +0100 Subject: Whoops, API changed... --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 8ea2344c..4f85b566 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -777,7 +777,7 @@ next_state(State) -> {State, 0}. start_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> - {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, []), + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]), State #msstate { sync_timer_ref = TRef }. stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> -- cgit v1.2.1 From a1555f366090e5637db072e75dba064dc2eec667 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 6 Apr 2010 14:00:34 +0100 Subject: Removed code duplication --- src/rabbit_limiter.erl | 5 +---- src/rabbit_misc.erl | 9 ++++++++- src/rabbit_msg_store.erl | 5 +---- src/rabbit_queue_index.erl | 5 +---- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 7d840861..878af029 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -249,10 +249,7 @@ notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> State#lim{queues = NewQueues}. unlink_on_stopped(LimiterPid, stopped) -> - true = unlink(LimiterPid), - ok = receive {'EXIT', LimiterPid, _Reason} -> ok - after 0 -> ok - end, + ok = rabbit_misc:unlink_and_capture_exit(LimiterPid), stopped; unlink_on_stopped(_LimiterPid, Result) -> Result. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index cd2e7fbc..340f308f 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -59,7 +59,7 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, dict_cons/3]). +-export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). -import(mnesia). -import(lists). 
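The pattern being factored out into rabbit_misc:unlink_and_capture_exit/1 matters because its callers trap exits: an 'EXIT' may already sit in the mailbox by the time unlink/1 runs, and must be flushed or it will be misread later. An illustrative demo (not part of the patch):

demo() ->
    process_flag(trap_exit, true),
    Pid = spawn_link(fun () -> ok end), %% exits 'normal' immediately
    timer:sleep(10),                    %% let the 'EXIT' be delivered
    ok = rabbit_misc:unlink_and_capture_exit(Pid),
    ok.                                 %% no stale 'EXIT' left behind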
@@ -136,6 +136,7 @@ -spec(string_to_pid/1 :: (string()) -> pid()). -spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). +-spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). -endif. @@ -629,3 +630,9 @@ recursive_delete(Path) -> dict_cons(Key, Value, Dict) -> dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). + +unlink_and_capture_exit(Pid) -> + unlink(Pid), + receive {'EXIT', Pid, _} -> ok + after 0 -> ok + end. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 4f85b566..e5de24ce 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1292,10 +1292,7 @@ build_index(Gatherer, Left, [], sum_file_size = SumFileSize }) -> case gatherer:fetch(Gatherer) of finished -> - unlink(Gatherer), - receive {'EXIT', Gatherer, _} -> ok - after 0 -> ok - end, + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), ok = index_delete_by_file(undefined, State), Offset = case ets:lookup(FileSummaryEts, Left) of [] -> 0; diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6ab370b2..4887ec21 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -460,10 +460,7 @@ queue_index_walker(DurableQueues) when is_list(DurableQueues) -> queue_index_walker({[], Gatherer}) -> case gatherer:fetch(Gatherer) of - finished -> unlink(Gatherer), - receive {'EXIT', Gatherer, _} -> ok - after 0 -> ok - end, + finished -> rabbit_misc:unlink_and_capture_exit(Gatherer), finished; {value, {MsgId, Count}} -> {MsgId, Count, {[], Gatherer}} end; -- cgit v1.2.1 From c6b40f6ddc62a6f7abda483a3576c640e3191105 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 6 Apr 2010 18:13:51 +0100 Subject: Large accountancy bug in queue index leading to great confusion and indeed infinite loop. Fixed. --- src/rabbit_queue_index.erl | 72 +++++++++++++++++++++++++++++++--------------- src/rabbit_tests.erl | 4 +-- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4887ec21..ee4f05b4 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -243,8 +243,8 @@ init(Name, MsgStoreRecovered) -> State1 = load_journal(State), %% 2. Flush the journal. This makes life easier for everyone, as %% it means there won't be any publishes in the journal alone. - State2 = #qistate { dir = Dir, segments = Segments, - dirty_count = DCount } = flush_journal(State1), + State2 = #qistate { dir = Dir, segments = Segments } = + flush_journal(State1), %% 3. Load each segment in turn and filter out messages that are %% not in the msg_store, by adding acks to the journal. These %% acks only go to the RAM journal as it doesn't matter if we @@ -254,36 +254,59 @@ init(Name, MsgStoreRecovered) -> %% We know the journal is empty here, so we don't need to combine %% with the journal, and we don't need to worry about messages %% that have been acked. 
- {Segments1, Count, DCount1} = + {Segments1, Count} = case CleanShutdown andalso MsgStoreRecovered of false -> lists:foldl( - fun (Seg, {Segments2, CountAcc, DCountAcc}) -> + fun (Seg, {Segments2, CountAcc}) -> Segment = segment_find_or_new(Seg, Dir, Segments2), {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment), - {Segment2 = #segment { pubs = PubCount1, acks = AckCount1 }, - DCountAcc1} = + Segment2 = + #segment { pubs = PubCount1, acks = AckCount1 } = array:sparse_foldl( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, - {Segment3, DCountAcc2}) -> - {Segment4, DCountDelta} = + Segment3) -> + {Segment4, _DCountDelta} = maybe_add_to_journal( rabbit_msg_store:contains( ?PERSISTENT_MSG_STORE, MsgId), CleanShutdown, Del, RelSeq, Segment3), - {Segment4, DCountAcc2 + DCountDelta} - end, {Segment1 #segment { pubs = PubCount, - acks = AckCount }, DCountAcc}, + Segment4 + end, Segment1 #segment { pubs = PubCount, + acks = AckCount }, SegEntries), {segment_store(Segment2, Segments2), - CountAcc + PubCount1 - AckCount1, DCountAcc1} - end, {Segments, 0, DCount}, all_segment_nums(State2)); + CountAcc + PubCount1 - AckCount1} + end, {Segments, 0}, all_segment_nums(State2)); true -> - {Segments, undefined, DCount} + %% At this stage, we will only know about files that + %% were loaded during flushing. They *will* have + %% correct ack and pub counts, but for all remaining + %% segments, if they're not in the Segments store then + %% we need to add them and populate with saved data. + SegmentDictTerms = + dict:from_list(proplists:get_value(segments, Terms, [])), + {lists:foldl( + fun (Seg, SegmentsN) -> + case {segment_find(Seg, SegmentsN), + dict:find(Seg, SegmentDictTerms)} of + {error, {ok, {PubCount, AckCount}}} -> + Segment = segment_new(Seg, Dir), + segment_store( + Segment #segment { pubs = PubCount, + acks = AckCount }, + SegmentsN); + _ -> + SegmentsN + end + end, Segments, all_segment_nums(State2)), + undefined} end, - {Count, PRef, TRef, Terms, - State2 #qistate { segments = Segments1, dirty_count = DCount1 }}. + %% artificially set the dirty_count non zero and call flush again + State3 = flush_journal(State2 #qistate { segments = Segments1, + dirty_count = 1 }), + {Count, PRef, TRef, Terms, State3}. maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> {Segment, 0}; @@ -689,14 +712,17 @@ terminate(StoreShutdown, Terms, State = undefined -> ok; _ -> file_handle_cache:close(JournalHdl) end, - ok = segment_fold( - fun (_Seg, #segment { handle = undefined }, ok) -> - ok; - (_Seg, #segment { handle = Hdl }, ok) -> - file_handle_cache:close(Hdl) - end, ok, Segments), + SegTerms = segment_fold( + fun (Seg, #segment { handle = Hdl, pubs = PubCount, + acks = AckCount }, SegTermsAcc) -> + ok = case Hdl of + undefined -> ok; + _ -> file_handle_cache:close(Hdl) + end, + [{Seg, {PubCount, AckCount}} | SegTermsAcc] + end, [], Segments), case StoreShutdown of - true -> store_clean_shutdown(Terms, Dir); + true -> store_clean_shutdown([{segments, SegTerms} | Terms], Dir); false -> ok end, State #qistate { journal_handle = undefined, segments = undefined }. 
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 788aeedd..6b8998c2 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1259,7 +1259,7 @@ test_queue_index() -> {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), - {0, SegSize, Qi3} = + {0, SegmentSize, Qi3} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2), {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3), ok = verify_read_with_published(false, false, ReadA, @@ -1271,7 +1271,7 @@ test_queue_index() -> ok = start_transient_msg_store(), %% should get length back as 0, as all the msgs were transient {0, _PRef1, _TRef1, _Terms1, Qi6} = rabbit_queue_index:init(test_queue(), false), - {0, SegSize, Qi7} = + {0, 0, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = -- cgit v1.2.1 From 2164469c775119c762150e3a9f264e80140143a9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Apr 2010 14:35:59 +0100 Subject: Simplification of function called from one place only --- src/rabbit_queue_index.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index ee4f05b4..5257f201 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -267,7 +267,7 @@ init(Name, MsgStoreRecovered) -> array:sparse_foldl( fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, Segment3) -> - {Segment4, _DCountDelta} = + Segment4 = maybe_add_to_journal( rabbit_msg_store:contains( ?PERSISTENT_MSG_STORE, MsgId), @@ -309,15 +309,15 @@ init(Name, MsgStoreRecovered) -> {Count, PRef, TRef, Terms, State3}. maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> - {Segment, 0}; + Segment; maybe_add_to_journal( true, false, del, _RelSeq, Segment) -> - {Segment, 0}; + Segment; maybe_add_to_journal( true, false, _Del, RelSeq, Segment) -> - {add_to_journal(RelSeq, del, Segment), 1}; + add_to_journal(RelSeq, del, Segment); maybe_add_to_journal(false, _, del, RelSeq, Segment) -> - {add_to_journal(RelSeq, ack, Segment), 1}; + add_to_journal(RelSeq, ack, Segment); maybe_add_to_journal(false, _, _Del, RelSeq, Segment) -> - {add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)), 2}. + add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)). terminate(Terms, State) -> terminate(true, Terms, State). -- cgit v1.2.1 From ec43c15b3dfe7ed60750bf387a3ab413ac26a86e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Apr 2010 15:33:07 +0100 Subject: Fix a leak in the fhc - all clients of the fhc now have a monitor created for them to enable us to tidy up after the process dies. The distinction between these monitors and the ones created in release_on_death is that the release_on_death ones are not stored in the client_mrefs dict, thus if a monitor DOWN message appears which we can't find in that dict, it is assumed it is a release_on_death monitor --- src/file_handle_cache.erl | 48 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index c43695fb..59bb01bf 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -170,7 +170,8 @@ limit, count, obtains, - callbacks + callbacks, + client_mrefs }). 
%%---------------------------------------------------------------------------- @@ -688,7 +689,8 @@ init([]) -> end, error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [], callbacks = dict:new() }}. + obtains = [], callbacks = dict:new(), + client_mrefs = dict:new() }}. handle_call(obtain, From, State = #fhc_state { count = Count }) -> State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = @@ -701,21 +703,23 @@ handle_call(obtain, From, State = #fhc_state { count = Count }) -> handle_call({register_callback, Pid, MFA}, _From, State = #fhc_state { callbacks = Callbacks }) -> - {reply, ok, - State #fhc_state { callbacks = dict:store(Pid, MFA, Callbacks) }}. + {reply, ok, ensure_mref( + Pid, State #fhc_state { + callbacks = dict:store(Pid, MFA, Callbacks) })}. handle_cast({open, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), - {noreply, maybe_reduce(State #fhc_state { elders = Elders1, - count = Count + 1 })}; + {noreply, maybe_reduce( + ensure_mref(Pid, State #fhc_state { elders = Elders1, + count = Count + 1 }))}; handle_cast({update, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders }) -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), %% don't call maybe_reduce from here otherwise we can create a %% storm of messages - {noreply, State #fhc_state { elders = Elders1 }}; + {noreply, ensure_mref(Pid, State #fhc_state { elders = Elders1 })}; handle_cast({close, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> @@ -723,8 +727,9 @@ handle_cast({close, Pid, EldestUnusedSince}, State = undefined -> dict:erase(Pid, Elders); _ -> dict:store(Pid, EldestUnusedSince, Elders) end, - {noreply, process_obtains(State #fhc_state { elders = Elders1, - count = Count - 1 })}; + {noreply, process_obtains( + ensure_mref(Pid, State #fhc_state { elders = Elders1, + count = Count - 1 }))}; handle_cast(check_counts, State) -> {noreply, maybe_reduce(State)}; @@ -733,11 +738,18 @@ handle_cast({release_on_death, Pid}, State) -> _MRef = erlang:monitor(process, Pid), {noreply, State}. -handle_info({'DOWN', _MRef, process, Pid, _Reason}, - State = #fhc_state { count = Count, callbacks = Callbacks }) -> - {noreply, process_obtains( - State #fhc_state { count = Count - 1, - callbacks = dict:erase(Pid, Callbacks) })}. +handle_info({'DOWN', MRef, process, Pid, _Reason}, + State = #fhc_state { count = Count, callbacks = Callbacks, + client_mrefs = ClientMRefs, + elders = Elders }) -> + State1 = case dict:find(Pid, ClientMRefs) of + {ok, MRef} -> State #fhc_state { + elders = dict:erase(Pid, Elders), + client_mrefs = dict:erase(Pid, ClientMRefs), + callbacks = dict:erase(Pid, Callbacks) }; + _ -> State #fhc_state { count = Count - 1 } + end, + {noreply, process_obtains(State1)}. terminate(_Reason, State) -> State. @@ -826,3 +838,11 @@ ulimit() -> _ -> ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS end. + +ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> + State #fhc_state { client_mrefs = ensure_mref(Pid, ClientMRefs) }; +ensure_mref(Pid, ClientMRefs) -> + case dict:find(Pid, ClientMRefs) of + {ok, _MRef} -> ClientMRefs; + error -> dict:store(Pid, erlang:monitor(process, Pid), ClientMRefs) + end. 
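The invariant in miniature (standalone sketch, not fhc code): at most one monitor is stored per client pid, so repeated registrations from the same process never create a second source of 'DOWN' messages.

monitor_once(Pid, MRefs) ->
    case dict:find(Pid, MRefs) of
        {ok, _MRef} -> MRefs; %% already monitored: dict unchanged
        error       -> dict:store(Pid, erlang:monitor(process, Pid), MRefs)
    end.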
-- cgit v1.2.1 From 952ad372f58d7601dfd8db93c395e8757f6bd8d8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Apr 2010 17:46:38 +0100 Subject: Cleanup and improvements to the txn commit system. We now ensure: a) if the txn consists only of acks, we do sync those acks in the qi (assuming the acks were on disk already); b) txns in non-durable queues never cause fsyncs, even if the messages are persistent (or the acks are for persistent messages); c) transactions which contain no publishes can now overtake txns which do contain publishes; d) txns which do not need to be sync'd in the qi can overtake those that do need to be sync'd in the qi (eg a txn with only acks for non-persistent msgs can overtake a txn with persistent publishes). The overtakings are all safe as commit is a sync operation on a channel, and can only overtake other txns, not other operations in general. --- src/rabbit_amqqueue.erl | 12 +++---- src/rabbit_amqqueue_process.erl | 13 ++++--- src/rabbit_queue_index.erl | 2 ++ src/rabbit_variable_queue.erl | 79 +++++++++++++++++++++++++++-------------- 4 files changed, 69 insertions(+), 37 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index d23cbd19..f0d12ae5 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -41,7 +41,7 @@ -export([consumers/1, consumers_all/1]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, tx_commit_msg_store_callback/4, +-export([notify_sent/2, unblock/2, tx_commit_msg_store_callback/5, tx_commit_vq_callback/1, flush_all/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -111,8 +111,8 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(tx_commit_msg_store_callback/4 :: - (pid(), [message()], [acktag()], {pid(), any()}) -> 'ok'). +-spec(tx_commit_msg_store_callback/5 :: + (pid(), boolean(), [message()], [acktag()], {pid(), any()}) -> 'ok'). -spec(tx_commit_vq_callback/1 :: (pid()) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). @@ -362,9 +362,9 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 7, {unblock, ChPid}). -tx_commit_msg_store_callback(QPid, Pubs, AckTags, From) -> - gen_server2:pcast(QPid, 7, - {tx_commit_msg_store_callback, Pubs, AckTags, From}). +tx_commit_msg_store_callback(QPid, IsTransientPubs, Pubs, AckTags, From) -> + gen_server2:pcast(QPid, 7, {tx_commit_msg_store_callback, + IsTransientPubs, Pubs, AckTags, From}). tx_commit_vq_callback(QPid) -> gen_server2:pcast(QPid, 7, tx_commit_vq_callback). 
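The commit boils down to two skip tests, one per loop. A distilled sketch of the msg_store one (hypothetical helper, assuming the ?TRANSIENT_MSG_STORE macro is in scope): an fsync is only needed when persistent publishes target a durable queue's persistent store.

needs_msg_store_sync(PersistentStore, PersistentMsgIds) ->
    PersistentMsgIds =/= [] andalso
        PersistentStore =/= ?TRANSIENT_MSG_STORE.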
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index c9add5b2..fa445c3a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -894,12 +894,15 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({tx_commit_msg_store_callback, Pubs, AckTags, From}, +handle_cast({tx_commit_msg_store_callback, IsTransientPubs, Pubs, AckTags, From}, State = #q{variable_queue_state = VQS}) -> - noreply( - State#q{variable_queue_state = - rabbit_variable_queue:tx_commit_from_msg_store( - Pubs, AckTags, From, VQS)}); + {RunQueue, VQS1} = rabbit_variable_queue:tx_commit_from_msg_store( + IsTransientPubs, Pubs, AckTags, From, VQS), + State1 = State#q{variable_queue_state = VQS1}, + noreply(case RunQueue of + true -> run_message_queue(State1); + false -> State1 + end); handle_cast(tx_commit_vq_callback, State = #q{variable_queue_state = VQS}) -> noreply( diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5257f201..b37845d4 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -352,6 +352,8 @@ write_acks(SeqIds, State) -> add_to_journal(SeqId, ack, StateN) end, State1, SeqIds)). +sync_seq_ids([], State) -> + State; sync_seq_ids(_SeqIds, State = #qistate { journal_handle = undefined }) -> State; sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4a4ba999..8f813fb4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -35,7 +35,7 @@ set_queue_ram_duration_target/2, remeasure_rates/1, ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, - tx_commit/4, tx_commit_from_msg_store/4, tx_commit_from_vq/1, + tx_commit/4, tx_commit_from_msg_store/5, tx_commit_from_vq/1, needs_sync/1, flush_journal/1, status/1]). %%---------------------------------------------------------------------------- @@ -241,9 +241,10 @@ -spec(tx_publish/2 :: (basic_message(), vqstate()) -> vqstate()). -spec(tx_rollback/2 :: ([msg_id()], vqstate()) -> vqstate()). -spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> - {boolean(), vqstate()}). --spec(tx_commit_from_msg_store/4 :: - ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> vqstate()). + {boolean(), vqstate()}). +-spec(tx_commit_from_msg_store/5 :: + (boolean(), [msg_id()], [ack()], {pid(), any()}, vqstate()) -> + {boolean(), vqstate()}). -spec(tx_commit_from_vq/1 :: (vqstate()) -> vqstate()). -spec(needs_sync/1 :: (vqstate()) -> boolean()). -spec(flush_journal/1 :: (vqstate()) -> vqstate()). @@ -454,6 +455,8 @@ fetch(State = index_state = IndexState1, len = Len1 }} end. +ack([], State) -> + State; ack(AckTags, State = #vqstate { index_state = IndexState, persistent_count = PCount, persistent_store = PersistentStore }) -> @@ -583,45 +586,69 @@ tx_rollback(Pubs, State = #vqstate { persistent_store = PersistentStore }) -> end, State. -tx_commit(Pubs, AckTags, From, State = #vqstate { persistent_store = PersistentStore }) -> - case persistent_msg_ids(Pubs) of - [] -> - {true, tx_commit_from_msg_store(Pubs, AckTags, From, State)}; - PersistentMsgIds -> +tx_commit(Pubs, AckTags, From, State = + #vqstate { persistent_store = PersistentStore }) -> + %% If we are a non-durable queue, or we have no persistent pubs, + %% we can skip the msg_store loop. 
+    PersistentMsgIds = persistent_msg_ids(Pubs),
+    IsTransientPubs = [] == PersistentMsgIds,
+    case IsTransientPubs orelse
+        ?TRANSIENT_MSG_STORE == PersistentStore of
+        true ->
+            tx_commit_from_msg_store(
+              IsTransientPubs, Pubs, AckTags, From, State);
+        false ->
             Self = self(),
             ok = rabbit_msg_store:sync(
-                   PersistentStore, PersistentMsgIds,
+                   ?PERSISTENT_MSG_STORE, PersistentMsgIds,
                    fun () -> ok = rabbit_amqqueue:tx_commit_msg_store_callback(
-                                    Self, Pubs, AckTags, From)
+                                    Self, IsTransientPubs, Pubs, AckTags, From)
                    end),
             {false, State}
     end.
 
-tx_commit_from_msg_store(Pubs, AckTags, From,
-                         State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) ->
+tx_commit_from_msg_store(IsTransientPubs, Pubs, AckTags, From, State =
+                         #vqstate { on_sync = OnSync = {SAcks, SPubs, SFroms},
+                                    persistent_store = PersistentStore }) ->
+    %% If we are a non-durable queue, or (no persistent pubs, and no
+    %% persistent acks) then we can skip the queue_index loop.
     DiskAcks =
         lists:filter(fun (AckTag) -> AckTag /= ack_not_on_disk end, AckTags),
-    State #vqstate { on_sync = { [DiskAcks | SAcks],
-                                 [Pubs | SPubs],
-                                 [From | SFroms] }}.
+    case PersistentStore == ?TRANSIENT_MSG_STORE orelse
+        (IsTransientPubs andalso [] == DiskAcks) of
+        true  -> State1 = tx_commit_from_vq(State #vqstate {
+                                              on_sync = {[], [Pubs], [From]} }),
+                 {true, State1 #vqstate { on_sync = OnSync }};
+        false -> {false, State #vqstate { on_sync = { [DiskAcks | SAcks],
+                                                      [Pubs | SPubs],
+                                                      [From | SFroms] }}}
+    end.
 
 tx_commit_from_vq(State = #vqstate { on_sync = {_, _, []} }) ->
     State;
-tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms} }) ->
-    State1 = ack(lists:flatten(SAcks), State),
-    {PubSeqIds, State2 = #vqstate { index_state = IndexState }} =
+tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms},
+                                     persistent_store = PersistentStore }) ->
+    Acks = lists:flatten(SAcks),
+    State1 = ack(Acks, State),
+    AckSeqIds = lists:foldl(fun ({ack_index_and_store, _MsgId,
+                                  SeqId, ?PERSISTENT_MSG_STORE}, SeqIdsAcc) ->
+                                    [SeqId | SeqIdsAcc];
+                                (_, SeqIdsAcc) ->
+                                    SeqIdsAcc
+                            end, [], Acks),
+    IsPersistentStore = ?PERSISTENT_MSG_STORE == PersistentStore,
+    {SeqIds, State2 = #vqstate { index_state = IndexState }} =
         lists:foldl(
           fun (Msg = #basic_message { is_persistent = IsPersistent },
               {SeqIdsAcc, StateN}) ->
                  {SeqId, StateN1} = publish(Msg, false, IsPersistent, StateN),
-                 SeqIdsAcc1 = case IsPersistent of
-                                  true  -> [SeqId | SeqIdsAcc];
-                                  false -> SeqIdsAcc
-                              end,
-                 {SeqIdsAcc1, StateN1}
-          end, {[], State1}, lists:flatten(lists:reverse(SPubs))),
+                 {case IsPersistentStore andalso IsPersistent of
+                      true  -> [SeqId | SeqIdsAcc];
+                      false -> SeqIdsAcc
+                  end, StateN1}
+          end, {AckSeqIds, State1}, lists:flatten(lists:reverse(SPubs))),
     IndexState1 =
-        rabbit_queue_index:sync_seq_ids(PubSeqIds, IndexState),
+        rabbit_queue_index:sync_seq_ids(SeqIds, IndexState),
     [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ],
     State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }.
-- cgit v1.2.1


From bbc838bee48dd5e283c9c42965e8763893d25eb4 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 8 Apr 2010 13:02:49 +0100
Subject: Unify the APIs of the various commit callbacks.
Prevents running the queue when we are doing ack-only txns --- src/rabbit_amqqueue_process.erl | 20 ++++++++++++-------- src/rabbit_variable_queue.erl | 17 ++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index fa445c3a..b92de667 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -905,10 +905,12 @@ handle_cast({tx_commit_msg_store_callback, IsTransientPubs, Pubs, AckTags, From} end); handle_cast(tx_commit_vq_callback, State = #q{variable_queue_state = VQS}) -> - noreply( - run_message_queue( - State#q{variable_queue_state = - rabbit_variable_queue:tx_commit_from_vq(VQS)})); + {RunQueue, VQS1} = rabbit_variable_queue:tx_commit_from_vq(VQS), + State1 = State#q{variable_queue_state = VQS1}, + noreply(case RunQueue of + true -> run_message_queue(State1); + false -> State1 + end); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( @@ -970,10 +972,12 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> end; handle_info(timeout, State = #q{variable_queue_state = VQS}) -> - noreply( - run_message_queue( - State#q{variable_queue_state = - rabbit_variable_queue:tx_commit_from_vq(VQS)})); + {RunQueue, VQS1} = rabbit_variable_queue:tx_commit_from_vq(VQS), + State1 = State#q{variable_queue_state = VQS1}, + noreply(case RunQueue of + true -> run_message_queue(State1); + false -> State1 + end); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8f813fb4..9d33cc7c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -245,7 +245,7 @@ -spec(tx_commit_from_msg_store/5 :: (boolean(), [msg_id()], [ack()], {pid(), any()}, vqstate()) -> {boolean(), vqstate()}). --spec(tx_commit_from_vq/1 :: (vqstate()) -> vqstate()). +-spec(tx_commit_from_vq/1 :: (vqstate()) -> {boolean(), vqstate()}). -spec(needs_sync/1 :: (vqstate()) -> boolean()). -spec(flush_journal/1 :: (vqstate()) -> vqstate()). -spec(status/1 :: (vqstate()) -> [{atom(), any()}]). @@ -616,16 +616,17 @@ tx_commit_from_msg_store(IsTransientPubs, Pubs, AckTags, From, State = lists:filter(fun (AckTag) -> AckTag /= ack_not_on_disk end, AckTags), case PersistentStore == ?TRANSIENT_MSG_STORE orelse (IsTransientPubs andalso [] == DiskAcks) of - true -> State1 = tx_commit_from_vq(State #vqstate { - on_sync = {[], [Pubs], [From]} }), - {true, State1 #vqstate { on_sync = OnSync }}; + true -> {Res, State1} = + tx_commit_from_vq(State #vqstate { + on_sync = {[], [Pubs], [From]} }), + {Res, State1 #vqstate { on_sync = OnSync }}; false -> {false, State #vqstate { on_sync = { [DiskAcks | SAcks], [Pubs | SPubs], [From | SFroms] }}} end. 
tx_commit_from_vq(State = #vqstate { on_sync = {_, _, []} }) -> - State; + {false, State}; tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, persistent_store = PersistentStore }) -> Acks = lists:flatten(SAcks), @@ -637,6 +638,7 @@ tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, SeqIdsAcc end, [], Acks), IsPersistentStore = ?PERSISTENT_MSG_STORE == PersistentStore, + Pubs = lists:flatten(lists:reverse(SPubs)), {SeqIds, State2 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, @@ -646,11 +648,12 @@ tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc end, StateN1} - end, {AckSeqIds, State1}, lists:flatten(lists:reverse(SPubs))), + end, {AckSeqIds, State1}, Pubs), IndexState1 = rabbit_queue_index:sync_seq_ids(SeqIds, IndexState), [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ], - State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. + {Pubs /= [], + State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }}. needs_sync(#vqstate { on_sync = {_, _, []} }) -> false; -- cgit v1.2.1 From ad30c6089f4748305ba38d8f446f57806a6d4fef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Apr 2010 14:44:15 +0100 Subject: Rationalise the entire VQ api in preparation for making it pluggable. Just a behaviour, the externalisation of the specs to follow, and a means to specify the internal_queue module to come. --- src/rabbit_amqqueue.erl | 21 +-- src/rabbit_amqqueue_process.erl | 285 ++++++++++++++++++++-------------------- src/rabbit_tests.erl | 2 +- src/rabbit_variable_queue.erl | 59 +++++---- 4 files changed, 180 insertions(+), 187 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index f0d12ae5..1c8cf522 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -41,8 +41,8 @@ -export([consumers/1, consumers_all/1]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, tx_commit_msg_store_callback/5, - tx_commit_vq_callback/1, flush_all/2]). +-export([notify_sent/2, unblock/2, maybe_run_queue_via_internal_queue/3, + flush_all/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -65,7 +65,6 @@ -type(qfun(A) :: fun ((amqqueue()) -> A)). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). --type(acktag() :: any()). -spec(start/0 :: () -> 'ok'). -spec(declare/4 :: (queue_name(), boolean(), boolean(), amqp_table()) -> @@ -111,9 +110,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(tx_commit_msg_store_callback/5 :: - (pid(), boolean(), [message()], [acktag()], {pid(), any()}) -> 'ok'). --spec(tx_commit_vq_callback/1 :: (pid()) -> 'ok'). +-spec(maybe_run_queue_via_internal_queue/3 :: (pid(), atom(), [any()]) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). 
@@ -172,7 +169,7 @@ recover_durable_queues(DurableQueues) -> end) of true -> ok = gen_server2:call(Q#amqqueue.pid, - init_variable_queue, + init_internal_queue, infinity), [Q|Acc]; false -> exit(Q#amqqueue.pid, shutdown), @@ -204,7 +201,7 @@ internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) -> end, ok = gen_server2:call( Q#amqqueue.pid, - init_variable_queue, infinity), + init_internal_queue, infinity), Q; [_] -> not_found %% existing Q on stopped node end; @@ -362,12 +359,8 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 7, {unblock, ChPid}). -tx_commit_msg_store_callback(QPid, IsTransientPubs, Pubs, AckTags, From) -> - gen_server2:pcast(QPid, 7, {tx_commit_msg_store_callback, - IsTransientPubs, Pubs, AckTags, From}). - -tx_commit_vq_callback(QPid) -> - gen_server2:pcast(QPid, 7, tx_commit_vq_callback). +maybe_run_queue_via_internal_queue(QPid, Fun, Args) -> + gen_server2:pcast(QPid, 7, {maybe_run_queue_via_internal_queue, Fun, Args}). flush_all(QPids, ChPid) -> safe_pmap_ok( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b92de667..33ea625c 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -55,7 +55,9 @@ owner, exclusive_consumer, has_had_consumers, - variable_queue_state, + internal_queue, + internal_queue_state, + internal_queue_timeout_fun, next_msg_id, active_consumers, blocked_consumers, @@ -94,7 +96,7 @@ consumers, transactions, memory, - raw_vq_status + internal_queue_status ]). %%---------------------------------------------------------------------------- @@ -116,7 +118,9 @@ init(Q) -> owner = none, exclusive_consumer = none, has_had_consumers = false, - variable_queue_state = undefined, + internal_queue = rabbit_variable_queue, + internal_queue_state = undefined, + internal_queue_timeout_fun = undefined, next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), @@ -124,30 +128,33 @@ init(Q) -> rate_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -terminate(shutdown, #q{variable_queue_state = VQS}) -> +terminate(shutdown, #q{internal_queue_state = IQS, + internal_queue = IQ}) -> ok = rabbit_memory_monitor:deregister(self()), - case VQS of + case IQS of undefined -> ok; - _ -> rabbit_variable_queue:terminate(VQS) + _ -> IQ:terminate(IQS) end; -terminate({shutdown, _}, #q{variable_queue_state = VQS}) -> +terminate({shutdown, _}, #q{internal_queue_state = IQS, + internal_queue = IQ}) -> ok = rabbit_memory_monitor:deregister(self()), - case VQS of + case IQS of undefined -> ok; - _ -> rabbit_variable_queue:terminate(VQS) + _ -> IQ:terminate(IQS) end; -terminate(_Reason, State = #q{variable_queue_state = VQS}) -> +terminate(_Reason, State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? %% Ensure that any persisted tx messages are removed. %% TODO: wait for all in flight tx_commits to complete - case VQS of + case IQS of undefined -> ok; _ -> - VQS1 = rabbit_variable_queue:tx_rollback( + IQS1 = IQ:tx_rollback( lists:concat([PM || #tx { pending_messages = PM } <- - all_tx_record()]), VQS), + all_tx_record()]), IQS), %% Delete from disk first. If we crash at this point, when %% a durable queue, we will be recreated at startup, %% possibly with partial content. 
The alternative is much @@ -155,7 +162,7 @@ terminate(_Reason, State = #q{variable_queue_state = VQS}) -> %% would then have a race between the disk delete and a %% new queue with the same name being created and %% published to. - rabbit_variable_queue:delete_and_terminate(VQS1) + IQ:delete_and_terminate(IQS1) end, ok = rabbit_amqqueue:internal_delete(qname(State)). @@ -174,18 +181,18 @@ noreply(NewState) -> {NewState1, Timeout} = next_state(NewState), {noreply, NewState1, Timeout}. -next_state(State = #q{variable_queue_state = VQS}) -> - next_state1(ensure_rate_timer(State), - rabbit_variable_queue:needs_sync(VQS)). +next_state(State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> + next_state1(ensure_rate_timer(State), IQ:needs_sync(IQS)). -next_state1(State = #q{sync_timer_ref = undefined}, true) -> - {start_sync_timer(State), 0}; -next_state1(State, true) -> +next_state1(State = #q{sync_timer_ref = undefined}, Callback = {_Fun, _Args}) -> + {start_sync_timer(State, Callback), 0}; +next_state1(State, {_Fun, _Args}) -> {State, 0}; -next_state1(State = #q{sync_timer_ref = undefined}, false) -> +next_state1(State = #q{sync_timer_ref = undefined}, undefined) -> {State, hibernate}; -next_state1(State, false) -> - {stop_sync_timer(State), hibernate}. +next_state1(State, undefined) -> + {stop_sync_timer(State#q{internal_queue_timeout_fun = undefined}), hibernate}. ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after(?RATES_REMEASURE_INTERVAL, rabbit_amqqueue, @@ -204,17 +211,20 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{rate_timer_ref = undefined}. -start_sync_timer(State = #q{sync_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, rabbit_amqqueue, - tx_commit_vq_callback, [self()]), - State#q{sync_timer_ref = TRef}. +start_sync_timer(State = #q{sync_timer_ref = undefined}, + Callback = {Fun, Args}) -> + {ok, TRef} = timer:apply_after( + ?SYNC_INTERVAL, rabbit_amqqueue, + maybe_run_queue_via_internal_queue, [self(), Fun, Args]), + State#q{sync_timer_ref = TRef, internal_queue_timeout_fun = Callback}. stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), - State#q{sync_timer_ref = undefined}. + State#q{sync_timer_ref = undefined, internal_queue_timeout_fun = undefined}. -assert_invariant(#q{active_consumers = AC, variable_queue_state = VQS}) -> - true = (queue:is_empty(AC) orelse rabbit_variable_queue:is_empty(VQS)). +assert_invariant(#q{active_consumers = AC, internal_queue_state = IQS, + internal_queue = IQ}) -> + true = (queue:is_empty(AC) orelse IQ:is_empty(IQS)). lookup_ch(ChPid) -> case get({ch, ChPid}) of @@ -329,74 +339,73 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> not IsEmpty. deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, - State = #q { variable_queue_state = VQS }) -> - {{Message, IsDelivered, AckTag, Remaining}, VQS1} = - rabbit_variable_queue:fetch(VQS), + State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> + {{Message, IsDelivered, AckTag, Remaining}, IQS1} = IQ:fetch(IQS), AutoAcks1 = case AckRequired of true -> AutoAcks; false -> [AckTag | AutoAcks] end, {{Message, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, - State #q { variable_queue_state = VQS1 }}. + State #q { internal_queue_state = IQS1 }}. 
-run_message_queue(State = #q { variable_queue_state = VQS }) -> +run_message_queue(State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> Funs = { fun deliver_from_queue_pred/2, fun deliver_from_queue_deliver/3 }, - IsEmpty = rabbit_variable_queue:is_empty(VQS), + IsEmpty = IQ:is_empty(IQS), {{_IsEmpty1, AutoAcks}, State1} = deliver_msgs_to_consumers(Funs, {IsEmpty, []}, State), - VQS1 = rabbit_variable_queue:ack(AutoAcks, State1 #q.variable_queue_state), - State1 #q { variable_queue_state = VQS1 }. + IQS1 = IQ:ack(AutoAcks, State1 #q.internal_queue_state), + State1 #q { internal_queue_state = IQS1 }. -attempt_delivery(none, _ChPid, Message, State) -> +attempt_delivery(none, _ChPid, Message, State = #q{internal_queue = IQ}) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = fun (AckRequired, false, State1) -> {AckTag, State2} = case AckRequired of true -> - {AckTag1, VQS} = - rabbit_variable_queue:publish_delivered( - Message, State1 #q.variable_queue_state), - {AckTag1, State1 #q { variable_queue_state = VQS }}; + {AckTag1, IQS} = + IQ:publish_delivered( + Message, State1 #q.internal_queue_state), + {AckTag1, State1 #q { internal_queue_state = IQS }}; false -> {noack, State1} end, {{Message, false, AckTag}, true, State2} end, deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); -attempt_delivery(Txn, ChPid, Message, State) -> - VQS = rabbit_variable_queue:tx_publish( - Message, State #q.variable_queue_state), +attempt_delivery(Txn, ChPid, Message, State = #q{internal_queue = IQ}) -> + IQS = IQ:tx_publish(Message, State #q.internal_queue_state), record_pending_message(Txn, ChPid, Message), - {true, State #q { variable_queue_state = VQS }}. + {true, State #q { internal_queue_state = IQS }}. -deliver_or_enqueue(Txn, ChPid, Message, State) -> +deliver_or_enqueue(Txn, ChPid, Message, State = #q{internal_queue = IQ}) -> case attempt_delivery(Txn, ChPid, Message, State) of {true, NewState} -> {true, NewState}; {false, NewState} -> %% Txn is none and no unblocked channels with consumers - {_SeqId, VQS} = rabbit_variable_queue:publish( - Message, State #q.variable_queue_state), - {false, NewState #q { variable_queue_state = VQS }} + {_SeqId, IQS} = IQ:publish(Message, State #q.internal_queue_state), + {false, NewState #q { internal_queue_state = IQS }} end. %% all these messages have already been delivered at least once and %% not ack'd, but need to be either redelivered or requeued deliver_or_requeue_n([], State) -> State; -deliver_or_requeue_n(MsgsWithAcks, State) -> +deliver_or_requeue_n(MsgsWithAcks, State = #q{internal_queue = IQ}) -> Funs = { fun deliver_or_requeue_msgs_pred/2, fun deliver_or_requeue_msgs_deliver/3 }, {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = deliver_msgs_to_consumers( Funs, {length(MsgsWithAcks), [], MsgsWithAcks}, State), - VQS = rabbit_variable_queue:ack(AutoAcks, NewState #q.variable_queue_state), + IQS = IQ:ack(AutoAcks, NewState #q.internal_queue_state), case OutstandingMsgs of - [] -> NewState #q { variable_queue_state = VQS }; - _ -> VQS1 = rabbit_variable_queue:requeue(OutstandingMsgs, VQS), - NewState #q { variable_queue_state = VQS1 } + [] -> NewState #q { internal_queue_state = IQS }; + _ -> IQS1 = IQ:requeue(OutstandingMsgs, IQS), + NewState #q { internal_queue_state = IQS1 } end. deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> @@ -508,6 +517,16 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). 
qname(#q{q = #amqqueue{name = QName}}) -> QName. +maybe_run_queue_via_internal_queue(Fun, Args, + State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> + {RunQueue, IQS1} = apply(IQ, Fun, Args ++ [IQS]), + State1 = State#q{internal_queue_state = IQS1}, + case RunQueue of + true -> run_message_queue(State1); + false -> State1 + end. + lookup_tx(Txn) -> case get({txn, Txn}) of undefined -> #tx{ch_pid = none, @@ -537,35 +556,31 @@ record_pending_acks(Txn, ChPid, MsgIds) -> store_tx(Txn, Tx#tx{pending_acks = [MsgIds | Pending], ch_pid = ChPid}). -commit_transaction(Txn, From, State) -> - #tx { ch_pid = ChPid, - pending_messages = PendingMessages, - pending_acks = PendingAcks - } = lookup_tx(Txn), +commit_transaction(Txn, From, State = #q{internal_queue = IQ}) -> + #tx{ch_pid = ChPid, pending_messages = PendingMessages, + pending_acks = PendingAcks} = lookup_tx(Txn), PendingMessagesOrdered = lists:reverse(PendingMessages), PendingAcksOrdered = lists:append(PendingAcks), Acks = case lookup_ch(ChPid) of - not_found -> []; - C = #cr { unacked_messages = UAM } -> + not_found -> + []; + C = #cr{unacked_messages = UAM} -> {MsgsWithAcks, Remaining} = collect_messages(PendingAcksOrdered, UAM), store_ch_record(C#cr{unacked_messages = Remaining}), [AckTag || {_Message, AckTag} <- MsgsWithAcks] end, - {RunQueue, VQS} = - rabbit_variable_queue:tx_commit( - PendingMessagesOrdered, Acks, From, State #q.variable_queue_state), + {RunQueue, IQS} = IQ:tx_commit(PendingMessagesOrdered, Acks, From, + State#q.internal_queue_state), erase_tx(Txn), - {RunQueue, State #q { variable_queue_state = VQS }}. + {RunQueue, State#q{internal_queue_state = IQS}}. -rollback_transaction(Txn, State) -> - #tx { pending_messages = PendingMessages - } = lookup_tx(Txn), - VQS = rabbit_variable_queue:tx_rollback(PendingMessages, - State #q.variable_queue_state), +rollback_transaction(Txn, State = #q{internal_queue = IQ}) -> + #tx{pending_messages = PendingMessages} = lookup_tx(Txn), + IQS = IQ:tx_rollback(PendingMessages, State #q.internal_queue_state), erase_tx(Txn), - State #q { variable_queue_state = VQS }. + State#q{internal_queue_state = IQS}. collect_messages(MsgIds, UAM) -> lists:mapfoldl( @@ -592,8 +607,8 @@ i(exclusive_consumer_tag, #q{exclusive_consumer = none}) -> ''; i(exclusive_consumer_tag, #q{exclusive_consumer = {_ChPid, ConsumerTag}}) -> ConsumerTag; -i(messages_ready, #q{variable_queue_state = VQS}) -> - rabbit_variable_queue:len(VQS); +i(messages_ready, #q{internal_queue_state = IQS, internal_queue = IQ}) -> + IQ:len(IQS); i(messages_unacknowledged, _) -> lists:sum([dict:size(UAM) || #cr{unacked_messages = UAM} <- all_ch_record()]); @@ -614,25 +629,24 @@ i(transactions, _) -> i(memory, _) -> {memory, M} = process_info(self(), memory), M; -i(raw_vq_status, State) -> - rabbit_variable_queue:status(State#q.variable_queue_state); +i(internal_queue_status, #q{internal_queue_state = IQS, internal_queue = IQ}) -> + IQ:status(IQS); i(Item, _) -> throw({bad_argument, Item}). 
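maybe_run_queue_via_internal_queue/3 above collapses the old ad-hoc commit callbacks into one generic shape: apply a named function of the internal queue module to its arguments plus the current queue state, and run the message queue only if the callback asks for it. A standalone sketch of that dispatch, with a fun standing in for the module callback (all names illustrative):

-module(dispatch_demo).
-export([run/0]).

%% Mirrors apply(IQ, Fun, Args ++ [IQS]): the callback receives the
%% internal queue state appended and returns {RunQueue, NewState}.
maybe_run_queue_via(Callback, Args, IQS) ->
    {RunQueue, IQS1} = apply(Callback, Args ++ [IQS]),
    case RunQueue of
        true  -> {ran_message_queue, IQS1};
        false -> {left_alone, IQS1}
    end.

run() ->
    TxCommitIndex = fun (Len) -> {Len > 0, Len} end,
    {ran_message_queue, 3} = maybe_run_queue_via(TxCommitIndex, [], 3),
    {left_alone, 0} = maybe_run_queue_via(TxCommitIndex, [], 0),
    ok.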
%--------------------------------------------------------------------------- -handle_call(init_variable_queue, From, State = - #q{variable_queue_state = undefined, +handle_call(init_internal_queue, From, State = + #q{internal_queue_state = undefined, internal_queue = IQ, q = #amqqueue{name = QName, durable = IsDurable}}) -> gen_server2:reply(From, ok), PersistentStore = case IsDurable of true -> ?PERSISTENT_MSG_STORE; false -> ?TRANSIENT_MSG_STORE end, - noreply(State #q { variable_queue_state = - rabbit_variable_queue:init(QName, PersistentStore) }); + noreply(State#q{internal_queue_state = IQ:init(QName, PersistentStore)}); -handle_call(init_variable_queue, _From, State) -> +handle_call(init_internal_queue, _From, State) -> reply(ok, State); handle_call(sync, _From, State) -> @@ -697,27 +711,25 @@ handle_call({notify_down, ChPid}, _From, State) -> end; handle_call({basic_get, ChPid, NoAck}, _From, - State = #q{q = #amqqueue{name = QName}, - next_msg_id = NextId, - variable_queue_state = VQS - }) -> - case rabbit_variable_queue:fetch(VQS) of - {empty, VQS1} -> reply(empty, State #q { variable_queue_state = VQS1 }); - {{Message, IsDelivered, AckTag, Remaining}, VQS1} -> + State = #q{q = #amqqueue{name = QName}, next_msg_id = NextId, + internal_queue_state = IQS, internal_queue = IQ}) -> + case IQ:fetch(IQS) of + {empty, IQS1} -> reply(empty, State #q { internal_queue_state = IQS1 }); + {{Message, IsDelivered, AckTag, Remaining}, IQS1} -> AckRequired = not(NoAck), - VQS2 = + IQS2 = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Message, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - VQS1; + IQS1; false -> - rabbit_variable_queue:ack([AckTag], VQS1) + IQ:ack([AckTag], IQS1) end, Msg = {QName, self(), NextId, IsDelivered, Message}, reply({ok, Remaining, Msg}, - State #q { next_msg_id = NextId + 1, variable_queue_state = VQS2 }) + State #q { next_msg_id = NextId + 1, internal_queue_state = IQS2 }) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -797,14 +809,14 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, end; handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - variable_queue_state = VQS, + internal_queue_state = IQS, + internal_queue = IQ, active_consumers = ActiveConsumers}) -> - Length = rabbit_variable_queue:len(VQS), - reply({ok, Name, Length, queue:len(ActiveConsumers)}, State); + reply({ok, Name, IQ:len(IQS), queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, - State = #q { variable_queue_state = VQS }) -> - Length = rabbit_variable_queue:len(VQS), + State = #q{internal_queue_state = IQS, internal_queue = IQ}) -> + Length = IQ:len(IQS), IsEmpty = Length == 0, IsUnused = is_unused(State), if @@ -816,9 +828,9 @@ handle_call({delete, IfUnused, IfEmpty}, _From, {stop, normal, {ok, Length}, State} end; -handle_call(purge, _From, State) -> - {Count, VQS} = rabbit_variable_queue:purge(State #q.variable_queue_state), - reply({ok, Count}, State #q { variable_queue_state = VQS }); +handle_call(purge, _From, State = #q{internal_queue = IQ}) -> + {Count, IQS} = IQ:purge(State#q.internal_queue_state), + reply({ok, Count}, State#q{internal_queue_state = IQS}); handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, exclusive_consumer = Holder}) -> @@ -848,7 +860,7 @@ handle_cast({deliver, Txn, Message, ChPid}, State) -> {_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), noreply(NewState); 
-handle_cast({ack, Txn, MsgIds, ChPid}, State) -> +handle_cast({ack, Txn, MsgIds, ChPid}, State = #q{internal_queue = IQ}) -> case lookup_ch(ChPid) of not_found -> noreply(State); @@ -856,11 +868,10 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State) -> case Txn of none -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), - VQS = rabbit_variable_queue:ack( - [AckTag || {_Message, AckTag} <- MsgWithAcks], - State #q.variable_queue_state), + IQS = IQ:ack([AckTag || {_Message, AckTag} <- MsgWithAcks], + State #q.internal_queue_state), store_ch_record(C#cr{unacked_messages = Remaining}), - noreply(State #q { variable_queue_state = VQS }); + noreply(State #q { internal_queue_state = IQS }); _ -> record_pending_acks(Txn, ChPid, MsgIds), noreply(State) @@ -894,23 +905,8 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({tx_commit_msg_store_callback, IsTransientPubs, Pubs, AckTags, From}, - State = #q{variable_queue_state = VQS}) -> - {RunQueue, VQS1} = rabbit_variable_queue:tx_commit_from_msg_store( - IsTransientPubs, Pubs, AckTags, From, VQS), - State1 = State#q{variable_queue_state = VQS1}, - noreply(case RunQueue of - true -> run_message_queue(State1); - false -> State1 - end); - -handle_cast(tx_commit_vq_callback, State = #q{variable_queue_state = VQS}) -> - {RunQueue, VQS1} = rabbit_variable_queue:tx_commit_from_vq(VQS), - State1 = State#q{variable_queue_state = VQS1}, - noreply(case RunQueue of - true -> run_message_queue(State1); - false -> State1 - end); +handle_cast({maybe_run_queue_via_internal_queue, Fun, Args}, State) -> + noreply(maybe_run_queue_via_internal_queue(Fun, Args, State)); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( @@ -932,21 +928,21 @@ handle_cast({flush, ChPid}, State) -> ok = rabbit_channel:flushed(ChPid, self()), noreply(State); -handle_cast(remeasure_rates, State = #q{variable_queue_state = VQS}) -> - VQS1 = rabbit_variable_queue:remeasure_rates(VQS), - RamDuration = rabbit_variable_queue:ram_duration(VQS1), +handle_cast(remeasure_rates, State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> + IQS1 = IQ:remeasure_rates(IQS), + RamDuration = IQ:ram_duration(IQS1), DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), RamDuration), - VQS2 = rabbit_variable_queue:set_queue_ram_duration_target( - DesiredDuration, VQS1), + IQS2 = IQ:set_queue_ram_duration_target(DesiredDuration, IQS1), noreply(State#q{rate_timer_ref = just_measured, - variable_queue_state = VQS2}); + internal_queue_state = IQS2}); handle_cast({set_queue_duration, Duration}, - State = #q{variable_queue_state = VQS}) -> - VQS1 = rabbit_variable_queue:set_queue_ram_duration_target( - Duration, VQS), - noreply(State#q{variable_queue_state = VQS1}); + State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> + IQS1 = IQ:set_queue_ram_duration_target(Duration, IQS), + noreply(State#q{internal_queue_state = IQS1}); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -971,13 +967,12 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {stop, NewState} -> {stop, normal, NewState} end; -handle_info(timeout, State = #q{variable_queue_state = VQS}) -> - {RunQueue, VQS1} = rabbit_variable_queue:tx_commit_from_vq(VQS), - State1 = State#q{variable_queue_state = VQS1}, - noreply(case RunQueue of - true -> run_message_queue(State1); - false -> State1 - end); +handle_info(timeout, State = #q{internal_queue_timeout_fun = undefined}) -> + 
noreply(State); + +handle_info(timeout, State = #q{internal_queue_timeout_fun = {Fun, Args}}) -> + noreply(maybe_run_queue_via_internal_queue( + Fun, Args, State#q{internal_queue_timeout_fun = undefined})); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -986,11 +981,11 @@ handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. -handle_pre_hibernate(State = #q{ variable_queue_state = VQS }) -> - VQS1 = rabbit_variable_queue:flush_journal(VQS), +handle_pre_hibernate(State = #q{internal_queue_state = IQS, + internal_queue = IQ}) -> + IQS1 = IQ:handle_pre_hibernate(IQS), %% no activity for a while == 0 egress and ingress rates DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), infinity), - VQS2 = rabbit_variable_queue:set_queue_ram_duration_target( - DesiredDuration, VQS1), - {hibernate, stop_rate_timer(State#q{variable_queue_state = VQS2})}. + IQS2 = IQ:set_queue_ram_duration_target(DesiredDuration, IQS1), + {hibernate, stop_rate_timer(State#q{internal_queue_state = IQS2})}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 6b8998c2..22138bf1 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1409,7 +1409,7 @@ test_variable_queue_dynamic_duration_change() -> {_SeqIds1, VQ7} = variable_queue_publish(true, 20, VQ6), {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), - VQ10 = rabbit_variable_queue:flush_journal(VQ9), + VQ10 = rabbit_variable_queue:handle_pre_hibernate(VQ9), {empty, VQ11} = rabbit_variable_queue:fetch(VQ10), rabbit_variable_queue:terminate(VQ11), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9d33cc7c..1934fafc 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -35,8 +35,9 @@ set_queue_ram_duration_target/2, remeasure_rates/1, ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, - tx_commit/4, tx_commit_from_msg_store/5, tx_commit_from_vq/1, - needs_sync/1, flush_journal/1, status/1]). + tx_commit/4, needs_sync/1, handle_pre_hibernate/1, status/1]). + +-export([tx_commit_post_msg_store/5, tx_commit_index/1]). %% internal %%---------------------------------------------------------------------------- %% Definitions: @@ -242,12 +243,12 @@ -spec(tx_rollback/2 :: ([msg_id()], vqstate()) -> vqstate()). -spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> {boolean(), vqstate()}). --spec(tx_commit_from_msg_store/5 :: +-spec(tx_commit_post_msg_store/5 :: (boolean(), [msg_id()], [ack()], {pid(), any()}, vqstate()) -> {boolean(), vqstate()}). --spec(tx_commit_from_vq/1 :: (vqstate()) -> {boolean(), vqstate()}). --spec(needs_sync/1 :: (vqstate()) -> boolean()). --spec(flush_journal/1 :: (vqstate()) -> vqstate()). +-spec(tx_commit_index/1 :: (vqstate()) -> {boolean(), vqstate()}). +-spec(needs_sync/1 :: (vqstate()) -> ('undefined' | {atom(), [any()]})). +-spec(handle_pre_hibernate/1 :: (vqstate()) -> vqstate()). -spec(status/1 :: (vqstate()) -> [{atom(), any()}]). -endif. @@ -505,23 +506,26 @@ delete_and_terminate(State) -> persistent_store = PersistentStore, transient_threshold = TransientThreshold }} = purge(State), - IndexState1 = + %% flushing here is good because it deletes all full segments, + %% leaving only partial segments around. 
+    IndexState1 = rabbit_queue_index:flush_journal(IndexState),
+    IndexState2 =
         case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(
-               IndexState) of
-            {N, N, IndexState2} ->
-                IndexState2;
-            {DeltaSeqId, NextSeqId, IndexState2} ->
-                {_DeleteCount, IndexState3} =
+               IndexState1) of
+            {N, N, IndexState3} ->
+                IndexState3;
+            {DeltaSeqId, NextSeqId, IndexState3} ->
+                {_DeleteCount, IndexState4} =
                     delete1(PersistentStore, TransientThreshold, NextSeqId, 0,
-                            DeltaSeqId, IndexState2),
-                IndexState3
+                            DeltaSeqId, IndexState3),
+                IndexState4
         end,
-    IndexState4 = rabbit_queue_index:terminate_and_erase(IndexState1),
+    IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2),
     rabbit_msg_store:delete_client(PersistentStore, PRef),
     rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef),
     rabbit_msg_store:client_terminate(MSCStateP),
     rabbit_msg_store:client_terminate(MSCStateT),
-    State1 #vqstate { index_state = IndexState4 }.
+    State1 #vqstate { index_state = IndexState5 }.
 
 %% [{Msg, AckTag}]
 %% We guarantee that after fetch, only persistent msgs are left on
@@ -595,19 +599,20 @@ tx_commit(Pubs, AckTags, From, State =
     case IsTransientPubs orelse
         ?TRANSIENT_MSG_STORE == PersistentStore of
         true ->
-            tx_commit_from_msg_store(
+            tx_commit_post_msg_store(
               IsTransientPubs, Pubs, AckTags, From, State);
         false ->
             Self = self(),
             ok = rabbit_msg_store:sync(
                    ?PERSISTENT_MSG_STORE, PersistentMsgIds,
-                   fun () -> ok = rabbit_amqqueue:tx_commit_msg_store_callback(
-                                    Self, IsTransientPubs, Pubs, AckTags, From)
+                   fun () -> ok = rabbit_amqqueue:maybe_run_queue_via_internal_queue(
+                                    Self, tx_commit_post_msg_store,
+                                    [IsTransientPubs, Pubs, AckTags, From])
                    end),
             {false, State}
     end.
 
-tx_commit_from_msg_store(IsTransientPubs, Pubs, AckTags, From, State =
+tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State =
                          #vqstate { on_sync = OnSync = {SAcks, SPubs, SFroms},
                                     persistent_store = PersistentStore }) ->
     %% If we are a non-durable queue, or (no persistent pubs, and no
@@ -617,17 +622,17 @@ tx_commit_from_msg_store(IsTransientPubs, Pubs, AckTags, From, State =
         lists:filter(fun (AckTag) -> AckTag /= ack_not_on_disk end, AckTags),
     case PersistentStore == ?TRANSIENT_MSG_STORE orelse
         (IsTransientPubs andalso [] == DiskAcks) of
         true -> {Res, State1} =
-                    tx_commit_from_vq(State #vqstate {
-                                        on_sync = {[], [Pubs], [From]} }),
+                    tx_commit_index(State #vqstate {
+                                      on_sync = {[], [Pubs], [From]} }),
                 {Res, State1 #vqstate { on_sync = OnSync }};
         false -> {false, State #vqstate { on_sync = { [DiskAcks | SAcks],
                                                       [Pubs | SPubs],
                                                       [From | SFroms] }}}
     end.
 
-tx_commit_from_vq(State = #vqstate { on_sync = {_, _, []} }) ->
+tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) ->
     {false, State};
-tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms},
+tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms},
                                      persistent_store = PersistentStore }) ->
     Acks = lists:flatten(SAcks),
     State1 = ack(Acks, State),
@@ -656,11 +661,11 @@ tx_commit_from_vq(State = #vqstate { on_sync = {SAcks, SPubs, SFroms},
     State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }.
 
 needs_sync(#vqstate { on_sync = {_, _, []} }) ->
-    false;
+    undefined;
 needs_sync(_) ->
-    true.
+    {tx_commit_index, []}.
 
-flush_journal(State = #vqstate { index_state = IndexState }) ->
+handle_pre_hibernate(State = #vqstate { index_state = IndexState }) ->
     State #vqstate { index_state =
         rabbit_queue_index:flush_journal(IndexState) }.
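Note the changed contract at the end of this patch: needs_sync/1 no longer answers a bare boolean, but either 'undefined' or the {Function, Arguments} pair (here {tx_commit_index, []}) that the queue process should invoke when its timeout next fires. A compressed, illustrative sketch of how the two sides fit together:

-module(needs_sync_demo).
-export([next_state/1]).

%% The internal queue reports pending work as {Fun, Args}, or
%% 'undefined' when there is nothing left to sync.
needs_sync({_SAcks, _SPubs, []}) -> undefined;
needs_sync(_OnSync)              -> {tx_commit_index, []}.

%% Mirrors next_state1/2 in the queue process: pending work means a
%% zero timeout (so handle_info(timeout, ...) fires the callback),
%% no pending work means the process is free to hibernate.
next_state(OnSync) ->
    case needs_sync(OnSync) of
        undefined   -> hibernate;
        {Fun, Args} -> {timeout, 0, {Fun, Args}}
    end.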
-- cgit v1.2.1 From 046157f6bed024abb34bdafb8092f68e561f4dbf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Apr 2010 16:05:08 +0100 Subject: Pluggable queues land --- ebin/rabbit_app.in | 1 + include/rabbit_internal_queue_type_spec.hrl | 55 ++++++++++++++++ include/rabbit_queue.hrl | 44 ------------- src/rabbit_amqqueue_process.erl | 7 ++- src/rabbit_internal_queue_type.erl | 97 +++++++++++++++++++++++++++++ src/rabbit_tests.erl | 45 ++++++------- src/rabbit_variable_queue.erl | 61 ++++++++---------- 7 files changed, 202 insertions(+), 108 deletions(-) create mode 100644 include/rabbit_internal_queue_type_spec.hrl delete mode 100644 include/rabbit_queue.hrl create mode 100644 src/rabbit_internal_queue_type.erl diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index 035fa054..a481af08 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -19,6 +19,7 @@ {ssl_options, []}, {vm_memory_high_watermark, 0.4}, {msg_store_index_module, rabbit_msg_store_ets_index}, + {queue_internal_queue_module, rabbit_variable_queue}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/include/rabbit_internal_queue_type_spec.hrl b/include/rabbit_internal_queue_type_spec.hrl new file mode 100644 index 00000000..6409efb6 --- /dev/null +++ b/include/rabbit_internal_queue_type_spec.hrl @@ -0,0 +1,55 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-spec(init/2 :: (queue_name(), pid() | atom()) -> state()). +-spec(terminate/1 :: (state()) -> state()). +-spec(publish/2 :: (basic_message(), state()) -> state()). +-spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). +-spec(set_queue_ram_duration_target/2 :: + (('undefined' | 'infinity' | number()), state()) -> state()). +-spec(remeasure_rates/1 :: (state()) -> state()). +-spec(ram_duration/1 :: (state()) -> number()). +-spec(fetch/1 :: (state()) -> + {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), + state()}). +-spec(ack/2 :: ([ack()], state()) -> state()). +-spec(len/1 :: (state()) -> non_neg_integer()). +-spec(is_empty/1 :: (state()) -> boolean()). +-spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). +-spec(delete_and_terminate/1 :: (state()) -> state()). 
+-spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). +-spec(tx_publish/2 :: (basic_message(), state()) -> state()). +-spec(tx_rollback/2 :: ([msg_id()], state()) -> state()). +-spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, state()) -> + {boolean(), state()}). +-spec(needs_sync/1 :: (state()) -> ('undefined' | {atom(), [any()]})). +-spec(handle_pre_hibernate/1 :: (state()) -> state()). +-spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_queue.hrl b/include/rabbit_queue.hrl deleted file mode 100644 index 66966ba8..00000000 --- a/include/rabbit_queue.hrl +++ /dev/null @@ -1,44 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2010 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2010 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --record(delta, - { start_seq_id, - count, - end_seq_id %% note the end_seq_id is always >, not >= - }). - --ifdef(use_specs). - --type(delta() :: #delta { start_seq_id :: non_neg_integer(), - count :: non_neg_integer (), - end_seq_id :: non_neg_integer() }). - --endif. diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 33ea625c..4c42b0ef 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -114,11 +114,14 @@ init(Q) -> rabbit_amqqueue, set_maximum_since_use, [self()]), ok = rabbit_memory_monitor:register (self(), {rabbit_amqqueue, set_queue_duration, [self()]}), + {ok, InternalQueueModule} = + application:get_env(queue_internal_queue_module), + {ok, #q{q = Q, owner = none, exclusive_consumer = none, has_had_consumers = false, - internal_queue = rabbit_variable_queue, + internal_queue = InternalQueueModule, internal_queue_state = undefined, internal_queue_timeout_fun = undefined, next_msg_id = 1, @@ -387,7 +390,7 @@ deliver_or_enqueue(Txn, ChPid, Message, State = #q{internal_queue = IQ}) -> {true, NewState}; {false, NewState} -> %% Txn is none and no unblocked channels with consumers - {_SeqId, IQS} = IQ:publish(Message, State #q.internal_queue_state), + IQS = IQ:publish(Message, State #q.internal_queue_state), {false, NewState #q { internal_queue_state = IQS }} end. 
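To make the callback set of the new rabbit_internal_queue_type behaviour (defined just below) concrete, here is a toy, RAM-only implementation sketch. It is illustrative only: it ignores persistence and duration targets, commits transactions immediately, and never replies to From, which a real implementation must eventually do (rabbit_variable_queue does so via gen_server2:reply):

-module(toy_internal_queue).
%% -behaviour(rabbit_internal_queue_type). %% enable inside a rabbit tree
-export([init/2, terminate/1, delete_and_terminate/1, purge/1,
         publish/2, publish_delivered/2, fetch/1, ack/2,
         tx_publish/2, tx_rollback/2, tx_commit/4, requeue/2,
         len/1, is_empty/1, set_queue_ram_duration_target/2,
         remeasure_rates/1, ram_duration/1, needs_sync/1,
         handle_pre_hibernate/1, status/1]).

%% State is a plain queue of messages; an ack tag is just the message.
init(_QName, _PersistentStore)            -> queue:new().
terminate(Q)                              -> Q.
delete_and_terminate(_Q)                  -> queue:new().
purge(Q)                                  -> {queue:len(Q), queue:new()}.
publish(Msg, Q)                           -> queue:in(Msg, Q).
publish_delivered(Msg, Q)                 -> {{ack, Msg}, Q}.
fetch(Q) ->
    case queue:out(Q) of
        {empty, Q1}        -> {empty, Q1};
        {{value, Msg}, Q1} -> {{Msg, false, {ack, Msg}, queue:len(Q1)}, Q1}
    end.
ack(_AckTags, Q)                          -> Q.
tx_publish(_Msg, Q)                       -> Q.
tx_rollback(_MsgIds, Q)                   -> Q.
%% a real implementation must also reply to From once committed
tx_commit(Pubs, _AckTags, _From, Q) ->
    {true, lists:foldl(fun queue:in/2, Q, Pubs)}.
requeue(MsgsWithAcks, Q) ->
    lists:foldl(fun ({Msg, _AckTag}, QN) -> queue:in(Msg, QN) end,
                Q, MsgsWithAcks).
len(Q)                                    -> queue:len(Q).
is_empty(Q)                               -> queue:is_empty(Q).
set_queue_ram_duration_target(_Target, Q) -> Q.
remeasure_rates(Q)                        -> Q.
ram_duration(_Q)                          -> 0.
needs_sync(_Q)                            -> undefined.
handle_pre_hibernate(Q)                   -> Q.
status(Q)                                 -> [{len, queue:len(Q)}].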
diff --git a/src/rabbit_internal_queue_type.erl b/src/rabbit_internal_queue_type.erl
new file mode 100644
index 00000000..4ee4556a
--- /dev/null
+++ b/src/rabbit_internal_queue_type.erl
@@ -0,0 +1,97 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%% Ltd. Portions created by Cohesive Financial Technologies LLC are
+%% Copyright (C) 2007-2009 Cohesive Financial Technologies
+%% LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%% (C) 2007-2009 Rabbit Technologies Ltd.
+%%
+%% All Rights Reserved.
+%%
+%% Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_internal_queue_type).
+
+-export([behaviour_info/1]).
+
+behaviour_info(callbacks) ->
+    [
+     %% Called with queue name and the persistent msg_store to
+     %% use. Transient store is in ?TRANSIENT_MSG_STORE
+     {init, 2},
+
+     %% Called on queue shutdown when queue isn't being deleted
+     {terminate, 1},
+
+     %% Called when the queue is terminating and needs to delete all
+     %% its content.
+     {delete_and_terminate, 1},
+
+     %% Remove all messages in the queue, but not messages which have
+     %% been fetched and are pending acks.
+     {purge, 1},
+
+     %% Publish a message
+     {publish, 2},
+
+     %% Called for messages which have already been passed straight
+     %% out to a client. The queue will be empty for these calls
+     %% (i.e. saves the round trip through the internal queue).
+     {publish_delivered, 2},
+
+     {fetch, 1},
+
+     {ack, 2},
+
+     {tx_publish, 2},
+     {tx_rollback, 2},
+     {tx_commit, 4},
+
+     %% Reinsert messages into the queue which have already been
+     %% delivered and were (likely) pending acks.
+     {requeue, 2},
+
+     {len, 1},
+
+     {is_empty, 1},
+
+     {set_queue_ram_duration_target, 2},
+
+     {remeasure_rates, 1},
+
+     {ram_duration, 1},
+
+     %% Can return 'undefined' or a function atom name plus list of
+     %% arguments to be invoked in the internal queue module as soon
+     %% as the queue process can manage (either on an empty mailbox,
+     %% or when a timer fires).
+     {needs_sync, 1},
+
+     %% Called immediately before the queue hibernates
+     {handle_pre_hibernate, 1},
+
+     %% Exists for debugging purposes, to be able to expose state via
+     %% rabbitmqctl list_queues internal_queue_status
+     {status, 1}
+    ];
+behaviour_info(_Other) ->
+    undefined.
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 22138bf1..838c5f9c 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -41,7 +41,6 @@
 -import(lists).
 
 -include("rabbit.hrl").
--include("rabbit_queue.hrl").
 -include_lib("kernel/include/file.hrl").
 
test_content_prop_roundtrip(Datum, Binary) ->
@@ -1348,14 +1347,13 @@ test_queue_index() ->
 
 variable_queue_publish(IsPersistent, Count, VQ) ->
     lists:foldl(
-      fun (_N, {Acc, VQ1}) ->
-              {SeqId, VQ2} = rabbit_variable_queue:publish(
-                               rabbit_basic:message(
-                                 rabbit_misc:r(<<>>, exchange, <<>>),
-                                 <<>>, [], <<>>, rabbit_guid:guid(),
-                                 IsPersistent), VQ1),
-              {[SeqId | Acc], VQ2}
-      end, {[], VQ}, lists:seq(1, Count)).
+      fun (_N, VQN) ->
+              rabbit_variable_queue:publish(
+                rabbit_basic:message(
+                  rabbit_misc:r(<<>>, exchange, <<>>),
+                  <<>>, [], <<>>, rabbit_guid:guid(),
+                  IsPersistent), VQN)
+      end, VQ, lists:seq(1, Count)).
 
 variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) ->
     lists:foldl(fun (N, {VQN, AckTagsAcc}) ->
@@ -1377,9 +1375,7 @@ fresh_variable_queue() ->
     assert_prop(S0, len, 0),
     assert_prop(S0, q1, 0),
     assert_prop(S0, q2, 0),
-    assert_prop(S0, delta, #delta { start_seq_id = undefined,
-                                    count = 0,
-                                    end_seq_id = undefined }),
+    assert_prop(S0, delta, {delta, undefined, 0, undefined}),
     assert_prop(S0, q3, 0),
     assert_prop(S0, q4, 0),
     VQ.
@@ -1394,7 +1390,7 @@ test_variable_queue_dynamic_duration_change() ->
     VQ0 = fresh_variable_queue(),
     %% start by sending in a couple of segments worth
     Len1 = 2*SegmentSize,
-    {_SeqIds, VQ1} = variable_queue_publish(false, Len1, VQ0),
+    VQ1 = variable_queue_publish(false, Len1, VQ0),
     VQ2 = rabbit_variable_queue:remeasure_rates(VQ1),
     {ok, _TRef} = timer:send_after(1000, {duration, 60,
                                           fun (V) -> (V*0.75)-1 end}),
@@ -1406,7 +1402,7 @@ test_variable_queue_dynamic_duration_change() ->
     %% just publish and fetch some persistent msgs, this hits the
     %% partial segment path in queue_index due to the period when
     %% duration was 0 and the entire queue was delta.
-    {_SeqIds1, VQ7} = variable_queue_publish(true, 20, VQ6),
+    VQ7 = variable_queue_publish(true, 20, VQ6),
     {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7),
     VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8),
     VQ10 = rabbit_variable_queue:handle_pre_hibernate(VQ9),
@@ -1417,7 +1413,7 @@ passed.
test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> - {_SeqIds, VQ1} = variable_queue_publish(false, 1, VQ0), + VQ1 = variable_queue_publish(false, 1, VQ0), {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(VQ1), VQ3 = rabbit_variable_queue:ack([AckTag], VQ2), receive @@ -1444,27 +1440,24 @@ test_variable_queue_partial_segments_delta_thing() -> SegmentSize = rabbit_queue_index:segment_size(), HalfSegment = SegmentSize div 2, VQ0 = fresh_variable_queue(), - {_SeqIds, VQ1} = - variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), + VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), VQ2 = rabbit_variable_queue:remeasure_rates(VQ1), VQ3 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ2), %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), io:format("~p~n", [S3]), - assert_prop(S3, delta, #delta { start_seq_id = SegmentSize, - count = HalfSegment, - end_seq_id = SegmentSize + HalfSegment }), + assert_prop(S3, delta, {delta, SegmentSize, HalfSegment, + SegmentSize + HalfSegment}), assert_prop(S3, q3, SegmentSize), assert_prop(S3, len, SegmentSize + HalfSegment), VQ4 = rabbit_variable_queue:set_queue_ram_duration_target(infinity, VQ3), - {[_SeqId], VQ5} = variable_queue_publish(true, 1, VQ4), + VQ5 = variable_queue_publish(true, 1, VQ4), %% should have 1 alpha, but it's in the same segment as the deltas S5 = rabbit_variable_queue:status(VQ5), io:format("~p~n", [S5]), assert_prop(S5, q1, 1), - assert_prop(S5, delta, #delta { start_seq_id = SegmentSize, - count = HalfSegment, - end_seq_id = SegmentSize + HalfSegment }), + assert_prop(S5, delta, {delta, SegmentSize, HalfSegment, + SegmentSize + HalfSegment}), assert_prop(S5, q3, SegmentSize), assert_prop(S5, len, SegmentSize + HalfSegment + 1), {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, @@ -1472,9 +1465,7 @@ test_variable_queue_partial_segments_delta_thing() -> %% the half segment should now be in q3 as betas S6 = rabbit_variable_queue:status(VQ6), io:format("~p~n", [S6]), - assert_prop(S6, delta, #delta { start_seq_id = undefined, - count = 0, - end_seq_id = undefined }), + assert_prop(S6, delta, {delta, undefined, 0, undefined}), assert_prop(S6, q1, 1), assert_prop(S6, q3, HalfSegment), assert_prop(S6, len, HalfSegment + 1), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 1934fafc..297c3ef4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -133,6 +133,8 @@ %%---------------------------------------------------------------------------- +-behaviour(rabbit_internal_queue_type). + -record(vqstate, { q1, q2, @@ -162,7 +164,6 @@ }). -include("rabbit.hrl"). --include("rabbit_queue.hrl"). -record(msg_status, { msg, @@ -174,6 +175,12 @@ index_on_disk }). +-record(delta, + { start_seq_id, + count, + end_seq_id %% note the end_seq_id is always >, not >= + }). + %% When we discover, on publish, that we should write some indices to %% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of %% betas that we must be due to write indices for before we do any @@ -187,12 +194,17 @@ -ifdef(use_specs). +-type(msg_id() :: binary()). -type(bpqueue() :: any()). --type(msg_id() :: binary()). -type(seq_id() :: non_neg_integer()). -type(ack() :: {'ack_index_and_store', msg_id(), seq_id(), atom() | pid()} | 'ack_not_on_disk'). 
--type(vqstate() :: #vqstate { + +-type(delta() :: #delta { start_seq_id :: non_neg_integer(), + count :: non_neg_integer (), + end_seq_id :: non_neg_integer() }). + +-type(state() :: #vqstate { q1 :: queue(), q2 :: bpqueue(), delta :: delta(), @@ -220,36 +232,12 @@ transient_threshold :: non_neg_integer() }). --spec(init/2 :: (queue_name(), pid() | atom()) -> vqstate()). --spec(terminate/1 :: (vqstate()) -> vqstate()). --spec(publish/2 :: (basic_message(), vqstate()) -> - {seq_id(), vqstate()}). --spec(publish_delivered/2 :: (basic_message(), vqstate()) -> - {ack(), vqstate()}). --spec(set_queue_ram_duration_target/2 :: - (('undefined' | 'infinity' | number()), vqstate()) -> vqstate()). --spec(remeasure_rates/1 :: (vqstate()) -> vqstate()). --spec(ram_duration/1 :: (vqstate()) -> number()). --spec(fetch/1 :: (vqstate()) -> - {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), - vqstate()}). --spec(ack/2 :: ([ack()], vqstate()) -> vqstate()). --spec(len/1 :: (vqstate()) -> non_neg_integer()). --spec(is_empty/1 :: (vqstate()) -> boolean()). --spec(purge/1 :: (vqstate()) -> {non_neg_integer(), vqstate()}). --spec(delete_and_terminate/1 :: (vqstate()) -> vqstate()). --spec(requeue/2 :: ([{basic_message(), ack()}], vqstate()) -> vqstate()). --spec(tx_publish/2 :: (basic_message(), vqstate()) -> vqstate()). --spec(tx_rollback/2 :: ([msg_id()], vqstate()) -> vqstate()). --spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, vqstate()) -> - {boolean(), vqstate()}). -spec(tx_commit_post_msg_store/5 :: - (boolean(), [msg_id()], [ack()], {pid(), any()}, vqstate()) -> - {boolean(), vqstate()}). --spec(tx_commit_index/1 :: (vqstate()) -> {boolean(), vqstate()}). --spec(needs_sync/1 :: (vqstate()) -> ('undefined' | {atom(), [any()]})). --spec(handle_pre_hibernate/1 :: (vqstate()) -> vqstate()). --spec(status/1 :: (vqstate()) -> [{atom(), any()}]). + (boolean(), [msg_id()], [ack()], {pid(), any()}, state()) -> + {boolean(), state()}). +-spec(tx_commit_index/1 :: (state()) -> {boolean(), state()}). + +-include("rabbit_internal_queue_type_spec.hrl"). -endif. @@ -321,7 +309,8 @@ terminate(State = #vqstate { publish(Msg, State) -> State1 = limit_ram_index(State), - publish(Msg, false, false, State1). + {_SeqId, State2} = publish(Msg, false, false, State1), + State2. 
publish_delivered(Msg = #basic_message { guid = MsgId, is_persistent = IsPersistent }, @@ -553,7 +542,8 @@ requeue(MsgsWithAckTags, State) -> rabbit_misc:dict_cons(MsgStore, MsgId, Dict), true} end, - {_SeqId, StateN1} = publish(Msg, true, MsgOnDisk, StateN), + {_SeqId, StateN1} = + publish(Msg, true, MsgOnDisk, StateN), {SeqIdsAcc1, Dict1, StateN1} end, {[], dict:new(), State}, MsgsWithAckTags), IndexState1 = @@ -648,7 +638,8 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, {SeqIdsAcc, StateN}) -> - {SeqId, StateN1} = publish(Msg, false, IsPersistent, StateN), + {SeqId, StateN1} = + publish(Msg, false, IsPersistent, StateN), {case IsPersistentStore andalso IsPersistent of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc -- cgit v1.2.1 From ef38bdde3a70d8c1a7b6ff25187aaceb56dd7a81 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Apr 2010 17:20:11 +0100 Subject: Rearranging GC code and tidying up of some common dialyzer type defs --- include/rabbit.hrl | 7 + include/rabbit_msg_store.hrl | 20 +-- include/rabbit_msg_store_index.hrl | 5 +- src/rabbit_amqqueue.erl | 5 +- src/rabbit_amqqueue_process.erl | 2 - src/rabbit_channel.erl | 8 +- src/rabbit_msg_file.erl | 5 +- src/rabbit_msg_store.erl | 264 +++++++++++++++++++++++++++++++++---- src/rabbit_msg_store_gc.erl | 181 +------------------------ src/rabbit_msg_store_misc.erl | 74 ----------- src/rabbit_queue_index.erl | 1 - src/rabbit_variable_queue.erl | 5 +- 12 files changed, 268 insertions(+), 309 deletions(-) delete mode 100644 src/rabbit_msg_store_misc.erl diff --git a/include/rabbit.hrl b/include/rabbit.hrl index e9fa6e37..35134ee7 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -90,6 +90,7 @@ %% this is really an abstract type, but dialyzer does not support them -type(guid() :: binary()). +-type(msg_id() :: guid()). -type(txn() :: guid()). -type(pkey() :: guid()). -type(r(Kind) :: @@ -168,6 +169,9 @@ #amqp_error{name :: atom(), explanation :: string(), method :: atom()}). + +-type(msg() :: any()). + -endif. %%---------------------------------------------------------------------------- @@ -180,6 +184,9 @@ -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + -ifdef(debug). -define(LOGDEBUG0(F), rabbit_log:debug(F)). -define(LOGDEBUG(F,A), rabbit_log:debug(F,A)). diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 2c2735d4..112588f3 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -29,23 +29,7 @@ %% Contributor(s): ______________________________________. %% +-include("rabbit.hrl"). + -record(msg_location, {msg_id, ref_count, file, offset, total_size}). - --record(file_summary, - {file, valid_total_size, contiguous_top, left, right, file_size, - locked, readers}). - --define(BINARY_MODE, [raw, binary]). --define(READ_MODE, [read]). --define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). --define(WRITE_MODE, [write]). - --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). --define(FILE_EXTENSION, ".rdq"). --define(FILE_EXTENSION_TMP, ".rdt"). - --define(FILE_SIZE_LIMIT, (16*1024*1024)). - --define(HANDLE_CACHE_BUFFER_SIZE, 1048576). 
%% 1MB diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl index db7e3b9f..9b3332ee 100644 --- a/include/rabbit_msg_store_index.hrl +++ b/include/rabbit_msg_store_index.hrl @@ -29,11 +29,12 @@ %% Contributor(s): ______________________________________. %% +-include("rabbit_msg_store.hrl"). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(msg_id() :: binary()). -type(dir() :: any()). -type(index_state() :: any()). -type(keyvalue() :: any()). @@ -55,5 +56,3 @@ -endif. %%---------------------------------------------------------------------------- - --include("rabbit_msg_store.hrl"). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 1c8cf522..00407824 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -58,8 +58,7 @@ -ifdef(use_specs). --type(msg_id() :: non_neg_integer()). --type(msg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). +-type(get_msg_result() :: {queue_name(), pid(), msg_id(), boolean(), message()}). -type(qstats() :: {'ok', queue_name(), non_neg_integer(), non_neg_integer()}). -type(qlen() :: {'ok', non_neg_integer()}). -type(qfun(A) :: fun ((amqqueue()) -> A)). @@ -101,7 +100,7 @@ -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). -spec(claim_queue/2 :: (amqqueue(), pid()) -> 'ok' | 'locked'). -spec(basic_get/3 :: (amqqueue(), pid(), boolean()) -> - {'ok', non_neg_integer(), msg()} | 'empty'). + {'ok', non_neg_integer(), get_msg_result()} | 'empty'). -spec(basic_consume/8 :: (amqqueue(), boolean(), pid(), pid(), pid() | 'undefined', ctag(), boolean(), any()) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 4c42b0ef..7f43f79a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -36,8 +36,6 @@ -behaviour(gen_server2). -define(UNSENT_MESSAGE_LIMIT, 100). --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). -define(SYNC_INTERVAL, 5). %% milliseconds -define(RATES_REMEASURE_INTERVAL, 5000). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index c8733ed1..a8c17efb 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -48,9 +48,6 @@ username, virtual_host, most_recently_declared_queue, consumer_mapping, blocking}). --define(HIBERNATE_AFTER_MIN, 1000). --define(DESIRED_HIBERNATE, 10000). - -define(MAX_PERMISSION_CACHE_SIZE, 12). -define(INFO_KEYS, @@ -69,8 +66,7 @@ -ifdef(use_specs). --type(msg_id() :: non_neg_integer()). --type(msg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). +-type(msg_to_deliver() :: {queue_name(), pid(), msg_id(), boolean(), message()}). -spec(start_link/5 :: (channel_number(), pid(), pid(), username(), vhost()) -> pid()). @@ -78,7 +74,7 @@ -spec(do/3 :: (pid(), amqp_method(), maybe(content())) -> 'ok'). -spec(shutdown/1 :: (pid()) -> 'ok'). -spec(send_command/2 :: (pid(), amqp_method()) -> 'ok'). --spec(deliver/4 :: (pid(), ctag(), boolean(), msg()) -> 'ok'). +-spec(deliver/4 :: (pid(), ctag(), boolean(), msg_to_deliver()) -> 'ok'). -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -spec(flushed/2 :: (pid(), pid()) -> 'ok'). -spec(list/0 :: () -> [pid()]). diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 831b4d79..267cb633 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -46,11 +46,10 @@ %%---------------------------------------------------------------------------- +-include("rabbit.hrl"). + -ifdef(use_specs). 
--type(io_device() :: any()). --type(msg_id() :: binary()). --type(msg() :: any()). -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e5de24ce..b2db0ea5 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -38,16 +38,32 @@ successfully_recovered_state/1]). -export([sync/1, gc_done/4, set_maximum_since_use/2, - build_index_worker/6]). %% internal + build_index_worker/6, gc/3]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + -define(SYNC_INTERVAL, 5). %% milliseconds -define(GEOMETRIC_P, 0.3). %% parameter to geometric distribution rng -define(CLEAN_FILENAME, "clean.dot"). -define(FILE_SUMMARY_FILENAME, "file_summary.ets"). +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-define(FILE_SIZE_LIMIT, (16*1024*1024)). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + %%---------------------------------------------------------------------------- -record(msstate, @@ -84,14 +100,15 @@ cur_file_cache_ets }). +-record(file_summary, + {file, valid_total_size, contiguous_top, left, right, file_size, + locked, readers}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). -type(server() :: pid() | atom()). --type(msg_id() :: binary()). --type(msg() :: any()). --type(file_path() :: any()). -type(file_num() :: non_neg_integer()). -type(client_msstate() :: #client_msstate { file_handle_cache :: dict(), index_state :: any(), @@ -122,12 +139,13 @@ -spec(clean/2 :: (atom(), file_path()) -> 'ok'). -spec(successfully_recovered_state/1 :: (server()) -> boolean()). +-spec(gc/3 :: (non_neg_integer(), non_neg_integer(), + {tid(), file_path(), atom(), any()}) -> non_neg_integer()). + -endif. %%---------------------------------------------------------------------------- --include("rabbit_msg_store.hrl"). - %% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION %% It is not recommended to set this to < 0.5 -define(GARBAGE_FRACTION, 0.5). @@ -558,8 +576,8 @@ init([Server, BaseDir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> build_index(Recovered1, Files, State), %% read is only needed so that we can seek - {ok, FileHdl} = rabbit_msg_store_misc:open_file( - Dir, rabbit_msg_store_misc:filenum_to_name(CurFile), + {ok, FileHdl} = open_file( + Dir, filenum_to_name(CurFile), [read | ?WRITE_MODE]), {ok, Offset} = file_handle_cache:position(FileHdl, Offset), ok = file_handle_cache:truncate(FileHdl), @@ -956,6 +974,10 @@ run_pending({contains, MsgId, From}, State) -> run_pending({remove, MsgId}, State) -> remove_message(MsgId, State). +open_file(Dir, FileName, Mode) -> + file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, + [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). 
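%% [Editor's sketch, not part of the patch. Handles now go through the
%% open_file/3 helper above, which layers raw binary mode and the 1MB
%% write_buffer onto whatever mode the caller asks for; file number 0 below
%% is illustrative.]
open_file_example(Dir) ->
    {ok, Hdl} = open_file(Dir, filenum_to_name(0), [read | ?WRITE_MODE]),
    {ok, 0}   = file_handle_cache:position(Hdl, 0),
    ok = file_handle_cache:close(Hdl).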
+ close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) -> CState #client_msstate { file_handle_cache = close_handle(Key, FHC) }; @@ -1001,8 +1023,8 @@ get_read_handle(FileNum, FHC, Dir) -> {ok, Hdl} -> {Hdl, FHC}; error -> - {ok, Hdl} = rabbit_msg_store_misc:open_file( - Dir, rabbit_msg_store_misc:filenum_to_name(FileNum), + {ok, Hdl} = open_file( + Dir, filenum_to_name(FileNum), ?READ_MODE), {Hdl, dict:store(FileNum, Hdl, FHC) } end. @@ -1036,6 +1058,35 @@ store_file_summary(Tid, Dir) -> [{extended_info, [object_count]}]), ets:delete(Tid). + +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), + ok = file_handle_cache:truncate(Hdl), + {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), + ok. + +truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file_handle_cache:position(FileHdl, Lowpoint), + ok = file_handle_cache:truncate(FileHdl), + ok = preallocate(FileHdl, Highpoint, Lowpoint). + +form_filename(Dir, Name) -> filename:join(Dir, Name). + +filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. + +scan_file_for_valid_messages(Dir, FileName) -> + case open_file(Dir, FileName, ?READ_MODE) of + {ok, Hdl} -> + Valid = rabbit_msg_file:scan(Hdl), + %% if something really bad's happened, the close could fail, + %% but ignore + file_handle_cache:close(Hdl), + Valid; + {error, enoent} -> {ok, [], 0}; + {error, Reason} -> throw({error, + {unable_to_scan_file, FileName, Reason}}) + end. + %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- @@ -1180,7 +1231,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% consist only of valid messages. Plan: Truncate the main file %% back to before any of the files in the tmp file and copy %% them over again - TmpPath = rabbit_msg_store_misc:form_filename(Dir, TmpFileName), + TmpPath = form_filename(Dir, TmpFileName), case is_sublist(MsgIdsTmp, MsgIds) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file %% note this also catches the case when the tmp file @@ -1212,7 +1263,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% are in the tmp file true = is_disjoint(MsgIds1, MsgIdsTmp), %% must open with read flag, otherwise will stomp over contents - {ok, MainHdl} = rabbit_msg_store_misc:open_file( + {ok, MainHdl} = open_file( Dir, NonTmpRelatedFileName, [read | ?WRITE_MODE]), %% Wipe out any rubbish at the end of the file. Remember %% the head of the list will be the highest entry in the @@ -1222,9 +1273,9 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% Extend the main file as big as necessary in a single %% move. 
If we run out of disk space, this truncate could %% fail, but we still aren't risking losing data - ok = rabbit_msg_store_misc:truncate_and_extend_file( + ok = truncate_and_extend_file( MainHdl, Top, Top + TmpSize), - {ok, TmpHdl} = rabbit_msg_store_misc:open_file( + {ok, TmpHdl} = open_file( Dir, TmpFileName, ?READ_AHEAD_MODE), {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize), ok = file_handle_cache:close(MainHdl), @@ -1248,7 +1299,7 @@ is_disjoint(SmallerL, BiggerL) -> scan_file_for_valid_messages_msg_ids(Dir, FileName) -> {ok, Messages, _FileSize} = - rabbit_msg_store_misc:scan_file_for_valid_messages(Dir, FileName), + scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [MsgId || {MsgId, _TotalSize, _FileOffset} <- Messages]}. %% Takes the list in *ascending* order (i.e. eldest message @@ -1318,8 +1369,8 @@ build_index(Gatherer, Left, [File|Files], State) -> build_index_worker( Gatherer, Guid, State = #msstate { dir = Dir }, Left, File, Files) -> {ok, Messages, FileSize} = - rabbit_msg_store_misc:scan_file_for_valid_messages( - Dir, rabbit_msg_store_misc:filenum_to_name(File)), + scan_file_for_valid_messages( + Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> @@ -1357,7 +1408,7 @@ build_index_worker( ok = gatherer:finished(Gatherer, Guid). %%---------------------------------------------------------------------------- -%% garbage collection / compaction / aggregation +%% garbage collection / compaction / aggregation -- internal %%---------------------------------------------------------------------------- maybe_roll_to_new_file(Offset, @@ -1370,8 +1421,8 @@ maybe_roll_to_new_file(Offset, State1 = internal_sync(State), ok = file_handle_cache:close(CurHdl), NextFile = CurFile + 1, - {ok, NextHdl} = rabbit_msg_store_misc:open_file( - Dir, rabbit_msg_store_misc:filenum_to_name(NextFile), + {ok, NextHdl} = open_file( + Dir, filenum_to_name(NextFile), ?WRITE_MODE), true = ets:insert_new( FileSummaryEts, #file_summary { @@ -1478,9 +1529,178 @@ delete_file_if_empty(File, State = true = mark_handle_to_close(FileHandlesEts, File), true = ets:delete(FileSummaryEts, File), State1 = close_handle(File, State), - ok = file:delete(rabbit_msg_store_misc:form_filename( + ok = file:delete(form_filename( Dir, - rabbit_msg_store_misc:filenum_to_name(File))), + filenum_to_name(File))), State1 #msstate { sum_file_size = SumFileSize - FileSize }; _ -> State end. 
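%% [Editor's aside, not part of the patch. The figure that gc/3 below returns
%% to rabbit_msg_store:gc_done/4 is simply the garbage shed by combining the
%% two files, i.e. their total size minus the live data in each:]
reclaimed_bytes(SourceFileSize, DestFileSize, SourceValid, DestValid) ->
    SourceFileSize + DestFileSize - (SourceValid + DestValid).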
+ +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- external +%%---------------------------------------------------------------------------- + +gc(SourceFile, DestFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> + + [SourceObj = #file_summary { + readers = SourceReaders, + valid_total_size = SourceValidData, left = DestFile, + file_size = SourceFileSize, locked = true }] = + ets:lookup(FileSummaryEts, SourceFile), + [DestObj = #file_summary { + readers = DestReaders, + valid_total_size = DestValidData, right = SourceFile, + file_size = DestFileSize, locked = true }] = + ets:lookup(FileSummaryEts, DestFile), + + case SourceReaders =:= 0 andalso DestReaders =:= 0 of + true -> + TotalValidData = DestValidData + SourceValidData, + ok = combine_files(SourceObj, DestObj, State), + %% don't update dest.right, because it could be changing + %% at the same time + true = ets:update_element( + FileSummaryEts, DestFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.contiguous_top, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SourceFileSize + DestFileSize - TotalValidData; + false -> + timer:sleep(100), + gc(SourceFile, DestFile, State) + end. + +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, + State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> + SourceName = filenum_to_name(Source), + DestinationName = filenum_to_name(Destination), + {ok, SourceHdl} = + open_file(Dir, SourceName, ?READ_AHEAD_MODE), + {ok, DestinationHdl} = + open_file(Dir, DestinationName, + ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ExpectedSize = SourceValid + DestinationValid, + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source + if DestinationContiguousTop =:= DestinationValid -> + ok = truncate_and_extend_file( + DestinationHdl, DestinationValid, ExpectedSize); + true -> + Worklist = + lists:dropwhile( + fun (#msg_location { offset = Offset }) + when Offset /= DestinationContiguousTop -> + %% it cannot be that Offset == + %% DestinationContiguousTop because if it + %% was then DestinationContiguousTop would + %% have been extended by TotalSize + Offset < DestinationContiguousTop + %% Given expected access patterns, I suspect + %% that the list should be naturally sorted + %% as we require, however, we need to + %% enforce it anyway + end, + find_unremoved_messages_in_file(Destination, State)), + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = open_file( + Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ok = copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination, State), + TmpSize = DestinationValid - DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage from + %% Destination, and index_state has been updated to + %% reflect the compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file_handle_cache:position(TmpHdl, 0), + ok = truncate_and_extend_file( + 
DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + ok = file_handle_cache:sync(DestinationHdl), + ok = file_handle_cache:close(TmpHdl), + ok = file:delete(form_filename(Dir, Tmp)) + end, + SourceWorkList = find_unremoved_messages_in_file(Source, State), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State), + %% tidy up + ok = file_handle_cache:close(SourceHdl), + ok = file_handle_cache:close(DestinationHdl), + ok = file:delete(form_filename(Dir, SourceName)), + ok. + +find_unremoved_messages_in_file(File, + {_FileSummaryEts, Dir, Index, IndexState}) -> + %% Msgs here will be end-of-file at start-of-list + {ok, Messages, _FileSize} = + scan_file_for_valid_messages( + Dir, filenum_to_name(File)), + %% foldl will reverse so will end up with msgs in ascending offset order + lists:foldl( + fun ({MsgId, _TotalSize, _Offset}, Acc) -> + case Index:lookup(MsgId, IndexState) of + Entry = #msg_location { file = File } -> [ Entry | Acc ]; + _ -> Acc + end + end, [], Messages). + +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> + {FinalOffset, BlockStart1, BlockEnd1} = + lists:foldl( + fun (#msg_location { msg_id = MsgId, offset = Offset, + total_size = TotalSize }, + {CurOffset, BlockStart, BlockEnd}) -> + %% CurOffset is in the DestinationFile. + %% Offset, BlockStart and BlockEnd are in the SourceFile + %% update MsgLocation to reflect change of file and offset + ok = Index:update_fields(MsgId, + [{#msg_location.file, Destination}, + {#msg_location.offset, CurOffset}], + IndexState), + {BlockStart2, BlockEnd2} = + if BlockStart =:= undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset =:= BlockEnd -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + true -> + %% found a gap, so actually do the work + %% for the previous block + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, + BlockStart), + {ok, BSize} = file_handle_cache:copy( + SourceHdl, DestinationHdl, BSize), + {Offset, Offset + TotalSize} + end, + {CurOffset + TotalSize, BlockStart2, BlockEnd2} + end, {InitOffset, undefined, undefined}, WorkList), + case WorkList of + [] -> + ok; + _ -> + %% do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = + file_handle_cache:position(SourceHdl, BlockStart1), + {ok, BSize1} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), + ok = file_handle_cache:sync(DestinationHdl) + end, + ok. diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 9cf11af2..5c8e88d6 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -46,7 +46,7 @@ file_summary_ets }). --include("rabbit_msg_store.hrl"). +-include("rabbit.hrl"). %%---------------------------------------------------------------------------- @@ -73,9 +73,12 @@ init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> handle_call(stop, _From, State) -> {stop, normal, ok, State}. 
-handle_cast({gc, Source, Destination}, State = #gcstate { parent = Parent }) -> - Reclaimed = adjust_meta_and_combine(Source, Destination, - State), +handle_cast({gc, Source, Destination}, State = + #gcstate { parent = Parent, dir = Dir, index_module = Index, + index_state = IndexState, + file_summary_ets = FileSummaryEts }) -> + Reclaimed = rabbit_msg_store:gc(Source, Destination, + {FileSummaryEts, Dir, Index, IndexState}), ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, Destination), {noreply, State, hibernate}. @@ -91,173 +94,3 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - -%%---------------------------------------------------------------------------- - -adjust_meta_and_combine(SourceFile, DestFile, State = - #gcstate { file_summary_ets = FileSummaryEts }) -> - - [SourceObj = #file_summary { - readers = SourceReaders, - valid_total_size = SourceValidData, left = DestFile, - file_size = SourceFileSize, locked = true }] = - ets:lookup(FileSummaryEts, SourceFile), - [DestObj = #file_summary { - readers = DestReaders, - valid_total_size = DestValidData, right = SourceFile, - file_size = DestFileSize, locked = true }] = - ets:lookup(FileSummaryEts, DestFile), - - case SourceReaders =:= 0 andalso DestReaders =:= 0 of - true -> - TotalValidData = DestValidData + SourceValidData, - ok = combine_files(SourceObj, DestObj, State), - %% don't update dest.right, because it could be changing - %% at the same time - true = ets:update_element( - FileSummaryEts, DestFile, - [{#file_summary.valid_total_size, TotalValidData}, - {#file_summary.contiguous_top, TotalValidData}, - {#file_summary.file_size, TotalValidData}]), - SourceFileSize + DestFileSize - TotalValidData; - false -> - timer:sleep(100), - adjust_meta_and_combine(SourceFile, DestFile, State) - end. 
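%% [Editor's aside, not part of the patch. Both the deleted code here and its
%% replacement in rabbit_msg_store:gc/3 poll until in-flight readers of the
%% two files drain; the locked = true match suggests no new readers can start
%% meanwhile. A distilled sketch with illustrative names:]
wait_for_readers(FileSummaryEts, File) ->
    case ets:lookup(FileSummaryEts, File) of
        [#file_summary { readers = 0 }] -> ok;
        [#file_summary {}]              -> timer:sleep(100),
                                           wait_for_readers(FileSummaryEts, File)
    end.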
- -combine_files(#file_summary { file = Source, - valid_total_size = SourceValid, - left = Destination }, - #file_summary { file = Destination, - valid_total_size = DestinationValid, - contiguous_top = DestinationContiguousTop, - right = Source }, - State = #gcstate { dir = Dir }) -> - SourceName = rabbit_msg_store_misc:filenum_to_name(Source), - DestinationName = rabbit_msg_store_misc:filenum_to_name(Destination), - {ok, SourceHdl} = - rabbit_msg_store_misc:open_file(Dir, SourceName, ?READ_AHEAD_MODE), - {ok, DestinationHdl} = - rabbit_msg_store_misc:open_file(Dir, DestinationName, - ?READ_AHEAD_MODE ++ ?WRITE_MODE), - ExpectedSize = SourceValid + DestinationValid, - %% if DestinationValid =:= DestinationContiguousTop then we don't - %% need a tmp file - %% if they're not equal, then we need to write out everything past - %% the DestinationContiguousTop to a tmp file then truncate, - %% copy back in, and then copy over from Source - %% otherwise we just truncate straight away and copy over from Source - if DestinationContiguousTop =:= DestinationValid -> - ok = rabbit_msg_store_misc:truncate_and_extend_file( - DestinationHdl, DestinationValid, ExpectedSize); - true -> - Worklist = - lists:dropwhile( - fun (#msg_location { offset = Offset }) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == - %% DestinationContiguousTop because if it - %% was then DestinationContiguousTop would - %% have been extended by TotalSize - Offset < DestinationContiguousTop - %% Given expected access patterns, I suspect - %% that the list should be naturally sorted - %% as we require, however, we need to - %% enforce it anyway - end, - find_unremoved_messages_in_file(Destination, State)), - Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = rabbit_msg_store_misc:open_file( - Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), - ok = copy_messages( - Worklist, DestinationContiguousTop, DestinationValid, - DestinationHdl, TmpHdl, Destination, State), - TmpSize = DestinationValid - DestinationContiguousTop, - %% so now Tmp contains everything we need to salvage from - %% Destination, and index_state has been updated to - %% reflect the compaction of Destination so truncate - %% Destination and copy from Tmp back to the end - {ok, 0} = file_handle_cache:position(TmpHdl, 0), - ok = rabbit_msg_store_misc:truncate_and_extend_file( - DestinationHdl, DestinationContiguousTop, ExpectedSize), - {ok, TmpSize} = - file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), - %% position in DestinationHdl should now be DestinationValid - ok = file_handle_cache:sync(DestinationHdl), - ok = file_handle_cache:close(TmpHdl), - ok = file:delete(rabbit_msg_store_misc:form_filename(Dir, Tmp)) - end, - SourceWorkList = find_unremoved_messages_in_file(Source, State), - ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, - SourceHdl, DestinationHdl, Destination, State), - %% tidy up - ok = file_handle_cache:close(SourceHdl), - ok = file_handle_cache:close(DestinationHdl), - ok = file:delete(rabbit_msg_store_misc:form_filename(Dir, SourceName)), - ok. 
- -find_unremoved_messages_in_file(File, #gcstate { dir = Dir, - index_state = IndexState, - index_module = Index }) -> - %% Msgs here will be end-of-file at start-of-list - {ok, Messages, _FileSize} = - rabbit_msg_store_misc:scan_file_for_valid_messages( - Dir, rabbit_msg_store_misc:filenum_to_name(File)), - %% foldl will reverse so will end up with msgs in ascending offset order - lists:foldl( - fun ({MsgId, _TotalSize, _Offset}, Acc) -> - case Index:lookup(MsgId, IndexState) of - Entry = #msg_location { file = File } -> [ Entry | Acc ]; - _ -> Acc - end - end, [], Messages). - -copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, - Destination, #gcstate { index_module = Index, - index_state = IndexState }) -> - {FinalOffset, BlockStart1, BlockEnd1} = - lists:foldl( - fun (#msg_location { msg_id = MsgId, offset = Offset, - total_size = TotalSize }, - {CurOffset, BlockStart, BlockEnd}) -> - %% CurOffset is in the DestinationFile. - %% Offset, BlockStart and BlockEnd are in the SourceFile - %% update MsgLocation to reflect change of file and offset - ok = Index:update_fields(MsgId, - [{#msg_location.file, Destination}, - {#msg_location.offset, CurOffset}], - IndexState), - {BlockStart2, BlockEnd2} = - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {Offset, Offset + TotalSize}; - Offset =:= BlockEnd -> - %% extend the current block because the - %% next msg follows straight on - {BlockStart, BlockEnd + TotalSize}; - true -> - %% found a gap, so actually do the work - %% for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file_handle_cache:position(SourceHdl, - BlockStart), - {ok, BSize} = file_handle_cache:copy( - SourceHdl, DestinationHdl, BSize), - {Offset, Offset + TotalSize} - end, - {CurOffset + TotalSize, BlockStart2, BlockEnd2} - end, {InitOffset, undefined, undefined}, WorkList), - case WorkList of - [] -> - ok; - _ -> - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = - file_handle_cache:position(SourceHdl, BlockStart1), - {ok, BSize1} = - file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), - ok = file_handle_cache:sync(DestinationHdl) - end, - ok. diff --git a/src/rabbit_msg_store_misc.erl b/src/rabbit_msg_store_misc.erl deleted file mode 100644 index 3cece7da..00000000 --- a/src/rabbit_msg_store_misc.erl +++ /dev/null @@ -1,74 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2010 Cohesive Financial Technologies -%% LLC. 
Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2010 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_msg_store_misc). - --export([open_file/3, preallocate/3, truncate_and_extend_file/3, - form_filename/2, filenum_to_name/1, scan_file_for_valid_messages/2]). - --include("rabbit_msg_store.hrl"). - - -%%---------------------------------------------------------------------------- - -open_file(Dir, FileName, Mode) -> - file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, - [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). - -%%---------------------------------------------------------------------------- - -preallocate(Hdl, FileSizeLimit, FinalPos) -> - {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), - ok = file_handle_cache:truncate(Hdl), - {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), - ok. - -truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> - {ok, Lowpoint} = file_handle_cache:position(FileHdl, Lowpoint), - ok = file_handle_cache:truncate(FileHdl), - ok = preallocate(FileHdl, Highpoint, Lowpoint). - -form_filename(Dir, Name) -> filename:join(Dir, Name). - -filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. - -scan_file_for_valid_messages(Dir, FileName) -> - case open_file(Dir, FileName, ?READ_MODE) of - {ok, Hdl} -> - Valid = rabbit_msg_file:scan(Hdl), - %% if something really bad's happened, the close could fail, - %% but ignore - file_handle_cache:close(Hdl), - Valid; - {error, enoent} -> {ok, [], 0}; - {error, Reason} -> throw({error, - {unable_to_scan_file, FileName, Reason}}) - end. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index b37845d4..f5f49cf4 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -186,7 +186,6 @@ path :: file_path(), num :: non_neg_integer() })). --type(msg_id() :: binary()). -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). -type(qistate() :: #qistate { dir :: file_path(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 297c3ef4..9bb031f3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -163,8 +163,6 @@ transient_threshold }). --include("rabbit.hrl"). - -record(msg_status, { msg, msg_id, @@ -190,11 +188,12 @@ %% more. -define(RAM_INDEX_BATCH_SIZE, 64). +-include("rabbit.hrl"). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(msg_id() :: binary()). -type(bpqueue() :: any()). -type(seq_id() :: non_neg_integer()). -type(ack() :: {'ack_index_and_store', msg_id(), seq_id(), atom() | pid()} -- cgit v1.2.1 From c34c600ad5c348a878c33195bd9762081b3bd525 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Apr 2010 17:26:45 +0100 Subject: set_queue_ram_duration_target => set_queue_duration_target and ram_duration => queue_duration for consistency --- include/rabbit_internal_queue_type_spec.hrl | 4 ++-- src/rabbit_amqqueue_process.erl | 8 ++++---- src/rabbit_internal_queue_type.erl | 4 ++-- src/rabbit_variable_queue.erl | 10 +++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/rabbit_internal_queue_type_spec.hrl b/include/rabbit_internal_queue_type_spec.hrl index 6409efb6..f8b6877a 100644 --- a/include/rabbit_internal_queue_type_spec.hrl +++ b/include/rabbit_internal_queue_type_spec.hrl @@ -33,10 +33,10 @@ -spec(terminate/1 :: (state()) -> state()).
-spec(publish/2 :: (basic_message(), state()) -> state()). -spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). --spec(set_queue_ram_duration_target/2 :: +-spec(set_queue_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(remeasure_rates/1 :: (state()) -> state()). --spec(ram_duration/1 :: (state()) -> number()). +-spec(queue_duration/1 :: (state()) -> number()). -spec(fetch/1 :: (state()) -> {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), state()}). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7f43f79a..94e8662d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -932,17 +932,17 @@ handle_cast({flush, ChPid}, State) -> handle_cast(remeasure_rates, State = #q{internal_queue_state = IQS, internal_queue = IQ}) -> IQS1 = IQ:remeasure_rates(IQS), - RamDuration = IQ:ram_duration(IQS1), + RamDuration = IQ:queue_duration(IQS1), DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), RamDuration), - IQS2 = IQ:set_queue_ram_duration_target(DesiredDuration, IQS1), + IQS2 = IQ:set_queue_duration_target(DesiredDuration, IQS1), noreply(State#q{rate_timer_ref = just_measured, internal_queue_state = IQS2}); handle_cast({set_queue_duration, Duration}, State = #q{internal_queue_state = IQS, internal_queue = IQ}) -> - IQS1 = IQ:set_queue_ram_duration_target(Duration, IQS), + IQS1 = IQ:set_queue_duration_target(Duration, IQS), noreply(State#q{internal_queue_state = IQS1}); handle_cast({set_maximum_since_use, Age}, State) -> @@ -988,5 +988,5 @@ handle_pre_hibernate(State = #q{internal_queue_state = IQS, %% no activity for a while == 0 egress and ingress rates DesiredDuration = rabbit_memory_monitor:report_queue_duration(self(), infinity), - IQS2 = IQ:set_queue_ram_duration_target(DesiredDuration, IQS1), + IQS2 = IQ:set_queue_duration_target(DesiredDuration, IQS1), {hibernate, stop_rate_timer(State#q{internal_queue_state = IQS2})}. diff --git a/src/rabbit_internal_queue_type.erl b/src/rabbit_internal_queue_type.erl index 4ee4556a..48d9314d 100644 --- a/src/rabbit_internal_queue_type.erl +++ b/src/rabbit_internal_queue_type.erl @@ -74,11 +74,11 @@ behaviour_info(callbacks) -> {is_empty, 1}, - {set_queue_ram_duration_target, 2}, + {set_queue_duration_target, 2}, {remeasure_rates, 1}, - {ram_duration, 1}, + {queue_duration, 1}, %% Can return 'undefined' or a function atom name plus list of %% arguments to be invoked in the internal queue module as soon diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9bb031f3..c01ab5a4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,8 +32,8 @@ -module(rabbit_variable_queue). -export([init/2, terminate/1, publish/2, publish_delivered/2, - set_queue_ram_duration_target/2, remeasure_rates/1, - ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, + set_queue_duration_target/2, remeasure_rates/1, + queue_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, needs_sync/1, handle_pre_hibernate/1, status/1]). @@ -344,7 +344,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, {ack_not_on_disk, State2} end. 
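%% [Editor's sketch, not part of the patch. Per the ack() type above,
%% publish_delivered/2 hands back one of two ack shapes; an illustrative
%% helper picking out the acks that carry on-disk state to clean up:]
acks_needing_disk_work(AckTags) ->
    [Ack || Ack = {ack_index_and_store, _MsgId, _SeqId, _MsgStore} <- AckTags].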
-set_queue_ram_duration_target( +set_queue_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, target_ram_msg_count = TargetRamMsgCount @@ -375,7 +375,7 @@ remeasure_rates(State = #vqstate { egress_rate = Egress, {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), - set_queue_ram_duration_target( + set_queue_duration_target( DurationTarget, State #vqstate { egress_rate = Egress1, avg_egress_rate = AvgEgressRate, @@ -385,7 +385,7 @@ remeasure_rates(State = #vqstate { egress_rate = Egress, ram_msg_count_prev = RamMsgCount, out_counter = 0, in_counter = 0 }). -ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, +queue_duration(#vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, ram_msg_count = RamMsgCount, ram_msg_count_prev = RamMsgCountPrev }) -> -- cgit v1.2.1 From 84d3b89f3687a8419855aa927a234c98c19c05b8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Apr 2010 17:46:35 +0100 Subject: Ooops, missed a few --- src/rabbit_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 838c5f9c..a97730e0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1429,7 +1429,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), VQ4 = rabbit_variable_queue:remeasure_rates(VQ3), VQ5 = %% /37 otherwise the duration is just to high to stress things - rabbit_variable_queue:set_queue_ram_duration_target(N/37, VQ4), + rabbit_variable_queue:set_queue_duration_target(N/37, VQ4), io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), test_variable_queue_dynamic_duration_change_f(Len, VQ5) after 0 -> @@ -1442,7 +1442,7 @@ test_variable_queue_partial_segments_delta_thing() -> VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), VQ2 = rabbit_variable_queue:remeasure_rates(VQ1), - VQ3 = rabbit_variable_queue:set_queue_ram_duration_target(0, VQ2), + VQ3 = rabbit_variable_queue:set_queue_duration_target(0, VQ2), %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), io:format("~p~n", [S3]), @@ -1450,7 +1450,7 @@ test_variable_queue_partial_segments_delta_thing() -> SegmentSize + HalfSegment}), assert_prop(S3, q3, SegmentSize), assert_prop(S3, len, SegmentSize + HalfSegment), - VQ4 = rabbit_variable_queue:set_queue_ram_duration_target(infinity, VQ3), + VQ4 = rabbit_variable_queue:set_queue_duration_target(infinity, VQ3), VQ5 = variable_queue_publish(true, 1, VQ4), %% should have 1 alpha, but it's in the same segment as the deltas S5 = rabbit_variable_queue:status(VQ5), -- cgit v1.2.1 From 3dee227bdc7bc391ef5b6c02a693c22d68ccaf8f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Apr 2010 18:01:16 +0100 Subject: Just making the order of the specs match the order in the behaviour --- include/rabbit_internal_queue_type_spec.hrl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/rabbit_internal_queue_type_spec.hrl b/include/rabbit_internal_queue_type_spec.hrl index f8b6877a..ec120f24 100644 --- a/include/rabbit_internal_queue_type_spec.hrl +++ b/include/rabbit_internal_queue_type_spec.hrl @@ -31,25 +31,25 @@ -spec(init/2 :: (queue_name(), pid() | atom()) -> state()). -spec(terminate/1 :: (state()) -> state()). 
+-spec(delete_and_terminate/1 :: (state()) -> state()). +-spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). -spec(publish/2 :: (basic_message(), state()) -> state()). -spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). -spec(fetch/1 :: (state()) -> {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), state()}). -spec(ack/2 :: ([ack()], state()) -> state()). -spec(tx_publish/2 :: (basic_message(), state()) -> state()). -spec(tx_rollback/2 :: ([msg_id()], state()) -> state()). -spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, state()) -> {boolean(), state()}). +-spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). +-spec(len/1 :: (state()) -> non_neg_integer()). +-spec(is_empty/1 :: (state()) -> boolean()). +-spec(set_queue_duration_target/2 :: + (('undefined' | 'infinity' | number()), state()) -> state()). +-spec(remeasure_rates/1 :: (state()) -> state()). +-spec(queue_duration/1 :: (state()) -> number()). -spec(needs_sync/1 :: (state()) -> ('undefined' | {atom(), [any()]})). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). -- cgit v1.2.1 From f21782367ae29ff419d1d51d2b76b3b6b44f34a4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Apr 2010 15:37:12 +0100 Subject: All sorts of tidying, cosmetics, reorganisation and pruning. A veritable smörgåsbord of improvements.
--- ebin/rabbit_app.in | 2 +- include/rabbit.hrl | 8 +- include/rabbit_backing_queue_type_spec.hrl | 55 ++++++ include/rabbit_internal_queue_type_spec.hrl | 55 ------ include/rabbit_msg_store.hrl | 6 + include/rabbit_variable_queue.hrl | 33 ++++ src/rabbit_amqqueue.erl | 53 +++--- src/rabbit_amqqueue_process.erl | 278 ++++++++++++++-------------- src/rabbit_backing_queue_type.erl | 122 ++++++++++++ src/rabbit_internal_queue_type.erl | 97 ---------- src/rabbit_memory_monitor.erl | 10 +- src/rabbit_misc.erl | 11 +- src/rabbit_msg_file.erl | 2 +- src/rabbit_msg_store.erl | 2 +- src/rabbit_queue_index.erl | 12 +- src/rabbit_tests.erl | 30 ++- src/rabbit_variable_queue.erl | 37 ++-- src/random_distributions.erl | 38 ---- 18 files changed, 440 insertions(+), 411 deletions(-) create mode 100644 include/rabbit_backing_queue_type_spec.hrl delete mode 100644 include/rabbit_internal_queue_type_spec.hrl create mode 100644 include/rabbit_variable_queue.hrl create mode 100644 src/rabbit_backing_queue_type.erl delete mode 100644 src/rabbit_internal_queue_type.erl delete mode 100644 src/random_distributions.erl diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index a481af08..ffac1cd8 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -19,7 +19,7 @@ {ssl_options, []}, {vm_memory_high_watermark, 0.4}, {msg_store_index_module, rabbit_msg_store_ets_index}, - {queue_internal_queue_module, rabbit_variable_queue}, + {backing_queue_module, rabbit_variable_queue}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 35134ee7..d3c9ffee 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -84,9 +84,8 @@ -type(info_key() :: atom()). -type(info() :: {info_key(), any()}). -type(regexp() :: binary()). --type(file_path() :: any()). +-type(file_path() :: string()). -type(io_device() :: any()). --type(file_open_mode() :: any()). %% this is really an abstract type, but dialyzer does not support them -type(guid() :: binary()). @@ -170,8 +169,6 @@ explanation :: string(), method :: atom()}). --type(msg() :: any()). - -endif. %%---------------------------------------------------------------------------- @@ -181,9 +178,6 @@ -define(MAX_WAIT, 16#ffffffff). --define(PERSISTENT_MSG_STORE, msg_store_persistent). --define(TRANSIENT_MSG_STORE, msg_store_transient). - -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl new file mode 100644 index 00000000..836e220b --- /dev/null +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -0,0 +1,55 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-spec(init/2 :: (queue_name(), boolean()) -> state()). +-spec(terminate/1 :: (state()) -> state()). +-spec(delete_and_terminate/1 :: (state()) -> state()). +-spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). +-spec(publish/2 :: (basic_message(), state()) -> state()). +-spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). +-spec(fetch/1 :: (state()) -> + {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), + state()}). +-spec(ack/2 :: ([ack()], state()) -> state()). +-spec(tx_publish/2 :: (basic_message(), state()) -> state()). +-spec(tx_rollback/2 :: ([msg_id()], state()) -> state()). +-spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, state()) -> + {boolean(), state()}). +-spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). +-spec(len/1 :: (state()) -> non_neg_integer()). +-spec(is_empty/1 :: (state()) -> boolean()). +-spec(set_ram_duration_target/2 :: + (('undefined' | 'infinity' | number()), state()) -> state()). +-spec(update_ram_duration/1 :: (state()) -> state()). +-spec(ram_duration/1 :: (state()) -> number()). +-spec(needs_sync/1 :: (state()) -> ('undefined' | {atom(), [any()]})). +-spec(handle_pre_hibernate/1 :: (state()) -> state()). +-spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_internal_queue_type_spec.hrl b/include/rabbit_internal_queue_type_spec.hrl deleted file mode 100644 index ec120f24..00000000 --- a/include/rabbit_internal_queue_type_spec.hrl +++ /dev/null @@ -1,55 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. 
-%% - --spec(init/2 :: (queue_name(), pid() | atom()) -> state()). --spec(terminate/1 :: (state()) -> state()). --spec(delete_and_terminate/1 :: (state()) -> state()). --spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). --spec(publish/2 :: (basic_message(), state()) -> state()). --spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). --spec(fetch/1 :: (state()) -> - {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), - state()}). --spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/2 :: (basic_message(), state()) -> state()). --spec(tx_rollback/2 :: ([msg_id()], state()) -> state()). --spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, state()) -> - {boolean(), state()}). --spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). --spec(len/1 :: (state()) -> non_neg_integer()). --spec(is_empty/1 :: (state()) -> boolean()). --spec(set_queue_duration_target/2 :: - (('undefined' | 'infinity' | number()), state()) -> state()). --spec(remeasure_rates/1 :: (state()) -> state()). --spec(queue_duration/1 :: (state()) -> number()). --spec(needs_sync/1 :: (state()) -> ('undefined' | {atom(), [any()]})). --spec(handle_pre_hibernate/1 :: (state()) -> state()). --spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 112588f3..696ccf3c 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -31,5 +31,11 @@ -include("rabbit.hrl"). +-ifdef(use_specs). + +-type(msg() :: any()). + +-endif. + -record(msg_location, {msg_id, ref_count, file, offset, total_size}). diff --git a/include/rabbit_variable_queue.hrl b/include/rabbit_variable_queue.hrl new file mode 100644 index 00000000..2e43a8fa --- /dev/null +++ b/include/rabbit_variable_queue.hrl @@ -0,0 +1,33 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 00407824..235b1edb 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -32,8 +32,8 @@ -module(rabbit_amqqueue). -export([start/0, declare/4, delete/3, purge/1]). 
--export([internal_declare/2, internal_delete/1, remeasure_rates/1, - set_queue_duration/2, set_maximum_since_use/2]). +-export([internal_declare/2, internal_delete/1, update_ram_duration/1, + set_ram_duration_target/2, set_maximum_since_use/2]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, stat/1, stat_all/0, deliver/2, redeliver/2, requeue/3, ack/4]). @@ -41,7 +41,7 @@ -export([consumers/1, consumers_all/1]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, maybe_run_queue_via_internal_queue/3, +-export([notify_sent/2, unblock/2, maybe_run_queue_via_backing_queue/3, flush_all/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -109,12 +109,12 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(maybe_run_queue_via_internal_queue/3 :: (pid(), atom(), [any()]) -> 'ok'). +-spec(maybe_run_queue_via_backing_queue/3 :: (pid(), atom(), [any()]) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). --spec(remeasure_rates/1 :: (pid()) -> 'ok'). --spec(set_queue_duration/2 :: (pid(), number()) -> 'ok'). +-spec(update_ram_duration/1 :: (pid()) -> 'ok'). +-spec(set_ram_duration_target/2 :: (pid(), number()) -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). -spec(on_node_down/1 :: (erlang_node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). @@ -124,13 +124,8 @@ %%---------------------------------------------------------------------------- start() -> - ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), - ok = rabbit_sup:start_child( - ?TRANSIENT_MSG_STORE, rabbit_msg_store, - [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, - fun (ok) -> finished end, ok]), DurableQueues = find_durable_queues(), - ok = rabbit_queue_index:start_persistent_msg_store(DurableQueues), + ok = rabbit_queue_index:start_msg_stores(DurableQueues), {ok,_} = supervisor:start_child( rabbit_sup, {rabbit_amqqueue_sup, @@ -152,7 +147,7 @@ find_durable_queues() -> recover_durable_queues(DurableQueues) -> Qs = lists:foldl( fun (RecoveredQ, Acc) -> - Q = start_queue_process(RecoveredQ), + Q = start_queue_process(RecoveredQ, false), %% We need to catch the case where a client %% connected to another node has deleted the queue %% (and possibly re-created it). @@ -166,16 +161,14 @@ recover_durable_queues(DurableQueues) -> [] -> false end end) of - true -> - ok = gen_server2:call(Q#amqqueue.pid, - init_internal_queue, - infinity), - [Q|Acc]; + true -> [Q|Acc]; false -> exit(Q#amqqueue.pid, shutdown), Acc end end, [], DurableQueues), - [ok = gen_server2:call(Q#amqqueue.pid, sync, infinity) || Q <- Qs], + %% Issue inits to *all* the queues so that they all init at the same time + [ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue) || Q <- Qs], + [ok = gen_server2:call(Q#amqqueue.pid, sync) || Q <- Qs], Qs. declare(QueueName, Durable, AutoDelete, Args) -> @@ -183,7 +176,7 @@ declare(QueueName, Durable, AutoDelete, Args) -> durable = Durable, auto_delete = AutoDelete, arguments = Args, - pid = none}), + pid = none}, true), internal_declare(Q, true). 
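%% [Editor's aside, not part of the patch. Recovery above deliberately casts
%% init_backing_queue to every queue before making any synchronous call, so
%% all the backing queues initialise concurrently rather than one at a time.
%% A distilled sketch over a list of queue pids (illustrative helper name):]
init_backing_queues_concurrently(QPids) ->
    [ok = gen_server2:cast(Pid, init_backing_queue) || Pid <- QPids],
    [ok = gen_server2:call(Pid, sync) || Pid <- QPids],
    ok.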
internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) -> @@ -198,9 +191,6 @@ internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) -> true -> add_default_binding(Q); false -> ok end, - ok = gen_server2:call( - Q#amqqueue.pid, - init_internal_queue, infinity), Q; [_] -> not_found %% existing Q on stopped node end; @@ -223,8 +213,9 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -start_queue_process(Q) -> - {ok, Pid} = supervisor2:start_child(rabbit_amqqueue_sup, [Q]), +start_queue_process(Q, InitBackingQueue) -> + {ok, Pid} = + supervisor2:start_child(rabbit_amqqueue_sup, [Q, InitBackingQueue]), Q#amqqueue{pid = Pid}. add_default_binding(#amqqueue{name = QueueName}) -> @@ -358,8 +349,8 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 7, {unblock, ChPid}). -maybe_run_queue_via_internal_queue(QPid, Fun, Args) -> - gen_server2:pcast(QPid, 7, {maybe_run_queue_via_internal_queue, Fun, Args}). +maybe_run_queue_via_backing_queue(QPid, Fun, Args) -> + gen_server2:pcast(QPid, 7, {maybe_run_queue_via_backing_queue, Fun, Args}). flush_all(QPids, ChPid) -> safe_pmap_ok( @@ -388,11 +379,11 @@ internal_delete(QueueName) -> ok end. -remeasure_rates(QPid) -> - gen_server2:pcast(QPid, 8, remeasure_rates). +update_ram_duration(QPid) -> + gen_server2:pcast(QPid, 8, update_ram_duration). -set_queue_duration(QPid, Duration) -> - gen_server2:pcast(QPid, 8, {set_queue_duration, Duration}). +set_ram_duration_target(QPid, Duration) -> + gen_server2:pcast(QPid, 8, {set_ram_duration_target, Duration}). set_maximum_since_use(QPid, Age) -> gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 94e8662d..a20cd6c3 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -37,9 +37,9 @@ -define(UNSENT_MESSAGE_LIMIT, 100). -define(SYNC_INTERVAL, 5). %% milliseconds --define(RATES_REMEASURE_INTERVAL, 5000). +-define(RAM_DURATION_UPDATE_INTERVAL, 5000). --export([start_link/1, info_keys/0]). +-export([start_link/2, info_keys/0]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]). @@ -53,9 +53,9 @@ owner, exclusive_consumer, has_had_consumers, - internal_queue, - internal_queue_state, - internal_queue_timeout_fun, + backing_queue, + backing_queue_state, + backing_queue_timeout_fun, next_msg_id, active_consumers, blocked_consumers, @@ -94,34 +94,34 @@ consumers, transactions, memory, - internal_queue_status + backing_queue_status ]). %%---------------------------------------------------------------------------- -start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). +start_link(Q, InitBackingQueue) -> + gen_server2:start_link(?MODULE, [Q, InitBackingQueue], []). info_keys() -> ?INFO_KEYS. 
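%% Illustrative aside (not part of this patch): the recovery path above
%% deliberately splits queue-process startup from backing queue
%% initialisation. Recovered processes begin with backing_queue_state =
%% undefined and are then all cast init_backing_queue, so the
%% potentially expensive inits run concurrently rather than via one
%% blocking call at a time. A minimal sketch of that handshake, with
%% recover_and_init/1 as an invented helper:

recover_and_init(QPids) ->
    %% kick off every backing queue init at once...
    [ok = gen_server2:cast(QPid, init_backing_queue) || QPid <- QPids],
    %% ...then block until each queue has drained its mailbox; the sync
    %% call returns only after init_backing_queue has been handled
    [ok = gen_server2:call(QPid, sync) || QPid <- QPids],
    ok.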
%%---------------------------------------------------------------------------- -init(Q) -> +init([Q, InitBQ]) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [self()]), ok = rabbit_memory_monitor:register - (self(), {rabbit_amqqueue, set_queue_duration, [self()]}), - {ok, InternalQueueModule} = - application:get_env(queue_internal_queue_module), + (self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), + {ok, BQ} = application:get_env(backing_queue_module), {ok, #q{q = Q, owner = none, exclusive_consumer = none, has_had_consumers = false, - internal_queue = InternalQueueModule, - internal_queue_state = undefined, - internal_queue_timeout_fun = undefined, + backing_queue = BQ, + backing_queue_state = maybe_init_backing_queue(InitBQ, BQ, Q), + backing_queue_timeout_fun = undefined, next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), @@ -129,33 +129,39 @@ init(Q) -> rate_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -terminate(shutdown, #q{internal_queue_state = IQS, - internal_queue = IQ}) -> +maybe_init_backing_queue( + true, BQ, #amqqueue{name = QName, durable = IsDurable}) -> + BQ:init(QName, IsDurable); +maybe_init_backing_queue(false, _BQ, _Q) -> + undefined. + +terminate(shutdown, #q{backing_queue_state = BQS, + backing_queue = BQ}) -> ok = rabbit_memory_monitor:deregister(self()), - case IQS of + case BQS of undefined -> ok; - _ -> IQ:terminate(IQS) + _ -> BQ:terminate(BQS) end; -terminate({shutdown, _}, #q{internal_queue_state = IQS, - internal_queue = IQ}) -> +terminate({shutdown, _}, #q{backing_queue_state = BQS, + backing_queue = BQ}) -> ok = rabbit_memory_monitor:deregister(self()), - case IQS of + case BQS of undefined -> ok; - _ -> IQ:terminate(IQS) + _ -> BQ:terminate(BQS) end; -terminate(_Reason, State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> +terminate(_Reason, State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? %% Ensure that any persisted tx messages are removed. %% TODO: wait for all in flight tx_commits to complete - case IQS of + case BQS of undefined -> ok; _ -> - IQS1 = IQ:tx_rollback( + BQS1 = BQ:tx_rollback( lists:concat([PM || #tx { pending_messages = PM } <- - all_tx_record()]), IQS), + all_tx_record()]), BQS), %% Delete from disk first. If we crash at this point, when %% a durable queue, we will be recreated at startup, %% possibly with partial content. The alternative is much @@ -163,7 +169,7 @@ terminate(_Reason, State = #q{internal_queue_state = IQS, %% would then have a race between the disk delete and a %% new queue with the same name being created and %% published to. - IQ:delete_and_terminate(IQS1) + BQ:delete_and_terminate(BQS1) end, ok = rabbit_amqqueue:internal_delete(qname(State)). @@ -182,9 +188,9 @@ noreply(NewState) -> {NewState1, Timeout} = next_state(NewState), {noreply, NewState1, Timeout}. -next_state(State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> - next_state1(ensure_rate_timer(State), IQ:needs_sync(IQS)). +next_state(State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> + next_state1(ensure_rate_timer(State), BQ:needs_sync(BQS)). 
next_state1(State = #q{sync_timer_ref = undefined}, Callback = {_Fun, _Args}) -> {start_sync_timer(State, Callback), 0}; @@ -193,11 +199,11 @@ next_state1(State, {_Fun, _Args}) -> next_state1(State = #q{sync_timer_ref = undefined}, undefined) -> {State, hibernate}; next_state1(State, undefined) -> - {stop_sync_timer(State#q{internal_queue_timeout_fun = undefined}), hibernate}. + {stop_sync_timer(State#q{backing_queue_timeout_fun = undefined}), hibernate}. ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after(?RATES_REMEASURE_INTERVAL, rabbit_amqqueue, - remeasure_rates, [self()]), + {ok, TRef} = timer:apply_after(?RAM_DURATION_UPDATE_INTERVAL, rabbit_amqqueue, + update_ram_duration, [self()]), State#q{rate_timer_ref = TRef}; ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> State#q{rate_timer_ref = undefined}; @@ -216,16 +222,16 @@ start_sync_timer(State = #q{sync_timer_ref = undefined}, Callback = {Fun, Args}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, - maybe_run_queue_via_internal_queue, [self(), Fun, Args]), - State#q{sync_timer_ref = TRef, internal_queue_timeout_fun = Callback}. + maybe_run_queue_via_backing_queue, [self(), Fun, Args]), + State#q{sync_timer_ref = TRef, backing_queue_timeout_fun = Callback}. stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), - State#q{sync_timer_ref = undefined, internal_queue_timeout_fun = undefined}. + State#q{sync_timer_ref = undefined, backing_queue_timeout_fun = undefined}. -assert_invariant(#q{active_consumers = AC, internal_queue_state = IQS, - internal_queue = IQ}) -> - true = (queue:is_empty(AC) orelse IQ:is_empty(IQS)). +assert_invariant(#q{active_consumers = AC, backing_queue_state = BQS, + backing_queue = BQ}) -> + true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). lookup_ch(ChPid) -> case get({ch, ChPid}) of @@ -340,73 +346,73 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> not IsEmpty. deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, - State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> - {{Message, IsDelivered, AckTag, Remaining}, IQS1} = IQ:fetch(IQS), + State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> + {{Message, IsDelivered, AckTag, Remaining}, BQS1} = BQ:fetch(BQS), AutoAcks1 = case AckRequired of true -> AutoAcks; false -> [AckTag | AutoAcks] end, {{Message, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, - State #q { internal_queue_state = IQS1 }}. + State #q { backing_queue_state = BQS1 }}. -run_message_queue(State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> +run_message_queue(State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> Funs = { fun deliver_from_queue_pred/2, fun deliver_from_queue_deliver/3 }, - IsEmpty = IQ:is_empty(IQS), + IsEmpty = BQ:is_empty(BQS), {{_IsEmpty1, AutoAcks}, State1} = deliver_msgs_to_consumers(Funs, {IsEmpty, []}, State), - IQS1 = IQ:ack(AutoAcks, State1 #q.internal_queue_state), - State1 #q { internal_queue_state = IQS1 }. + BQS1 = BQ:ack(AutoAcks, State1 #q.backing_queue_state), + State1 #q { backing_queue_state = BQS1 }. 
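%% Illustrative aside (not part of this patch): deliver_msgs_to_consumers/3
%% is driven by a {PredFun, DeliverFun} pair - the predicate says whether
%% another message can be produced, and the deliver fun produces it while
%% threading an accumulator and the queue state. Stripped of the consumer
%% bookkeeping, the loop reduces to the invented drain/4 below:

drain(Pred, Deliver, Acc, State) ->
    case Pred(Acc, State) of
        false -> {Acc, State};
        %% Deliver returns the produced item plus updated accumulator
        %% and state, mirroring deliver_from_queue_deliver/3 above
        true  -> {_Item, Acc1, State1} = Deliver(true, Acc, State),
                 drain(Pred, Deliver, Acc1, State1)
    end.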
-attempt_delivery(none, _ChPid, Message, State = #q{internal_queue = IQ}) -> +attempt_delivery(none, _ChPid, Message, State = #q{backing_queue = BQ}) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = fun (AckRequired, false, State1) -> {AckTag, State2} = case AckRequired of true -> - {AckTag1, IQS} = - IQ:publish_delivered( - Message, State1 #q.internal_queue_state), - {AckTag1, State1 #q { internal_queue_state = IQS }}; + {AckTag1, BQS} = + BQ:publish_delivered( + Message, State1 #q.backing_queue_state), + {AckTag1, State1 #q { backing_queue_state = BQS }}; false -> {noack, State1} end, {{Message, false, AckTag}, true, State2} end, deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); -attempt_delivery(Txn, ChPid, Message, State = #q{internal_queue = IQ}) -> - IQS = IQ:tx_publish(Message, State #q.internal_queue_state), +attempt_delivery(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> + BQS = BQ:tx_publish(Message, State #q.backing_queue_state), record_pending_message(Txn, ChPid, Message), - {true, State #q { internal_queue_state = IQS }}. + {true, State #q { backing_queue_state = BQS }}. -deliver_or_enqueue(Txn, ChPid, Message, State = #q{internal_queue = IQ}) -> +deliver_or_enqueue(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> case attempt_delivery(Txn, ChPid, Message, State) of {true, NewState} -> {true, NewState}; {false, NewState} -> %% Txn is none and no unblocked channels with consumers - IQS = IQ:publish(Message, State #q.internal_queue_state), - {false, NewState #q { internal_queue_state = IQS }} + BQS = BQ:publish(Message, State #q.backing_queue_state), + {false, NewState #q { backing_queue_state = BQS }} end. %% all these messages have already been delivered at least once and %% not ack'd, but need to be either redelivered or requeued deliver_or_requeue_n([], State) -> State; -deliver_or_requeue_n(MsgsWithAcks, State = #q{internal_queue = IQ}) -> +deliver_or_requeue_n(MsgsWithAcks, State = #q{backing_queue = BQ}) -> Funs = { fun deliver_or_requeue_msgs_pred/2, fun deliver_or_requeue_msgs_deliver/3 }, {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = deliver_msgs_to_consumers( Funs, {length(MsgsWithAcks), [], MsgsWithAcks}, State), - IQS = IQ:ack(AutoAcks, NewState #q.internal_queue_state), + BQS = BQ:ack(AutoAcks, NewState #q.backing_queue_state), case OutstandingMsgs of - [] -> NewState #q { internal_queue_state = IQS }; - _ -> IQS1 = IQ:requeue(OutstandingMsgs, IQS), - NewState #q { internal_queue_state = IQS1 } + [] -> NewState #q { backing_queue_state = BQS }; + _ -> BQS1 = BQ:requeue(OutstandingMsgs, BQS), + NewState #q { backing_queue_state = BQS1 } end. deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> @@ -518,11 +524,11 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -maybe_run_queue_via_internal_queue(Fun, Args, - State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> - {RunQueue, IQS1} = apply(IQ, Fun, Args ++ [IQS]), - State1 = State#q{internal_queue_state = IQS1}, +maybe_run_queue_via_backing_queue(Fun, Args, + State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> + {RunQueue, BQS1} = apply(BQ, Fun, Args ++ [BQS]), + State1 = State#q{backing_queue_state = BQS1}, case RunQueue of true -> run_message_queue(State1); false -> State1 @@ -557,7 +563,7 @@ record_pending_acks(Txn, ChPid, MsgIds) -> store_tx(Txn, Tx#tx{pending_acks = [MsgIds | Pending], ch_pid = ChPid}). 
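%% Illustrative aside (not part of this patch): every transactional
%% operation now goes through the pluggable module - tx_publish/2
%% records a publish inside a transaction, which is later resolved by
%% tx_commit/4 or tx_rollback/2, threading the backing queue state
%% through each call. A hedged sketch with tx_roundtrip/4 as an
%% invented driver:

tx_roundtrip(BQ, Msgs, From, BQS0) ->
    BQS1 = lists:foldl(fun (Msg, S) -> BQ:tx_publish(Msg, S) end,
                       BQS0, Msgs),
    %% tx_commit/4 returns whether the queue should be run, plus the
    %% new backing queue state
    {_RunQueue, BQS2} = BQ:tx_commit(Msgs, [], From, BQS1),
    BQS2.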
-commit_transaction(Txn, From, State = #q{internal_queue = IQ}) -> +commit_transaction(Txn, From, State = #q{backing_queue = BQ}) -> #tx{ch_pid = ChPid, pending_messages = PendingMessages, pending_acks = PendingAcks} = lookup_tx(Txn), PendingMessagesOrdered = lists:reverse(PendingMessages), @@ -572,16 +578,16 @@ commit_transaction(Txn, From, State = #q{internal_queue = IQ}) -> store_ch_record(C#cr{unacked_messages = Remaining}), [AckTag || {_Message, AckTag} <- MsgsWithAcks] end, - {RunQueue, IQS} = IQ:tx_commit(PendingMessagesOrdered, Acks, From, - State#q.internal_queue_state), + {RunQueue, BQS} = BQ:tx_commit(PendingMessagesOrdered, Acks, From, + State#q.backing_queue_state), erase_tx(Txn), - {RunQueue, State#q{internal_queue_state = IQS}}. + {RunQueue, State#q{backing_queue_state = BQS}}. -rollback_transaction(Txn, State = #q{internal_queue = IQ}) -> +rollback_transaction(Txn, State = #q{backing_queue = BQ}) -> #tx{pending_messages = PendingMessages} = lookup_tx(Txn), - IQS = IQ:tx_rollback(PendingMessages, State #q.internal_queue_state), + BQS = BQ:tx_rollback(PendingMessages, State #q.backing_queue_state), erase_tx(Txn), - State#q{internal_queue_state = IQS}. + State#q{backing_queue_state = BQS}. collect_messages(MsgIds, UAM) -> lists:mapfoldl( @@ -608,8 +614,8 @@ i(exclusive_consumer_tag, #q{exclusive_consumer = none}) -> ''; i(exclusive_consumer_tag, #q{exclusive_consumer = {_ChPid, ConsumerTag}}) -> ConsumerTag; -i(messages_ready, #q{internal_queue_state = IQS, internal_queue = IQ}) -> - IQ:len(IQS); +i(messages_ready, #q{backing_queue_state = BQS, backing_queue = BQ}) -> + BQ:len(BQS); i(messages_unacknowledged, _) -> lists:sum([dict:size(UAM) || #cr{unacked_messages = UAM} <- all_ch_record()]); @@ -630,26 +636,13 @@ i(transactions, _) -> i(memory, _) -> {memory, M} = process_info(self(), memory), M; -i(internal_queue_status, #q{internal_queue_state = IQS, internal_queue = IQ}) -> - IQ:status(IQS); +i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> + BQ:status(BQS); i(Item, _) -> throw({bad_argument, Item}). 
%--------------------------------------------------------------------------- -handle_call(init_internal_queue, From, State = - #q{internal_queue_state = undefined, internal_queue = IQ, - q = #amqqueue{name = QName, durable = IsDurable}}) -> - gen_server2:reply(From, ok), - PersistentStore = case IsDurable of - true -> ?PERSISTENT_MSG_STORE; - false -> ?TRANSIENT_MSG_STORE - end, - noreply(State#q{internal_queue_state = IQ:init(QName, PersistentStore)}); - -handle_call(init_internal_queue, _From, State) -> - reply(ok, State); - handle_call(sync, _From, State) -> reply(ok, State); @@ -713,24 +706,24 @@ handle_call({notify_down, ChPid}, _From, State) -> handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, next_msg_id = NextId, - internal_queue_state = IQS, internal_queue = IQ}) -> - case IQ:fetch(IQS) of - {empty, IQS1} -> reply(empty, State #q { internal_queue_state = IQS1 }); - {{Message, IsDelivered, AckTag, Remaining}, IQS1} -> + backing_queue_state = BQS, backing_queue = BQ}) -> + case BQ:fetch(BQS) of + {empty, BQS1} -> reply(empty, State #q { backing_queue_state = BQS1 }); + {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> AckRequired = not(NoAck), - IQS2 = + BQS2 = case AckRequired of true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), NewUAM = dict:store(NextId, {Message, AckTag}, UAM), store_ch_record(C#cr{unacked_messages = NewUAM}), - IQS1; + BQS1; false -> - IQ:ack([AckTag], IQS1) + BQ:ack([AckTag], BQS1) end, Msg = {QName, self(), NextId, IsDelivered, Message}, reply({ok, Remaining, Msg}, - State #q { next_msg_id = NextId + 1, internal_queue_state = IQS2 }) + State #q { next_msg_id = NextId + 1, backing_queue_state = BQS2 }) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -810,14 +803,14 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, end; handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - internal_queue_state = IQS, - internal_queue = IQ, + backing_queue_state = BQS, + backing_queue = BQ, active_consumers = ActiveConsumers}) -> - reply({ok, Name, IQ:len(IQS), queue:len(ActiveConsumers)}, State); + reply({ok, Name, BQ:len(BQS), queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, - State = #q{internal_queue_state = IQS, internal_queue = IQ}) -> - Length = IQ:len(IQS), + State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> + Length = BQ:len(BQS), IsEmpty = Length == 0, IsUnused = is_unused(State), if @@ -829,9 +822,9 @@ handle_call({delete, IfUnused, IfEmpty}, _From, {stop, normal, {ok, Length}, State} end; -handle_call(purge, _From, State = #q{internal_queue = IQ}) -> - {Count, IQS} = IQ:purge(State#q.internal_queue_state), - reply({ok, Count}, State#q{internal_queue_state = IQS}); +handle_call(purge, _From, State = #q{backing_queue = BQ}) -> + {Count, BQS} = BQ:purge(State#q.backing_queue_state), + reply({ok, Count}, State#q{backing_queue_state = BQS}); handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, exclusive_consumer = Holder}) -> @@ -856,12 +849,21 @@ handle_call({claim_queue, ReaderPid}, _From, reply(locked, State) end. + +handle_cast(init_backing_queue, State = #q{backing_queue_state = undefined, + backing_queue = BQ, q = Q}) -> + noreply(State#q{backing_queue_state = + maybe_init_backing_queue(true, BQ, Q)}); + +handle_cast(init_backing_queue, State) -> + noreply(State); + handle_cast({deliver, Txn, Message, ChPid}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. 
{_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), noreply(NewState); -handle_cast({ack, Txn, MsgIds, ChPid}, State = #q{internal_queue = IQ}) -> +handle_cast({ack, Txn, MsgIds, ChPid}, State = #q{backing_queue = BQ}) -> case lookup_ch(ChPid) of not_found -> noreply(State); @@ -869,10 +871,10 @@ handle_cast({ack, Txn, MsgIds, ChPid}, State = #q{internal_queue = IQ}) -> case Txn of none -> {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), - IQS = IQ:ack([AckTag || {_Message, AckTag} <- MsgWithAcks], - State #q.internal_queue_state), + BQS = BQ:ack([AckTag || {_Message, AckTag} <- MsgWithAcks], + State #q.backing_queue_state), store_ch_record(C#cr{unacked_messages = Remaining}), - noreply(State #q { internal_queue_state = IQS }); + noreply(State #q { backing_queue_state = BQS }); _ -> record_pending_acks(Txn, ChPid, MsgIds), noreply(State) @@ -906,8 +908,8 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({maybe_run_queue_via_internal_queue, Fun, Args}, State) -> - noreply(maybe_run_queue_via_internal_queue(Fun, Args, State)); +handle_cast({maybe_run_queue_via_backing_queue, Fun, Args}, State) -> + noreply(maybe_run_queue_via_backing_queue(Fun, Args, State)); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( @@ -929,21 +931,21 @@ handle_cast({flush, ChPid}, State) -> ok = rabbit_channel:flushed(ChPid, self()), noreply(State); -handle_cast(remeasure_rates, State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> - IQS1 = IQ:remeasure_rates(IQS), - RamDuration = IQ:queue_duration(IQS1), +handle_cast(update_ram_duration, State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> + BQS1 = BQ:update_ram_duration(BQS), + RamDuration = BQ:ram_duration(BQS1), DesiredDuration = - rabbit_memory_monitor:report_queue_duration(self(), RamDuration), - IQS2 = IQ:set_queue_duration_target(DesiredDuration, IQS1), + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), noreply(State#q{rate_timer_ref = just_measured, - internal_queue_state = IQS2}); + backing_queue_state = BQS2}); -handle_cast({set_queue_duration, Duration}, - State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> - IQS1 = IQ:set_queue_duration_target(Duration, IQS), - noreply(State#q{internal_queue_state = IQS1}); +handle_cast({set_ram_duration_target, Duration}, + State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> + BQS1 = BQ:set_ram_duration_target(Duration, BQS), + noreply(State#q{backing_queue_state = BQS1}); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -968,12 +970,12 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {stop, NewState} -> {stop, normal, NewState} end; -handle_info(timeout, State = #q{internal_queue_timeout_fun = undefined}) -> +handle_info(timeout, State = #q{backing_queue_timeout_fun = undefined}) -> noreply(State); -handle_info(timeout, State = #q{internal_queue_timeout_fun = {Fun, Args}}) -> - noreply(maybe_run_queue_via_internal_queue( - Fun, Args, State#q{internal_queue_timeout_fun = undefined})); +handle_info(timeout, State = #q{backing_queue_timeout_fun = {Fun, Args}}) -> + noreply(maybe_run_queue_via_backing_queue( + Fun, Args, State#q{backing_queue_timeout_fun = undefined})); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -982,11 +984,11 @@ handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", 
[Info]), {stop, {unhandled_info, Info}, State}. -handle_pre_hibernate(State = #q{internal_queue_state = IQS, - internal_queue = IQ}) -> - IQS1 = IQ:handle_pre_hibernate(IQS), +handle_pre_hibernate(State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> + BQS1 = BQ:handle_pre_hibernate(BQS), %% no activity for a while == 0 egress and ingress rates DesiredDuration = - rabbit_memory_monitor:report_queue_duration(self(), infinity), - IQS2 = IQ:set_queue_duration_target(DesiredDuration, IQS1), - {hibernate, stop_rate_timer(State#q{internal_queue_state = IQS2})}. + rabbit_memory_monitor:report_ram_duration(self(), infinity), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + {hibernate, stop_rate_timer(State#q{backing_queue_state = BQS2})}. diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl new file mode 100644 index 00000000..46299d02 --- /dev/null +++ b/src/rabbit_backing_queue_type.erl @@ -0,0 +1,122 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_backing_queue_type). + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [ + %% Called with queue name and a boolean to indicate whether or + %% not the queue is durable. + {init, 2}, + + %% Called on queue shutdown when queue isn't being deleted + {terminate, 1}, + + %% Called when the queue is terminating and needs to delete all + %% its content. + {delete_and_terminate, 1}, + + %% Remove all messages in the queue, but not messages which have + %% been fetched and are pending acks. + {purge, 1}, + + %% Publish a message + {publish, 2}, + + %% Called for messages which have already been passed straight + %% out to a client. The queue will be empty for these calls + %% (i.e. saves the round trip through the internal queue). + {publish_delivered, 2}, + + %% Produce the next message + {fetch, 1}, + + %% Acktags supplied are for messages which can now be forgotten + %% about + {ack, 2}, + + %% A publish, but in the context of a transaction. + {tx_publish, 2}, + + %% Undo anything which has been done by the tx_publish of the + %% indicated messages. + {tx_rollback, 2}, + + %% Commit these publishes and acktags. The publishes you will + %% have previously seen in calls to tx_publish. 
+     {tx_commit, 4},
+
+     %% Reinsert messages into the queue which have already been
+     %% delivered and were (likely) pending acks.
+     {requeue, 2},
+
+     %% How long is my queue?
+     {len, 1},
+
+     %% Is my queue empty?
+     {is_empty, 1},
+
+     %% For the next three functions, the assumption is that you're
+     %% monitoring something like the ingress and egress rates of the
+     %% queue. The RAM duration is thus the length of time represented
+     %% by the messages held in RAM given the current rates. If you
+     %% want to ignore all of this stuff, then do so, and return 0 in
+     %% ram_duration/1.
+
+     %% The target is to have no more messages in RAM than indicated
+     %% by the duration and the current queue rates.
+     {set_ram_duration_target, 2},
+
+     %% Recalculate the duration internally (likely to be just update
+     %% your internal rates).
+     {update_ram_duration, 1},
+
+     %% Report how many seconds the messages in RAM represent given
+     %% the current rates of the queue.
+     {ram_duration, 1},
+
+     %% Can return 'undefined' or a function atom name plus list of
+     %% arguments to be invoked in the internal queue module as soon
+     %% as the queue process can manage (either on an empty mailbox,
+     %% or when a timer fires).
+     {needs_sync, 1},
+
+     %% Called immediately before the queue hibernates
+     {handle_pre_hibernate, 1},
+
+     %% Exists for debugging purposes, to be able to expose state via
+     %% rabbitmqctl list_queues backing_queue_status
+     {status, 1}
+    ];
+behaviour_info(_Other) ->
+    undefined.
diff --git a/src/rabbit_internal_queue_type.erl b/src/rabbit_internal_queue_type.erl
deleted file mode 100644
index 48d9314d..00000000
--- a/src/rabbit_internal_queue_type.erl
+++ /dev/null
@@ -1,97 +0,0 @@
-%% The contents of this file are subject to the Mozilla Public License
-%% Version 1.1 (the "License"); you may not use this file except in
-%% compliance with the License. You may obtain a copy of the License at
-%% http://www.mozilla.org/MPL/
-%%
-%% Software distributed under the License is distributed on an "AS IS"
-%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
-%% License for the specific language governing rights and limitations
-%% under the License.
-%%
-%% The Original Code is RabbitMQ.
-%%
-%% The Initial Developers of the Original Code are LShift Ltd,
-%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
- {delete_and_terminate, 1}, - - %% Remove all messages in the queue, but not messages which have - %% been fetched and are pending acks. - {purge, 1}, - - %% Publish a message - {publish, 2}, - - %% Called for messages which have already been passed straight - %% out to a client. The queue will be empty for these calls - %% (i.e. saves the round trip through the internal queue). - {publish_delivered, 2}, - - {fetch, 1}, - - {ack, 2}, - - {tx_publish, 2}, - {tx_rollback, 2}, - {tx_commit, 4}, - - %% Reinsert messages into the queue which have already been - %% delivered and were (likely) pending acks.q - {requeue, 2}, - - {len, 1}, - - {is_empty, 1}, - - {set_queue_duration_target, 2}, - - {remeasure_rates, 1}, - - {queue_duration, 1}, - - %% Can return 'undefined' or a function atom name plus list of - %% arguments to be invoked in the internal queue module as soon - %% as the queue process can manage (either on an empty mailbox, - %% or when a timer fires). - {needs_sync, 1}, - - %% Called immediately before the queue hibernates - {handle_pre_hibernate, 1}, - - %% Exists for debugging purposes, to be able to expose state via - %% rabbitmqctl list_queues internal_queue_status - {status, 1} - ]; -behaviour_info(_Other) -> - undefined. diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index a76600fe..91e97ffe 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -40,7 +40,7 @@ -behaviour(gen_server2). -export([start_link/0, update/0, register/2, deregister/1, - report_queue_duration/2, stop/0]). + report_ram_duration/2, stop/0]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -90,7 +90,7 @@ -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). -spec(deregister/1 :: (pid()) -> 'ok'). --spec(report_queue_duration/2 :: (pid(), float() | 'infinity') -> number()). +-spec(report_ram_duration/2 :: (pid(), float() | 'infinity') -> number()). -spec(stop/0 :: () -> 'ok'). -endif. @@ -111,9 +111,9 @@ register(Pid, MFA = {_M, _F, _A}) -> deregister(Pid) -> gen_server2:cast(?SERVER, {deregister, Pid}). -report_queue_duration(Pid, QueueDuration) -> +report_ram_duration(Pid, QueueDuration) -> gen_server2:call(?SERVER, - {report_queue_duration, Pid, QueueDuration}, infinity). + {report_ram_duration, Pid, QueueDuration}, infinity). stop() -> gen_server2:cast(?SERVER, stop). @@ -143,7 +143,7 @@ init([]) -> memory_limit = MemoryLimit, desired_duration = infinity })}. -handle_call({report_queue_duration, Pid, QueueDuration}, From, +handle_call({report_ram_duration, Pid, QueueDuration}, From, State = #state { queue_duration_sum = Sum, queue_duration_count = Count, queue_durations = Durations, diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 340f308f..6d5ab2f0 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -60,6 +60,7 @@ -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). -export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). +-export([geometric/1]). -import(mnesia). -import(lists). @@ -129,14 +130,18 @@ -spec(start_applications/1 :: ([atom()]) -> 'ok'). -spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). --spec(ceil/1 :: (number()) -> number()). +-spec(ceil/1 :: (number()) -> integer()). -spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B). -spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()). 
-spec(pid_to_string/1 :: (pid()) -> string()). -spec(string_to_pid/1 :: (string()) -> pid()). +-spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt'). +-spec(version_compare/3 :: (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> + boolean()). -spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). +-spec(geometric/1 :: (float()) -> non_neg_integer()). -endif. @@ -636,3 +641,7 @@ unlink_and_capture_exit(Pid) -> receive {'EXIT', Pid, _} -> ok after 0 -> ok end. + +geometric(P) when 0.0 < P andalso P < 1.0 -> + U = 1.0 - random:uniform(), + ceil(math:log(U) / math:log(1.0 - P)). diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 267cb633..2c7ea893 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -46,7 +46,7 @@ %%---------------------------------------------------------------------------- --include("rabbit.hrl"). +-include("rabbit_msg_store.hrl"). -ifdef(use_specs). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b2db0ea5..2af16bc1 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1444,7 +1444,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, file_summary_ets = FileSummaryEts }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> First = ets:first(FileSummaryEts), - N = random_distributions:geometric(?GEOMETRIC_P), + N = rabbit_misc:geometric(?GEOMETRIC_P), case find_files_to_gc(FileSummaryEts, N, First) of undefined -> State; diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f5f49cf4..f7f265af 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -35,7 +35,7 @@ write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, - start_persistent_msg_store/1]). + start_msg_stores/1]). -export([queue_index_walker_reader/3]). %% for internal use only @@ -172,6 +172,7 @@ }). -include("rabbit.hrl"). +-include("rabbit_variable_queue.hrl"). %%---------------------------------------------------------------------------- @@ -210,7 +211,7 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). --spec(start_persistent_msg_store/1 :: ([amqqueue()]) -> 'ok'). +-spec(start_msg_stores/1 :: ([amqqueue()]) -> 'ok'). -endif. @@ -427,7 +428,12 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> end, {LowSeqIdSeg, NextSeqId, State}. -start_persistent_msg_store(DurableQueues) -> +start_msg_stores(DurableQueues) -> + ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), + ok = rabbit_sup:start_child( + ?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, + fun (ok) -> finished end, ok]), DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), Queue #amqqueue.name} || Queue <- DurableQueues ]), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a97730e0..29699829 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -41,6 +41,7 @@ -import(lists). -include("rabbit.hrl"). +-include("rabbit_variable_queue.hrl"). -include_lib("kernel/include/file.hrl"). test_content_prop_roundtrip(Datum, Binary) -> @@ -1203,8 +1204,7 @@ test_amqqueue(Durable) -> pid = none}. 
empty_test_queue() -> - ok = start_transient_msg_store(), - ok = rabbit_queue_index:start_persistent_msg_store([]), + ok = rabbit_queue_index:start_msg_stores([]), {0, _PRef, _TRef, _Terms, Qi1} = rabbit_queue_index:init(test_queue(), false), _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), ok. @@ -1266,8 +1266,7 @@ test_queue_index() -> %% call terminate twice to prove it's idempotent _Qi5 = rabbit_queue_index:terminate([], rabbit_queue_index:terminate([], Qi4)), ok = stop_msg_store(), - ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), - ok = start_transient_msg_store(), + ok = rabbit_queue_index:start_msg_stores([test_amqqueue(true)]), %% should get length back as 0, as all the msgs were transient {0, _PRef1, _TRef1, _Terms1, Qi6} = rabbit_queue_index:init(test_queue(), false), {0, 0, Qi7} = @@ -1280,8 +1279,7 @@ test_queue_index() -> lists:reverse(SeqIdsMsgIdsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), ok = stop_msg_store(), - ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), - ok = start_transient_msg_store(), + ok = rabbit_queue_index:start_msg_stores([test_amqqueue(true)]), %% should get length back as 10000 LenB = length(SeqIdsB), {LenB, _PRef2, _TRef2, _Terms2, Qi12} = rabbit_queue_index:init(test_queue(), false), @@ -1298,8 +1296,7 @@ test_queue_index() -> rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), _Qi19 = rabbit_queue_index:terminate([], Qi18), ok = stop_msg_store(), - ok = rabbit_queue_index:start_persistent_msg_store([test_amqqueue(true)]), - ok = start_transient_msg_store(), + ok = rabbit_queue_index:start_msg_stores([test_amqqueue(true)]), %% should get length back as 0 because all persistent msgs have been acked {0, _PRef3, _TRef3, _Terms3, Qi20} = rabbit_queue_index:init(test_queue(), false), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), @@ -1340,8 +1337,7 @@ test_queue_index() -> Qi40 = queue_index_flush_journal(Qi39), _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40), ok = stop_msg_store(), - ok = rabbit_queue_index:start_persistent_msg_store([]), - ok = start_transient_msg_store(), + ok = rabbit_queue_index:start_msg_stores([]), ok = stop_msg_store(), passed. 
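%% Illustrative aside (not part of this patch): the recurring
%% start/init/terminate dance in these tests reduces to a small
%% lifecycle, sketched here with the invented queue_index_lifecycle/1
%% (QName being a queue name such as test_queue() above); this is
%% essentially what empty_test_queue/0 does:

queue_index_lifecycle(QName) ->
    ok = rabbit_queue_index:start_msg_stores([]),
    %% a fresh index reports zero messages on disk
    {0, _PRef, _TRef, _Terms, Qi0} = rabbit_queue_index:init(QName, false),
    _Qi1 = rabbit_queue_index:terminate_and_erase(Qi0),
    ok.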
@@ -1370,7 +1366,7 @@ assert_prop(List, Prop, Value) ->
 fresh_variable_queue() ->
     stop_msg_store(),
     ok = empty_test_queue(),
-    VQ = rabbit_variable_queue:init(test_queue(), ?PERSISTENT_MSG_STORE),
+    VQ = rabbit_variable_queue:init(test_queue(), true),
     S0 = rabbit_variable_queue:status(VQ),
     assert_prop(S0, len, 0),
     assert_prop(S0, q1, 0),
@@ -1391,7 +1387,7 @@ test_variable_queue_dynamic_duration_change() ->
     %% start by sending in a couple of segments worth
     Len1 = 2*SegmentSize,
     VQ1 = variable_queue_publish(false, Len1, VQ0),
-    VQ2 = rabbit_variable_queue:remeasure_rates(VQ1),
+    VQ2 = rabbit_variable_queue:update_ram_duration(VQ1),
     {ok, _TRef} = timer:send_after(1000, {duration, 60,
                                           fun (V) -> (V*0.75)-1 end}),
     VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2),
@@ -1427,9 +1423,9 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) ->
                        _ -> Fun
                    end,
            {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}),
-           VQ4 = rabbit_variable_queue:remeasure_rates(VQ3),
+           VQ4 = rabbit_variable_queue:update_ram_duration(VQ3),
            VQ5 = %% /37 otherwise the duration is just too high to stress things
-                rabbit_variable_queue:set_queue_duration_target(N/37, VQ4),
+                rabbit_variable_queue:set_ram_duration_target(N/37, VQ4),
            io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]),
            test_variable_queue_dynamic_duration_change_f(Len, VQ5)
    after 0 ->
@@ -1441,8 +1437,8 @@ test_variable_queue_partial_segments_delta_thing() ->
     HalfSegment = SegmentSize div 2,
     VQ0 = fresh_variable_queue(),
     VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0),
-    VQ2 = rabbit_variable_queue:remeasure_rates(VQ1),
-    VQ3 = rabbit_variable_queue:set_queue_duration_target(0, VQ2),
+    VQ2 = rabbit_variable_queue:update_ram_duration(VQ1),
+    VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2),
     %% one segment in q3 as betas, and half a segment in delta
     S3 = rabbit_variable_queue:status(VQ3),
     io:format("~p~n", [S3]),
@@ -1450,7 +1446,7 @@                                    SegmentSize + HalfSegment}),
     assert_prop(S3, q3, SegmentSize),
     assert_prop(S3, len, SegmentSize + HalfSegment),
-    VQ4 = rabbit_variable_queue:set_queue_duration_target(infinity, VQ3),
+    VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3),
     VQ5 = variable_queue_publish(true, 1, VQ4),
     %% should have 1 alpha, but it's in the same segment as the deltas
     S5 = rabbit_variable_queue:status(VQ5),
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index c01ab5a4..b798a2c9 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -32,8 +32,8 @@
 -module(rabbit_variable_queue).
 
 -export([init/2, terminate/1, publish/2, publish_delivered/2,
-         set_queue_duration_target/2, remeasure_rates/1,
-         queue_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1,
+         set_ram_duration_target/2, update_ram_duration/1,
+         ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1,
          delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2,
         tx_commit/4, needs_sync/1, handle_pre_hibernate/1, status/1]).
@@ -133,7 +133,7 @@
 %%----------------------------------------------------------------------------
 
--behaviour(rabbit_internal_queue_type).
+-behaviour(rabbit_backing_queue_type).
 
 -record(vqstate,
         { q1,
@@ -189,6 +189,7 @@
 -define(RAM_INDEX_BATCH_SIZE, 64).
 
 -include("rabbit.hrl").
+-include("rabbit_variable_queue.hrl").
 
 %%----------------------------------------------------------------------------
 
@@ -236,7 +237,7 @@
              {boolean(), state()}).
-spec(tx_commit_index/1 :: (state()) -> {boolean(), state()}). --include("rabbit_internal_queue_type_spec.hrl"). +-include("rabbit_backing_queue_type_spec.hrl"). -endif. @@ -251,7 +252,11 @@ %% Public API %%---------------------------------------------------------------------------- -init(QueueName, PersistentStore) -> +init(QueueName, IsDurable) -> + PersistentStore = case IsDurable of + true -> ?PERSISTENT_MSG_STORE; + false -> ?TRANSIENT_MSG_STORE + end, MsgStoreRecovered = rabbit_msg_store:successfully_recovered_state(PersistentStore), {DeltaCount, PRef, TRef, Terms, IndexState} = @@ -344,7 +349,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, {ack_not_on_disk, State2} end. -set_queue_duration_target( +set_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, target_ram_msg_count = TargetRamMsgCount @@ -364,18 +369,18 @@ set_queue_duration_target( false -> reduce_memory_use(State1) end. -remeasure_rates(State = #vqstate { egress_rate = Egress, - ingress_rate = Ingress, - rate_timestamp = Timestamp, - in_counter = InCount, - out_counter = OutCount, - ram_msg_count = RamMsgCount, - duration_target = DurationTarget }) -> +update_ram_duration(State = #vqstate { egress_rate = Egress, + ingress_rate = Ingress, + rate_timestamp = Timestamp, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget }) -> Now = now(), {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), - set_queue_duration_target( + set_ram_duration_target( DurationTarget, State #vqstate { egress_rate = Egress1, avg_egress_rate = AvgEgressRate, @@ -385,7 +390,7 @@ remeasure_rates(State = #vqstate { egress_rate = Egress, ram_msg_count_prev = RamMsgCount, out_counter = 0, in_counter = 0 }). -queue_duration(#vqstate { avg_egress_rate = AvgEgressRate, +ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, ram_msg_count = RamMsgCount, ram_msg_count_prev = RamMsgCountPrev }) -> @@ -594,7 +599,7 @@ tx_commit(Pubs, AckTags, From, State = Self = self(), ok = rabbit_msg_store:sync( ?PERSISTENT_MSG_STORE, PersistentMsgIds, - fun () -> ok = rabbit_amqqueue:maybe_run_queue_via_internal_queue( + fun () -> ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue( Self, tx_commit_post_msg_store, [IsTransientPubs, Pubs, AckTags, From]) end), diff --git a/src/random_distributions.erl b/src/random_distributions.erl deleted file mode 100644 index 0f7d115c..00000000 --- a/src/random_distributions.erl +++ /dev/null @@ -1,38 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
-%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2010 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2010 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(random_distributions). - --export([geometric/1]). - -geometric(P) when 0.0 < P andalso P < 1.0 -> - U = 1.0 - random:uniform(), - rabbit_misc:ceil(math:log(U) / math:log(1.0 - P)). -- cgit v1.2.1 From a41b89f3419c0d44abb5ffa4daea8d92d94e3ee3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Apr 2010 16:09:28 +0100 Subject: Hide the startup of the msg_stores behind the pluggable API --- include/rabbit_backing_queue_type_spec.hrl | 1 + src/rabbit_amqqueue.erl | 3 ++- src/rabbit_backing_queue_type.erl | 7 +++++++ src/rabbit_variable_queue.erl | 5 +++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index 836e220b..85d9a067 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -29,6 +29,7 @@ %% Contributor(s): ______________________________________. %% +-spec(start/1 :: ([amqqueue()]) -> 'ok'). -spec(init/2 :: (queue_name(), boolean()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 235b1edb..7a18eb4b 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -125,7 +125,8 @@ start() -> DurableQueues = find_durable_queues(), - ok = rabbit_queue_index:start_msg_stores(DurableQueues), + {ok, BQ} = application:get_env(backing_queue_module), + ok = BQ:start(DurableQueues), {ok,_} = supervisor:start_child( rabbit_sup, {rabbit_amqqueue_sup, diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index 46299d02..b00d351b 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -35,6 +35,13 @@ behaviour_info(callbacks) -> [ + %% Called on startup with a list of durable queues. The queues + %% aren't being started at this point, but this call allows the + %% backing queue to perform any checking necessary for the + %% consistency of those queues, or initialise any other shared + %% resources. + {start, 1}, + %% Called with queue name and a boolean to indicate whether or %% not the queue is durable. {init, 2}, diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b798a2c9..7ee88dea 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -37,6 +37,8 @@ delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, needs_sync/1, handle_pre_hibernate/1, status/1]). +-export([start/1]). + -export([tx_commit_post_msg_store/5, tx_commit_index/1]). 
%% internal %%---------------------------------------------------------------------------- @@ -252,6 +254,9 @@ %% Public API %%---------------------------------------------------------------------------- +start(DurableQueues) -> + rabbit_queue_index:start_msg_stores(DurableQueues). + init(QueueName, IsDurable) -> PersistentStore = case IsDurable of true -> ?PERSISTENT_MSG_STORE; -- cgit v1.2.1 From adfab7ba76d606b5dd9eef359153a8ea6b7e5e62 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Apr 2010 16:25:00 +0100 Subject: Startup only requires durable queue names, not the whole amqqueue record --- include/rabbit_backing_queue_type_spec.hrl | 2 +- src/rabbit_amqqueue.erl | 2 +- src/rabbit_backing_queue_type.erl | 10 +++++----- src/rabbit_queue_index.erl | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index 85d9a067..5db43bb6 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -29,7 +29,7 @@ %% Contributor(s): ______________________________________. %% --spec(start/1 :: ([amqqueue()]) -> 'ok'). +-spec(start/1 :: ([queue_name()]) -> 'ok'). -spec(init/2 :: (queue_name(), boolean()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 7a18eb4b..6e6b4c67 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -126,7 +126,7 @@ start() -> DurableQueues = find_durable_queues(), {ok, BQ} = application:get_env(backing_queue_module), - ok = BQ:start(DurableQueues), + ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), {ok,_} = supervisor:start_child( rabbit_sup, {rabbit_amqqueue_sup, diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index b00d351b..c484a7c2 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -35,11 +35,11 @@ behaviour_info(callbacks) -> [ - %% Called on startup with a list of durable queues. The queues - %% aren't being started at this point, but this call allows the - %% backing queue to perform any checking necessary for the - %% consistency of those queues, or initialise any other shared - %% resources. + %% Called on startup with a list of durable queue names. The + %% queues aren't being started at this point, but this call + %% allows the backing queue to perform any checking necessary for + %% the consistency of those queues, or initialise any other + %% shared resources. 
{start, 1}, %% Called with queue name and a boolean to indicate whether or diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f7f265af..aaef03af 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -435,8 +435,8 @@ start_msg_stores(DurableQueues) -> [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, fun (ok) -> finished end, ok]), DurableDict = - dict:from_list([ {queue_name_to_dir_name(Queue #amqqueue.name), - Queue #amqqueue.name} || Queue <- DurableQueues ]), + dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || + Queue <- DurableQueues ]), QueuesDir = queues_dir(), Directories = case file:list_dir(QueuesDir) of {ok, Entries} -> -- cgit v1.2.1 From 9f12df546e587d545a4a5d4c1f5997f4ac3dd7a3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Apr 2010 16:59:49 +0100 Subject: Converted the maybe_run_queue_via_backing_queue to take a thunk, and also vq:needs_sync => vq:sync_callback --- include/rabbit_backing_queue_type_spec.hrl | 2 +- src/rabbit_amqqueue.erl | 9 +++++---- src/rabbit_amqqueue_process.erl | 31 +++++++++++++++--------------- src/rabbit_backing_queue_type.erl | 2 +- src/rabbit_queue_index.erl | 2 +- src/rabbit_tests.erl | 13 +++---------- src/rabbit_variable_queue.erl | 15 ++++++++------- 7 files changed, 34 insertions(+), 40 deletions(-) diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index 5db43bb6..f0a81aad 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -51,6 +51,6 @@ (('undefined' | 'infinity' | number()), state()) -> state()). -spec(update_ram_duration/1 :: (state()) -> state()). -spec(ram_duration/1 :: (state()) -> number()). --spec(needs_sync/1 :: (state()) -> ('undefined' | {atom(), [any()]})). +-spec(sync_callback/1 :: (state()) -> ('undefined' | {atom(), [any()]})). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 6e6b4c67..7d456602 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -41,7 +41,7 @@ -export([consumers/1, consumers_all/1]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, maybe_run_queue_via_backing_queue/3, +-export([notify_sent/2, unblock/2, maybe_run_queue_via_backing_queue/2, flush_all/2]). -export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -109,7 +109,8 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(maybe_run_queue_via_backing_queue/3 :: (pid(), atom(), [any()]) -> 'ok'). +-spec(maybe_run_queue_via_backing_queue/2 :: + (pid(), (fun ((A) -> {boolean(), A}))) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). @@ -350,8 +351,8 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 7, {unblock, ChPid}). -maybe_run_queue_via_backing_queue(QPid, Fun, Args) -> - gen_server2:pcast(QPid, 7, {maybe_run_queue_via_backing_queue, Fun, Args}). +maybe_run_queue_via_backing_queue(QPid, Fun) -> + gen_server2:pcast(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}). 
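%% Illustrative aside (not part of this patch): the arity change above
%% is the crux of this commit - callers now hand the queue process a
%% thunk over the backing queue state, rather than a function name plus
%% argument list to be applied in a fixed module. A hedged usage sketch
%% (sync_now/1 is invented; tx_commit_index/1 is the fun the variable
%% queue supplies later in this patch):

sync_now(QPid) ->
    %% the fun must accept the backing queue state and return
    %% {RunQueue :: boolean(), NewState}
    rabbit_amqqueue:maybe_run_queue_via_backing_queue(
      QPid, fun rabbit_variable_queue:tx_commit_index/1).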
flush_all(QPids, ChPid) -> safe_pmap_ok( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a20cd6c3..a4d653e2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -190,11 +190,12 @@ noreply(NewState) -> next_state(State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - next_state1(ensure_rate_timer(State), BQ:needs_sync(BQS)). + next_state1(ensure_rate_timer(State), BQ:sync_callback(BQS)). -next_state1(State = #q{sync_timer_ref = undefined}, Callback = {_Fun, _Args}) -> - {start_sync_timer(State, Callback), 0}; -next_state1(State, {_Fun, _Args}) -> +next_state1(State = #q{sync_timer_ref = undefined}, Fun) + when Fun =/= undefined -> + {start_sync_timer(State, Fun), 0}; +next_state1(State, Fun) when Fun =/= undefined -> {State, 0}; next_state1(State = #q{sync_timer_ref = undefined}, undefined) -> {State, hibernate}; @@ -218,12 +219,12 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{rate_timer_ref = undefined}. -start_sync_timer(State = #q{sync_timer_ref = undefined}, - Callback = {Fun, Args}) -> +start_sync_timer(State = #q{sync_timer_ref = undefined}, Fun) + when Fun =/= undefined -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, - maybe_run_queue_via_backing_queue, [self(), Fun, Args]), - State#q{sync_timer_ref = TRef, backing_queue_timeout_fun = Callback}. + maybe_run_queue_via_backing_queue, [self(), Fun]), + State#q{sync_timer_ref = TRef, backing_queue_timeout_fun = Fun}. stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), @@ -524,10 +525,8 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -maybe_run_queue_via_backing_queue(Fun, Args, - State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> - {RunQueue, BQS1} = apply(BQ, Fun, Args ++ [BQS]), +maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) -> + {RunQueue, BQS1} = Fun(BQS), State1 = State#q{backing_queue_state = BQS1}, case RunQueue of true -> run_message_queue(State1); @@ -908,8 +907,8 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({maybe_run_queue_via_backing_queue, Fun, Args}, State) -> - noreply(maybe_run_queue_via_backing_queue(Fun, Args, State)); +handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> + noreply(maybe_run_queue_via_backing_queue(Fun, State)); handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( @@ -973,9 +972,9 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue_timeout_fun = undefined}) -> noreply(State); -handle_info(timeout, State = #q{backing_queue_timeout_fun = {Fun, Args}}) -> +handle_info(timeout, State = #q{backing_queue_timeout_fun = Fun}) -> noreply(maybe_run_queue_via_backing_queue( - Fun, Args, State#q{backing_queue_timeout_fun = undefined})); + Fun, State#q{backing_queue_timeout_fun = undefined})); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index c484a7c2..3ccd71d0 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -116,7 +116,7 @@ behaviour_info(callbacks) -> %% arguments to be invoked in the internal queue module as soon %% as the queue process can manage (either on an empty mailbox, %% or when a timer fires). 
- {needs_sync, 1}, + {sync_callback, 1}, %% Called immediately before the queue hibernates {handle_pre_hibernate, 1}, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index aaef03af..a5583b87 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -211,7 +211,7 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). --spec(start_msg_stores/1 :: ([amqqueue()]) -> 'ok'). +-spec(start_msg_stores/1 :: ([queue_name()]) -> 'ok'). -endif. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 29699829..d374561f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1196,13 +1196,6 @@ queue_name(Name) -> test_queue() -> queue_name(test). -test_amqqueue(Durable) -> - #amqqueue{name = test_queue(), - durable = Durable, - auto_delete = true, - arguments = [], - pid = none}. - empty_test_queue() -> ok = rabbit_queue_index:start_msg_stores([]), {0, _PRef, _TRef, _Terms, Qi1} = rabbit_queue_index:init(test_queue(), false), @@ -1266,7 +1259,7 @@ test_queue_index() -> %% call terminate twice to prove it's idempotent _Qi5 = rabbit_queue_index:terminate([], rabbit_queue_index:terminate([], Qi4)), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([test_amqqueue(true)]), + ok = rabbit_queue_index:start_msg_stores([test_queue()]), %% should get length back as 0, as all the msgs were transient {0, _PRef1, _TRef1, _Terms1, Qi6} = rabbit_queue_index:init(test_queue(), false), {0, 0, Qi7} = @@ -1279,7 +1272,7 @@ test_queue_index() -> lists:reverse(SeqIdsMsgIdsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([test_amqqueue(true)]), + ok = rabbit_queue_index:start_msg_stores([test_queue()]), %% should get length back as 10000 LenB = length(SeqIdsB), {LenB, _PRef2, _TRef2, _Terms2, Qi12} = rabbit_queue_index:init(test_queue(), false), @@ -1296,7 +1289,7 @@ test_queue_index() -> rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), _Qi19 = rabbit_queue_index:terminate([], Qi18), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([test_amqqueue(true)]), + ok = rabbit_queue_index:start_msg_stores([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked {0, _PRef3, _TRef3, _Terms3, Qi20} = rabbit_queue_index:init(test_queue(), false), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7ee88dea..0048925a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -35,7 +35,7 @@ set_ram_duration_target/2, update_ram_duration/1, ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, - tx_commit/4, needs_sync/1, handle_pre_hibernate/1, status/1]). + tx_commit/4, sync_callback/1, handle_pre_hibernate/1, status/1]). -export([start/1]). @@ -605,8 +605,11 @@ tx_commit(Pubs, AckTags, From, State = ok = rabbit_msg_store:sync( ?PERSISTENT_MSG_STORE, PersistentMsgIds, fun () -> ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, tx_commit_post_msg_store, - [IsTransientPubs, Pubs, AckTags, From]) + Self, + fun (StateN) -> tx_commit_post_msg_store( + IsTransientPubs, Pubs, + AckTags, From, StateN) + end) end), {false, State} end. 
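The closure built in tx_commit above captures everything except the backing-queue state, so by the time the msg_store's sync continuation fires, the queue process only has to feed in whatever its state is at that moment. The same shape in miniature (commit/4 is a hypothetical stand-in for tx_commit_post_msg_store/5; it returns the {RunQueue, NewState} pair the queue process expects back from Fun(BQS)):

    -module(commit_thunk_sketch).
    -export([demo/0]).

    commit(Pubs, AckTags, From, State) ->
        From ! committed,
        {Pubs =/= [], {State, length(AckTags)}}.

    demo() ->
        Self = self(),
        Thunk = fun (BQS) -> commit([msg_a], [ack_1], Self, BQS) end,
        %% the thunk can now travel through casts and continuations;
        %% whoever finally runs it just supplies the current state:
        {true, {state0, 1}} = Thunk(state0),
        receive committed -> ok end.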
@@ -660,10 +663,8 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, {Pubs /= [], State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }}. -needs_sync(#vqstate { on_sync = {_, _, []} }) -> - undefined; -needs_sync(_) -> - {tx_commit_index, []}. +sync_callback(#vqstate { on_sync = {_, _, []} }) -> undefined; +sync_callback(_) -> fun tx_commit_index/1. handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = -- cgit v1.2.1 From 6bd393936397f5677cdcb7de178fb3881d26fff6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Apr 2010 17:09:48 +0100 Subject: Corrections to specs, docs and exports --- include/rabbit_backing_queue_type_spec.hrl | 3 ++- src/rabbit_backing_queue_type.erl | 10 +++++----- src/rabbit_variable_queue.erl | 4 +--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index f0a81aad..54118ba6 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -51,6 +51,7 @@ (('undefined' | 'infinity' | number()), state()) -> state()). -spec(update_ram_duration/1 :: (state()) -> state()). -spec(ram_duration/1 :: (state()) -> number()). --spec(sync_callback/1 :: (state()) -> ('undefined' | {atom(), [any()]})). +-spec(sync_callback/1 :: (state()) -> + ('undefined' | (fun ((A) -> {boolean(), A})))). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index 3ccd71d0..526152f1 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -62,7 +62,7 @@ behaviour_info(callbacks) -> %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls - %% (i.e. saves the round trip through the internal queue). + %% (i.e. saves the round trip through the backing queue). {publish_delivered, 2}, %% Produce the next message @@ -112,10 +112,10 @@ behaviour_info(callbacks) -> %% the current rates of the queue. {ram_duration, 1}, - %% Can return 'undefined' or a function atom name plus list of - %% arguments to be invoked in the internal queue module as soon - %% as the queue process can manage (either on an empty mailbox, - %% or when a timer fires). + %% Can return 'undefined' or a thunk which will receive the + %% state, and must return the state, as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires). {sync_callback, 1}, %% Called immediately before the queue hibernates diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0048925a..1c29c193 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -39,8 +39,6 @@ -export([start/1]). --export([tx_commit_post_msg_store/5, tx_commit_index/1]). 
%% internal - %%---------------------------------------------------------------------------- %% Definitions: @@ -227,7 +225,7 @@ avg_ingress_rate :: float(), rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), - on_sync :: {[ack()], [msg_id()], [{pid(), any()}]}, + on_sync :: {[[ack()]], [[msg_id()]], [{pid(), any()}]}, msg_store_clients :: {{any(), binary()}, {any(), binary()}}, persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), -- cgit v1.2.1 From 44133205acf4bf6bfd055d4e414b538dd93d3b6d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Apr 2010 18:41:30 +0100 Subject: Cosmetics for unification with default --- src/rabbit_basic.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 8c4ba897..4ab7a2a0 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -33,8 +33,8 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/1, message/4, delivery/4]). --export([properties/1, publish/4, publish/7]). +-export([publish/1, message/4, properties/1, delivery/4]). +-export([publish/4, publish/7]). -export([build_content/2, from_content/1]). -export([is_message_persistent/1]). -- cgit v1.2.1 From aefd9ddc7da0ca57fdf17091f51872b586877bd3 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 10:17:49 +0100 Subject: refactor --- src/bpqueue.erl | 73 ++++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 3c3e7647..3ba04144 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -97,13 +97,12 @@ in_r(Prefix, Value, BPQ) -> in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ). in1({In, Out}, Prefix, Value, {N, Q}) -> - {N+1, - case Out(Q) of - {{value, {Prefix, InnerQ}}, Q1} -> - In({Prefix, In(Value, InnerQ)}, Q1); - {{value, {_Prefix, _InnerQ}}, _Q1} -> - In({Prefix, queue:in(Value, queue:new())}, Q) - end}. + {N+1, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, In(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, queue:in(Value, queue:new())}, Q) + end}. in_q(Prefix, Queue, BPQ = {0, Q}) -> case queue:len(Queue) of @@ -124,13 +123,12 @@ in_q_r(Prefix, Queue, BPQ) -> in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> case queue:len(Queue) of 0 -> BPQ; - M -> {N + M, - case Out(Q) of - {{value, {Prefix, InnerQ}}, Q1} -> - In({Prefix, Join(InnerQ, Queue)}, Q1); - {{value, {_Prefix, _InnerQ}}, _Q1} -> - In({Prefix, Queue}, Q) - end} + M -> {N + M, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, Join(InnerQ, Queue)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, Queue}, Q) + end} end. out({0, _Q} = BPQ) -> @@ -233,20 +231,18 @@ to_list1({Prefix, InnerQ}) -> map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> - map_fold_filter1( - {fun queue:out/1, fun queue:in/2, fun in_q/3, fun join/2}, - N, PFilter, Fun, Init, Q, new()). + map_fold_filter1({fun queue:out/1, fun queue:in/2, fun in_q/3, fun join/2}, + N, PFilter, Fun, Init, Q, new()). map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> - map_fold_filter1( - {fun queue:out_r/1, fun queue:in_r/2, fun in_q_r/3, - fun (T, H) -> join(H, T) end}, - N, PFilter, Fun, Init, Q, new()). 
+ map_fold_filter1({fun queue:out_r/1, fun queue:in_r/2, + fun in_q_r/3, fun (T, H) -> join(H, T) end}, + N, PFilter, Fun, Init, Q, new()). -map_fold_filter1( - Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, Init, Q, QNew) -> +map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, Init, + Q, QNew) -> case Out(Q) of {empty, _Q} -> {QNew, Init}; @@ -254,18 +250,18 @@ map_fold_filter1( case PFilter(Prefix) of true -> {Init1, QNew1, Cont} = - map_fold_filter2( - Funs, Fun, Prefix, Prefix, Init, InnerQ, QNew, queue:new()), + map_fold_filter2(Funs, Fun, Prefix, Prefix, Init, + InnerQ, QNew, queue:new()), case Cont of false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; true -> - map_fold_filter1( - Funs, Len, PFilter, Fun, Init1, Q1, QNew1) + map_fold_filter1(Funs, Len, PFilter, Fun, Init1, + Q1, QNew1) end; false -> - map_fold_filter1( - Funs, Len, PFilter, Fun, Init, Q1, InQ(Prefix, InnerQ, QNew)) + map_fold_filter1(Funs, Len, PFilter, Fun, Init, + Q1, InQ(Prefix, InnerQ, QNew)) end end. @@ -281,16 +277,13 @@ map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, Init, {Init, InQ(OrigPrefix, InnerQ, InQ(Prefix, InnerQNew, QNew)), false}; {Prefix1, Value1, Init1} -> - case Prefix1 =:= Prefix of - true -> - map_fold_filter2( - Funs, Fun, OrigPrefix, Prefix, Init1, InnerQ1, QNew, - In(Value1, InnerQNew)); - false -> - map_fold_filter2( - Funs, Fun, OrigPrefix, Prefix1, Init1, InnerQ1, - InQ(Prefix, InnerQNew, QNew), - In(Value1, queue:new())) - end + {QNew1, InnerQNew1} = + case Prefix1 =:= Prefix of + true -> {QNew, In(Value1, InnerQNew)}; + false -> {InQ(Prefix, InnerQNew, QNew), + In(Value1, queue:new())} + end, + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix, Init1, + InnerQ1, QNew1, InnerQNew1) end end. -- cgit v1.2.1 From 5da66d403d0519522379934c967100e2af1c89c1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 11:24:59 +0100 Subject: rename rabbit_misc:geometric/1 to random_geometric/1 --- src/rabbit_misc.erl | 6 +++--- src/rabbit_msg_store.erl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 4f0e1bb0..6be36a0d 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -60,7 +60,7 @@ -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). -export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). --export([geometric/1]). +-export([random_geometric/1]). -import(mnesia). -import(lists). @@ -141,7 +141,7 @@ -spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). --spec(geometric/1 :: (float()) -> non_neg_integer()). +-spec(random_geometric/1 :: (float()) -> non_neg_integer()). -endif. @@ -642,6 +642,6 @@ unlink_and_capture_exit(Pid) -> after 0 -> ok end. -geometric(P) when 0.0 < P andalso P < 1.0 -> +random_geometric(P) when 0.0 < P andalso P < 1.0 -> U = 1.0 - random:uniform(), ceil(math:log(U) / math:log(1.0 - P)). 
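random_geometric/1 is inverse-transform sampling: for U uniform on (0,1), ceil(ln(U)/ln(1-P)) is geometrically distributed with parameter P, i.e. P(X = k) = (1-P)^(k-1) * P for k >= 1, so at P = 0.5 roughly half the draws are 1, a quarter are 2, and so on; the msg_store draws such an N to bound how far into the file list it searches for GC candidates. A quick histogram check (self-contained sketch; ceil/1 here is a local stand-in for rabbit_misc's private helper, and dict matches the era's library choices):

    -module(geometric_sketch).
    -export([demo/0]).

    ceil(N) -> T = trunc(N), case N > T of true -> T + 1; false -> T end.

    random_geometric(P) when 0.0 < P andalso P < 1.0 ->
        U = 1.0 - random:uniform(),
        ceil(math:log(U) / math:log(1.0 - P)).

    demo() ->
        Draws = [random_geometric(0.5) || _ <- lists:seq(1, 10000)],
        Hist = lists:foldl(fun (K, D) -> dict:update_counter(K, 1, D) end,
                           dict:new(), Draws),
        %% expect roughly 5000 ones, 2500 twos, 1250 threes, ...
        lists:sublist(lists:keysort(1, dict:to_list(Hist)), 5).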
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2af16bc1..322cad87 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1444,7 +1444,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, file_summary_ets = FileSummaryEts }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> First = ets:first(FileSummaryEts), - N = rabbit_misc:geometric(?GEOMETRIC_P), + N = rabbit_misc:random_geometric(?GEOMETRIC_P), case find_files_to_gc(FileSummaryEts, N, First) of undefined -> State; -- cgit v1.2.1 From a123b6fe1bd1c243c7ea33527a34419e87cbfa43 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 11:28:16 +0100 Subject: Apparently, abuse of empty queues is unsightly. Also, minor improvement to documentation --- src/bpqueue.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 3ba04144..4e9cdb31 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -32,9 +32,10 @@ -module(bpqueue). %% Block-prefixed queue. This implements a queue of queues, but -%% supporting the normal queue interface. Each block has a prefix and -%% it is guaranteed that no two consecutive blocks have the same -%% prefix. len/1 returns the flattened length of the queue and is O(1) +%% supporting the normal queue interface. Each inner queue has a +%% prefix, which does not need to be unique, and it is guaranteed that +%% no two consecutive blocks have the same prefix. len/1 returns the +%% flattened length of the queue and is O(1). -export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4, @@ -87,7 +88,7 @@ len({N, _Q}) -> N. in(Prefix, Value, {0, Q}) -> - {1, queue:in({Prefix, queue:in(Value, Q)}, Q)}; + {1, queue:in({Prefix, queue:from_list([Value])}, Q)}; in(Prefix, Value, BPQ) -> in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ). -- cgit v1.2.1 From 5896446a3906b7a99eb27557bdfcfdff668fa116 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 11:55:36 +0100 Subject: msg_id => guid in type specs, field names and function names. Vars still todo --- include/rabbit.hrl | 2 +- include/rabbit_backing_queue_type_spec.hrl | 4 +-- include/rabbit_msg_store.hrl | 2 +- include/rabbit_msg_store_index.hrl | 6 ++-- src/rabbit_msg_file.erl | 32 ++++++++++----------- src/rabbit_msg_store.erl | 46 +++++++++++++++--------------- src/rabbit_msg_store_ets_index.erl | 2 +- src/rabbit_queue_index.erl | 34 +++++++++++----------- src/rabbit_tests.erl | 28 +++++++++--------- src/rabbit_variable_queue.erl | 32 ++++++++++----------- 10 files changed, 94 insertions(+), 94 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 4b1be43c..982d90e9 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -89,7 +89,7 @@ %% this is really an abstract type, but dialyzer does not support them -type(guid() :: binary()). --type(msg_id() :: guid()). +-type(msg_id() :: non_neg_integer()). -type(txn() :: guid()). -type(pkey() :: guid()). -type(r(Kind) :: diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index 54118ba6..ac47ccba 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -41,8 +41,8 @@ state()}). -spec(ack/2 :: ([ack()], state()) -> state()). -spec(tx_publish/2 :: (basic_message(), state()) -> state()). --spec(tx_rollback/2 :: ([msg_id()], state()) -> state()). 
--spec(tx_commit/4 :: ([msg_id()], [ack()], {pid(), any()}, state()) -> +-spec(tx_rollback/2 :: ([guid()], state()) -> state()). +-spec(tx_commit/4 :: ([guid()], [ack()], {pid(), any()}, state()) -> {boolean(), state()}). -spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 696ccf3c..d96fa758 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -38,4 +38,4 @@ -endif. -record(msg_location, - {msg_id, ref_count, file, offset, total_size}). + {guid, ref_count, file, offset, total_size}). diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl index 9b3332ee..eb0ad5cb 100644 --- a/include/rabbit_msg_store_index.hrl +++ b/include/rabbit_msg_store_index.hrl @@ -43,13 +43,13 @@ -spec(init/2 :: (('fresh'|'recover'), dir()) -> {'fresh'|'recovered', index_state()}). --spec(lookup/2 :: (msg_id(), index_state()) -> ('not_found' | keyvalue())). +-spec(lookup/2 :: (guid(), index_state()) -> ('not_found' | keyvalue())). -spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). -spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). --spec(update_fields/3 :: (msg_id(), ({fieldpos(), fieldvalue()} | +-spec(update_fields/3 :: (guid(), ({fieldpos(), fieldvalue()} | [{fieldpos(), fieldvalue()}]), index_state()) -> 'ok'). --spec(delete/2 :: (msg_id(), index_state()) -> 'ok'). +-spec(delete/2 :: (guid(), index_state()) -> 'ok'). -spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok'). -spec(terminate/1 :: (index_state()) -> any()). diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 2c7ea893..0edeb469 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -40,9 +40,9 @@ -define(WRITE_OK_SIZE_BITS, 8). -define(WRITE_OK_MARKER, 255). -define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). --define(MSG_ID_SIZE_BYTES, 16). --define(MSG_ID_SIZE_BITS, (8 * ?MSG_ID_SIZE_BYTES)). --define(SIZE_AND_MSG_ID_BYTES, (?MSG_ID_SIZE_BYTES + ?INTEGER_SIZE_BYTES)). +-define(GUID_SIZE_BYTES, 16). +-define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). +-define(SIZE_AND_GUID_BYTES, (?GUID_SIZE_BYTES + ?INTEGER_SIZE_BYTES)). %%---------------------------------------------------------------------------- @@ -53,25 +53,25 @@ -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). --spec(append/3 :: (io_device(), msg_id(), msg()) -> +-spec(append/3 :: (io_device(), guid(), msg()) -> ({'ok', msg_size()} | {'error', any()})). -spec(read/2 :: (io_device(), msg_size()) -> - ({'ok', {msg_id(), msg()}} | {'error', any()})). + ({'ok', {guid(), msg()}} | {'error', any()})). -spec(scan/1 :: (io_device()) -> - {'ok', [{msg_id(), msg_size(), position()}], position()}). + {'ok', [{guid(), msg_size(), position()}], position()}). -endif. 
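With the specs settled, the framing itself is easy to picture: a size prefix that counts guid plus body, the 16-byte guid, the body, and a trailing write-ok marker. A toy round-trip under the constants in this file (64-bit size, 16-byte guid, 255 as the marker); encode/2 and decode/1 are illustrative names only, and the real append/3 additionally passes the body through term_to_binary/1:

    -module(framing_sketch).
    -export([demo/0]).

    encode(Guid, Body) when is_binary(Guid), size(Guid) =:= 16 ->
        Size = size(Body) + 16,
        <<Size:64, Guid:16/binary, Body/binary, 255:8>>.

    decode(<<Size:64, Rest/binary>>) ->
        BodySize = Size - 16,
        <<Guid:16/binary, Body:BodySize/binary, 255:8>> = Rest,
        {Guid, Body}.

    demo() ->
        Guid = erlang:md5(<<"some message">>),
        Frame = encode(Guid, <<"payload">>),
        {Guid, <<"payload">>} = decode(Frame),
        ok.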
%%---------------------------------------------------------------------------- append(FileHdl, MsgId, MsgBody) - when is_binary(MsgId) andalso size(MsgId) =< ?MSG_ID_SIZE_BYTES -> + when is_binary(MsgId) andalso size(MsgId) =< ?GUID_SIZE_BYTES -> MsgBodyBin = term_to_binary(MsgBody), MsgBodyBinSize = size(MsgBodyBin), - Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES, + Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, case file_handle_cache:append(FileHdl, <>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; @@ -80,10 +80,10 @@ append(FileHdl, MsgId, MsgBody) read(FileHdl, TotalSize) -> Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, - BodyBinSize = Size - ?MSG_ID_SIZE_BYTES, + BodyBinSize = Size - ?GUID_SIZE_BYTES, case file_handle_cache:read(FileHdl, TotalSize) of {ok, <>} -> {ok, {MsgId, binary_to_term(MsgBodyBin)}}; @@ -105,26 +105,26 @@ scan(FileHdl, Offset, Acc) -> end. read_next(FileHdl, Offset) -> - case file_handle_cache:read(FileHdl, ?SIZE_AND_MSG_ID_BYTES) of + case file_handle_cache:read(FileHdl, ?SIZE_AND_GUID_BYTES) of %% Here we take option 5 from %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in which %% we read the MsgId as a number, and then convert it back to %% a binary in order to work around bugs in Erlang's GC. - {ok, <>} -> + {ok, <>} -> case Size of 0 -> eof; %% Nothing we can do other than stop _ -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, ExpectedAbsPos = Offset + TotalSize - 1, case file_handle_cache:position( - FileHdl, {cur, Size - ?MSG_ID_SIZE_BYTES}) of + FileHdl, {cur, Size - ?GUID_SIZE_BYTES}) of {ok, ExpectedAbsPos} -> NextOffset = ExpectedAbsPos + 1, case file_handle_cache:read(FileHdl, 1) of {ok, <>} -> - <> = - <>, + <> = + <>, {ok, {MsgId, TotalSize, NextOffset}}; {ok, _SomeOtherData} -> {corrupted, NextOffset}; diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 322cad87..1a7085a2 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -121,16 +121,16 @@ -spec(start_link/5 :: (atom(), file_path(), [binary()] | 'undefined', - (fun ((A) -> 'finished' | {msg_id(), non_neg_integer(), A})), A) -> + (fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A) -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(write/4 :: (server(), msg_id(), msg(), client_msstate()) -> +-spec(write/4 :: (server(), guid(), msg(), client_msstate()) -> {'ok', client_msstate()}). --spec(read/3 :: (server(), msg_id(), client_msstate()) -> +-spec(read/3 :: (server(), guid(), client_msstate()) -> {{'ok', msg()} | 'not_found', client_msstate()}). --spec(contains/2 :: (server(), msg_id()) -> boolean()). --spec(remove/2 :: (server(), [msg_id()]) -> 'ok'). --spec(release/2 :: (server(), [msg_id()]) -> 'ok'). --spec(sync/3 :: (server(), [msg_id()], fun (() -> any())) -> 'ok'). +-spec(contains/2 :: (server(), guid()) -> boolean()). +-spec(remove/2 :: (server(), [guid()]) -> 'ok'). +-spec(release/2 :: (server(), [guid()]) -> 'ok'). +-spec(sync/3 :: (server(), [guid()], fun (() -> any())) -> 'ok'). -spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> 'ok'). -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). -spec(client_init/2 :: (server(), binary()) -> client_msstate()). @@ -153,7 +153,7 @@ %% The components: %% %% MsgLocation: this is a mapping from MsgId to #msg_location{}: -%% {MsgId, RefCount, File, Offset, TotalSize} +%% {Guid, RefCount, File, Offset, TotalSize} %% By default, it's in ets, but it's also pluggable. 
%% FileSummary: this is an ets table which contains: %% {File, ValidTotalSize, ContiguousTop, Left, Right} @@ -393,7 +393,7 @@ add_to_cache(CurFileCacheEts, MsgId, Msg) -> end end. -client_read1(Server, #msg_location { msg_id = MsgId, file = File } = +client_read1(Server, #msg_location { guid = MsgId, file = File } = MsgLocation, Defer, CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> case ets:lookup(FileSummaryEts, File) of @@ -404,7 +404,7 @@ client_read1(Server, #msg_location { msg_id = MsgId, file = File } = end. client_read2(_Server, false, undefined, - #msg_location { msg_id = MsgId, ref_count = RefCount }, Defer, + #msg_location { guid = MsgId, ref_count = RefCount }, Defer, CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts, dedup_cache_ets = DedupCacheEts }) -> case ets:lookup(CurFileCacheEts, MsgId) of @@ -420,7 +420,7 @@ client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> %% the safest and simplest thing to do. Defer(); client_read2(Server, false, _Right, - #msg_location { msg_id = MsgId, ref_count = RefCount, file = File }, + #msg_location { guid = MsgId, ref_count = RefCount, file = File }, Defer, CState = #client_msstate { file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, @@ -631,7 +631,7 @@ handle_cast({write, MsgId, Msg}, {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), ok = index_insert(#msg_location { - msg_id = MsgId, ref_count = 1, file = CurFile, + guid = MsgId, ref_count = 1, file = CurFile, offset = CurOffset, total_size = TotalSize }, State), [#file_summary { valid_total_size = ValidTotalSize, @@ -836,7 +836,7 @@ read_message(MsgId, From, State = end end. -read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, +read_message1(From, #msg_location { guid = MsgId, ref_count = RefCount, file = File, offset = Offset } = MsgLoc, State = #msstate { current_file = CurFile, current_file_handle = CurHdl, @@ -874,7 +874,7 @@ read_message1(From, #msg_location { msg_id = MsgId, ref_count = RefCount, end end. 
-read_from_disk(#msg_location { msg_id = MsgId, ref_count = RefCount, +read_from_disk(#msg_location { guid = MsgId, ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize }, State, DedupCacheEts) -> @@ -888,7 +888,7 @@ read_from_disk(#msg_location { msg_id = MsgId, ref_count = RefCount, throw({error, {misread, [{old_state, State}, {file_num, File}, {offset, Offset}, - {msg_id, MsgId}, + {guid, MsgId}, {read, Rest}, {proc_dict, get()} ]}}) @@ -1176,7 +1176,7 @@ count_msg_refs(Gen, Seed, State) -> {MsgId, Delta, Next} -> ok = case index_lookup(MsgId, State) of not_found -> - index_insert(#msg_location { msg_id = MsgId, + index_insert(#msg_location { guid = MsgId, ref_count = Delta }, State); StoreEntry = #msg_location { ref_count = RefCount } -> @@ -1202,9 +1202,9 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFileName, FileNames), {ok, UncorruptedMessagesTmp, MsgIdsTmp} = - scan_file_for_valid_messages_msg_ids(Dir, TmpFileName), + scan_file_for_valid_messages_guids(Dir, TmpFileName), {ok, UncorruptedMessages, MsgIds} = - scan_file_for_valid_messages_msg_ids(Dir, NonTmpRelatedFileName), + scan_file_for_valid_messages_guids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -1282,7 +1282,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> ok = file_handle_cache:delete(TmpHdl), {ok, _MainMessages, MsgIdsMain} = - scan_file_for_valid_messages_msg_ids( + scan_file_for_valid_messages_guids( Dir, NonTmpRelatedFileName), %% check that everything in MsgIds1 is in MsgIdsMain true = is_sublist(MsgIds1, MsgIdsMain), @@ -1297,7 +1297,7 @@ is_sublist(SmallerL, BiggerL) -> is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). -scan_file_for_valid_messages_msg_ids(Dir, FileName) -> +scan_file_for_valid_messages_guids(Dir, FileName) -> {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [MsgId || {MsgId, _TotalSize, _FileOffset} <- Messages]}. @@ -1367,7 +1367,7 @@ build_index(Gatherer, Left, [File|Files], State) -> build_index(Gatherer, File, Files, State). build_index_worker( - Gatherer, Guid, State = #msstate { dir = Dir }, Left, File, Files) -> + Gatherer, Ref, State = #msstate { dir = Dir }, Left, File, Files) -> {ok, Messages, FileSize} = scan_file_for_valid_messages( Dir, filenum_to_name(File)), @@ -1405,7 +1405,7 @@ build_index_worker( contiguous_top = ContiguousTop, locked = false, left = Left, right = Right, file_size = FileSize1, readers = 0 }), - ok = gatherer:finished(Gatherer, Guid). + ok = gatherer:finished(Gatherer, Ref). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation -- internal @@ -1660,7 +1660,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> {FinalOffset, BlockStart1, BlockEnd1} = lists:foldl( - fun (#msg_location { msg_id = MsgId, offset = Offset, + fun (#msg_location { guid = MsgId, offset = Offset, total_size = TotalSize }, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. 
diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index d46212ba..b4fb5ef1 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -45,7 +45,7 @@ init(fresh, Dir) -> file:delete(filename:join(Dir, ?FILENAME)), - Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]), {fresh, #state { table = Tid, dir = Dir }}; init(recover, Dir) -> Path = filename:join(Dir, ?FILENAME), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a5583b87..556c6968 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -143,10 +143,10 @@ -define(PUBLISH_PREFIX, 1). -define(PUBLISH_PREFIX_BITS, 1). --define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes --define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). +-define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(GUID_BITS, (?GUID_BYTES * 8)). %% 16 bytes for md5sum + 2 for seq, bits and prefix --define(PUBLISH_RECORD_LENGTH_BYTES, ?MSG_ID_BYTES + 2). +-define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + 2). %% 1 publish, 1 deliver, 1 ack per msg -define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * @@ -199,14 +199,14 @@ {'undefined' | non_neg_integer(), binary(), binary(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). --spec(write_published/4 :: (msg_id(), seq_id(), boolean(), qistate()) +-spec(write_published/4 :: (guid(), seq_id(), boolean(), qistate()) -> qistate()). -spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). -spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). -spec(sync_seq_ids/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush_journal/1 :: (qistate()) -> qistate()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> - {[{msg_id(), seq_id(), boolean(), boolean()}], qistate()}). + {[{guid(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> @@ -328,7 +328,7 @@ terminate_and_erase(State) -> State1. write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> - ?MSG_ID_BYTES = size(MsgId), + ?GUID_BYTES = size(MsgId), {JournalHdl, State1} = get_journal_handle(State), ok = file_handle_cache:append( JournalHdl, [<<(case IsPersistent of @@ -501,20 +501,20 @@ queue_index_walker({[QueueName | QueueNames], Gatherer}) -> [QueueName, Gatherer, Child]}), queue_index_walker({QueueNames, Gatherer}). -queue_index_walker_reader(QueueName, Gatherer, Guid) -> +queue_index_walker_reader(QueueName, Gatherer, Ref) -> State = blank_state(QueueName), State1 = load_journal(State), SegNums = all_segment_nums(State1), - queue_index_walker_reader(Gatherer, Guid, State1, SegNums). + queue_index_walker_reader(Gatherer, Ref, State1, SegNums). 
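queue_index_walker above fans one reader out per queue and funnels every publication back through the gatherer process before handing the aggregate to the msg_store. The fan-out/collect shape, reduced to bare processes (the real code goes through the gatherer module rather than raw message passing; scan/1 is a placeholder for walking one queue's segments):

    -module(fanout_sketch).
    -export([demo/0]).

    scan(Q) -> {Q, scanned}.

    walk(Queues) ->
        Parent = self(),
        Refs = [begin
                    Ref = make_ref(),
                    spawn_link(fun () -> Parent ! {Ref, scan(Q)} end),
                    Ref
                end || Q <- Queues],
        [receive {Ref, Result} -> Result end || Ref <- Refs].

    demo() ->
        [{q1, scanned}, {q2, scanned}] = walk([q1, q2]),
        ok.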
-queue_index_walker_reader(Gatherer, Guid, State, []) -> +queue_index_walker_reader(Gatherer, Ref, State, []) -> _State = terminate(false, [], State), - ok = gatherer:finished(Gatherer, Guid); -queue_index_walker_reader(Gatherer, Guid, State, [Seg | SegNums]) -> + ok = gatherer:finished(Gatherer, Ref); +queue_index_walker_reader(Gatherer, Ref, State, [Seg | SegNums]) -> SeqId = reconstruct_seq_id(Seg, 0), {Messages, State1} = read_segment_entries(SeqId, State), State2 = queue_index_walker_reader1(Gatherer, State1, Messages), - queue_index_walker_reader(Gatherer, Guid, State2, SegNums). + queue_index_walker_reader(Gatherer, Ref, State2, SegNums). queue_index_walker_reader1(_Gatherer, State, []) -> State; @@ -775,7 +775,7 @@ load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. - {ok, MsgId} = file_handle_cache:read(Hdl, ?MSG_ID_BYTES), + {ok, MsgId} = file_handle_cache:read(Hdl, ?GUID_BYTES), SegEntries1 = array:set(RelSeq, {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, @@ -836,13 +836,13 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> ?ACK_JPREFIX -> load_journal_entries(add_to_journal(SeqId, ack, State)); _ -> - case file_handle_cache:read(Hdl, ?MSG_ID_BYTES) of - {ok, <>} -> + case file_handle_cache:read(Hdl, ?GUID_BYTES) of + {ok, <>} -> %% work around for binary data %% fragmentation. See %% rabbit_msg_file:read_next/2 - <> = - <>, + <> = + <>, Publish = {MsgId, case Prefix of ?PUB_PERSIST_JPREFIX -> true; ?PUB_TRANS_JPREFIX -> false diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 97590e66..8eb12939 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1019,7 +1019,7 @@ stop_msg_store() -> E -> {persistent, E} end. -msg_id_bin(X) -> +guid_bin(X) -> erlang:md5(term_to_binary(X)). msg_store_contains(Atom, MsgIds) -> @@ -1037,7 +1037,7 @@ msg_store_sync(MsgIds) -> {sync, Ref} -> ok after 10000 -> - io:format("Sync from msg_store missing for msg_ids ~p~n", [MsgIds]), + io:format("Sync from msg_store missing for guids ~p~n", [MsgIds]), throw(timeout) end. @@ -1060,7 +1060,7 @@ test_msg_store() -> stop_msg_store(), ok = start_msg_store_empty(), Self = self(), - MsgIds = [msg_id_bin(M) || M <- lists:seq(1,100)], + MsgIds = [guid_bin(M) || M <- lists:seq(1,100)], {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), %% check we don't contain any of the msgs we're about to publish false = msg_store_contains(false, MsgIds), @@ -1094,7 +1094,7 @@ test_msg_store() -> {sync, MsgId} -> ok after 10000 -> - io:format("Sync from msg_store missing (msg_id: ~p)~n", + io:format("Sync from msg_store missing (guid: ~p)~n", [MsgId]), throw(timeout) end @@ -1147,10 +1147,10 @@ test_msg_store() -> ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIds1stHalf), %% restart empty ok = stop_msg_store(), - ok = start_msg_store_empty(), %% now safe to reuse msg_ids + ok = start_msg_store_empty(), %% now safe to reuse guids %% push a lot of msgs in... BigCount = 100000, - MsgIdsBig = [msg_id_bin(X) || X <- lists:seq(1, BigCount)], + MsgIdsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], Payload = << 0:65536 >>, ok = rabbit_msg_store:client_terminate( lists:foldl( @@ -1170,19 +1170,19 @@ test_msg_store() -> %% .., then 3s by 1... 
ok = lists:foldl( fun (MsgId, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [msg_id_bin(MsgId)]) + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(MsgId)]) end, ok, lists:seq(BigCount, 1, -3)), %% .., then remove 3s by 2, from the young end first. This hits %% GC (under 50% good data left, but no empty files. Must GC). ok = lists:foldl( fun (MsgId, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [msg_id_bin(MsgId)]) + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(MsgId)]) end, ok, lists:seq(BigCount-1, 1, -3)), %% .., then remove 3s by 3, from the young end first. This hits %% GC... ok = lists:foldl( fun (MsgId, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [msg_id_bin(MsgId)]) + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(MsgId)]) end, ok, lists:seq(BigCount-2, 1, -3)), %% ensure empty false = msg_store_contains(false, MsgIdsBig), @@ -1212,12 +1212,12 @@ queue_index_publish(SeqIds, Persistent, Qi) -> {A, B, MSCStateEnd} = lists:foldl( fun (SeqId, {QiN, SeqIdsMsgIdsAcc, MSCStateN}) -> - MsgId = rabbit_guid:guid(), - QiM = rabbit_queue_index:write_published(MsgId, SeqId, Persistent, + Guid = rabbit_guid:guid(), + QiM = rabbit_queue_index:write_published(Guid, SeqId, Persistent, QiN), - {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, MsgId, - MsgId, MSCStateN), - {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc], MSCStateM} + {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, + Guid, MSCStateN), + {QiM, [{SeqId, Guid} | SeqIdsMsgIdsAcc], MSCStateM} end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), ok = rabbit_msg_store:delete_client(MsgStore, Ref), ok = rabbit_msg_store:client_terminate(MSCStateEnd), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 1c29c193..f2e9c19c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -165,7 +165,7 @@ -record(msg_status, { msg, - msg_id, + guid, seq_id, is_persistent, is_delivered, @@ -197,7 +197,7 @@ -type(bpqueue() :: any()). -type(seq_id() :: non_neg_integer()). --type(ack() :: {'ack_index_and_store', msg_id(), seq_id(), atom() | pid()} +-type(ack() :: {'ack_index_and_store', guid(), seq_id(), atom() | pid()} | 'ack_not_on_disk'). -type(delta() :: #delta { start_seq_id :: non_neg_integer(), @@ -225,7 +225,7 @@ avg_ingress_rate :: float(), rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), - on_sync :: {[[ack()]], [[msg_id()]], [{pid(), any()}]}, + on_sync :: {[[ack()]], [[guid()]], [{pid(), any()}]}, msg_store_clients :: {{any(), binary()}, {any(), binary()}}, persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), @@ -233,7 +233,7 @@ }). -spec(tx_commit_post_msg_store/5 :: - (boolean(), [msg_id()], [ack()], {pid(), any()}, state()) -> + (boolean(), [guid()], [ack()], {pid(), any()}, state()) -> {boolean(), state()}). -spec(tx_commit_index/1 :: (state()) -> {boolean(), state()}). 
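guid_bin/1 in the test changes above mints guids as erlang:md5(term_to_binary(X)): an md5 digest is always exactly 16 bytes, which is what the ?GUID_BYTES framing assumes, and hashing the serialised term keeps the guids deterministic from run to run. Checking both properties in the shell:

    1> size(erlang:md5(term_to_binary(42))).
    16
    2> erlang:md5(term_to_binary(42)) =:= erlang:md5(term_to_binary(42)).
    true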
@@ -331,7 +331,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, State1 = State #vqstate { out_counter = OutCount + 1, in_counter = InCount + 1 }, MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, + msg = Msg, guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = true, msg_on_disk = false, index_on_disk = false }, {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), @@ -411,7 +411,7 @@ fetch(State = {empty, _Q4} -> fetch_from_q3_or_delta(State); {{value, #msg_status { - msg = Msg, msg_id = MsgId, seq_id = SeqId, + msg = Msg, guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Q4a} -> @@ -572,7 +572,7 @@ tx_publish(Msg = #basic_message { is_persistent = true, guid = MsgId }, State = #vqstate { msg_store_clients = MSCState, persistent_store = PersistentStore }) -> MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, seq_id = undefined, is_persistent = true, + msg = Msg, guid = MsgId, seq_id = undefined, is_persistent = true, is_delivered = false, msg_on_disk = false, index_on_disk = false }, {#msg_status { msg_on_disk = true }, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), @@ -581,7 +581,7 @@ tx_publish(_Msg, State) -> State. tx_rollback(Pubs, State = #vqstate { persistent_store = PersistentStore }) -> - ok = case persistent_msg_ids(Pubs) of + ok = case persistent_guids(Pubs) of [] -> ok; PP -> rabbit_msg_store:remove(PersistentStore, PP) end, @@ -591,7 +591,7 @@ tx_commit(Pubs, AckTags, From, State = #vqstate { persistent_store = PersistentStore }) -> %% If we are a non-durable queue, or we have no persistent pubs, %% we can skip the msg_store loop. - PersistentMsgIds = persistent_msg_ids(Pubs), + PersistentMsgIds = persistent_guids(Pubs), IsTransientPubs = [] == PersistentMsgIds, case IsTransientPubs orelse ?TRANSIENT_MSG_STORE == PersistentStore of @@ -699,7 +699,7 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), {Avg, {Then, Count}}. -persistent_msg_ids(Pubs) -> +persistent_guids(Pubs) -> [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, Obj #basic_message.is_persistent]. @@ -722,7 +722,7 @@ betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> case SeqId < SeqIdLimit of true -> {[#msg_status { msg = undefined, - msg_id = MsgId, + guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, @@ -852,7 +852,7 @@ remove_queue_entries(PersistentStore, Fold, Q, IndexState) -> {Count, IndexState2}. 
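persistent_guids/1 above uses a compact idiom: the generator pattern binds both the whole record (Obj) and the guid field, and the record field access in the filter position keeps only the persistent messages. The same shape in isolation (record and field names here are illustrative):

    -module(comprehension_sketch).
    -export([demo/0]).

    -record(m, {guid, persistent}).

    persistent_guids(Msgs) ->
        [G || M = #m{guid = G} <- Msgs, M#m.persistent].

    demo() ->
        [g1] = persistent_guids([#m{guid = g1, persistent = true},
                                 #m{guid = g2, persistent = false}]),
        ok.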
remove_queue_entries1( - #msg_status { msg_id = MsgId, seq_id = SeqId, + #msg_status { guid = MsgId, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, {PersistentStore, CountN, MsgIdsByStore, SeqIdsAcc, IndexStateN}) -> @@ -889,7 +889,7 @@ fetch_from_q3_or_delta(State = #vqstate { true = queue:is_empty(Q1), %% ASSERTION {empty, State}; {{value, IndexOnDisk, MsgStatus = #msg_status { - msg = undefined, msg_id = MsgId, + msg = undefined, guid = MsgId, is_persistent = IsPersistent }}, Q3a} -> {{ok, Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }}, MSCState1} = @@ -983,7 +983,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount, persistent_count = PCount }) -> MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, + msg = Msg, guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = false }, PCount1 = PCount + case IsPersistent of @@ -1096,7 +1096,7 @@ maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus = {MsgStatus, MSCState}; maybe_write_msg_to_disk(PersistentStore, Force, MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, + msg = Msg, guid = MsgId, is_persistent = IsPersistent }, MSCState) when Force orelse IsPersistent -> {ok, MSCState1} = @@ -1115,7 +1115,7 @@ maybe_write_index_to_disk(_Force, MsgStatus = true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION {MsgStatus, IndexState}; maybe_write_index_to_disk(Force, MsgStatus = #msg_status { - msg_id = MsgId, seq_id = SeqId, + guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered }, IndexState) when Force orelse IsPersistent -> -- cgit v1.2.1 From 719cf66b0175d9d6b498790cf773899d2c4d18d8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 12:05:56 +0100 Subject: Fix the refactoring which broke bpqueue --- src/bpqueue.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 4e9cdb31..7acc9697 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -278,13 +278,13 @@ map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, Init, {Init, InQ(OrigPrefix, InnerQ, InQ(Prefix, InnerQNew, QNew)), false}; {Prefix1, Value1, Init1} -> - {QNew1, InnerQNew1} = + {Prefix2, QNew1, InnerQNew1} = case Prefix1 =:= Prefix of - true -> {QNew, In(Value1, InnerQNew)}; - false -> {InQ(Prefix, InnerQNew, QNew), + true -> {Prefix, QNew, In(Value1, InnerQNew)}; + false -> {Prefix1, InQ(Prefix, InnerQNew, QNew), In(Value1, queue:new())} end, - map_fold_filter2(Funs, Fun, OrigPrefix, Prefix, Init1, + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, Init1, InnerQ1, QNew1, InnerQNew1) end end. -- cgit v1.2.1 From e9a141e32eb0c81e9d136c5d901ac0c4a1c12927 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 12:21:50 +0100 Subject: MsgId => Guid. All tests still pass. 
The distinction between msg_ids and guids is now complete --- src/rabbit_msg_file.erl | 24 ++-- src/rabbit_msg_store.erl | 262 +++++++++++++++++++++--------------------- src/rabbit_queue_index.erl | 86 +++++++------- src/rabbit_tests.erl | 166 +++++++++++++------------- src/rabbit_variable_queue.erl | 108 ++++++++--------- 5 files changed, 323 insertions(+), 323 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 0edeb469..792f0efa 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -64,14 +64,14 @@ %%---------------------------------------------------------------------------- -append(FileHdl, MsgId, MsgBody) - when is_binary(MsgId) andalso size(MsgId) =< ?GUID_SIZE_BYTES -> +append(FileHdl, Guid, MsgBody) + when is_binary(Guid) andalso size(Guid) =< ?GUID_SIZE_BYTES -> MsgBodyBin = term_to_binary(MsgBody), MsgBodyBinSize = size(MsgBodyBin), Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, case file_handle_cache:append(FileHdl, <>) of ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; @@ -83,10 +83,10 @@ read(FileHdl, TotalSize) -> BodyBinSize = Size - ?GUID_SIZE_BYTES, case file_handle_cache:read(FileHdl, TotalSize) of {ok, <>} -> - {ok, {MsgId, binary_to_term(MsgBodyBin)}}; + {ok, {Guid, binary_to_term(MsgBodyBin)}}; KO -> KO end. @@ -97,8 +97,8 @@ scan(FileHdl, Offset, Acc) -> eof -> {ok, Acc, Offset}; {corrupted, NextOffset} -> scan(FileHdl, NextOffset, Acc); - {ok, {MsgId, TotalSize, NextOffset}} -> - scan(FileHdl, NextOffset, [{MsgId, TotalSize, Offset} | Acc]); + {ok, {Guid, TotalSize, NextOffset}} -> + scan(FileHdl, NextOffset, [{Guid, TotalSize, Offset} | Acc]); _KO -> %% bad message, but we may still have recovered some valid messages {ok, Acc, Offset} @@ -108,9 +108,9 @@ read_next(FileHdl, Offset) -> case file_handle_cache:read(FileHdl, ?SIZE_AND_GUID_BYTES) of %% Here we take option 5 from %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in which - %% we read the MsgId as a number, and then convert it back to + %% we read the Guid as a number, and then convert it back to %% a binary in order to work around bugs in Erlang's GC. - {ok, <>} -> + {ok, <>} -> case Size of 0 -> eof; %% Nothing we can do other than stop _ -> @@ -123,9 +123,9 @@ read_next(FileHdl, Offset) -> case file_handle_cache:read(FileHdl, 1) of {ok, <>} -> - <> = - <>, - {ok, {MsgId, TotalSize, NextOffset}}; + <> = + <>, + {ok, {Guid, TotalSize, NextOffset}}; {ok, _SomeOtherData} -> {corrupted, NextOffset}; KO -> KO diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 1a7085a2..b6d6c5da 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -152,7 +152,7 @@ %% The components: %% -%% MsgLocation: this is a mapping from MsgId to #msg_location{}: +%% MsgLocation: this is a mapping from Guid to #msg_location{}: %% {Guid, RefCount, File, Offset, TotalSize} %% By default, it's in ets, but it's also pluggable. %% FileSummary: this is an ets table which contains: @@ -307,29 +307,29 @@ start_link(Server, Dir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit) -> [Server, Dir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit], [{timeout, infinity}]). -write(Server, MsgId, Msg, CState = +write(Server, Guid, Msg, CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> - ok = add_to_cache(CurFileCacheEts, MsgId, Msg), - {gen_server2:cast(Server, {write, MsgId, Msg}), CState}. + ok = add_to_cache(CurFileCacheEts, Guid, Msg), + {gen_server2:cast(Server, {write, Guid, Msg}), CState}. 
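write/4 above is deliberately fire-and-forget: the message lands in the shared current-file cache before the cast reaches the store, so a read that races the disk write can still be answered from ets. The write-through-then-notify shape in miniature (table and message names are illustrative; the stand-in store process merely acknowledges):

    -module(write_through_sketch).
    -export([demo/0]).

    store_loop() ->
        receive {persist, Key, From} -> From ! {persisted, Key}, store_loop() end.

    write(Store, Tab, Key, Val) ->
        true = ets:insert(Tab, {Key, Val}),   %% cache first...
        Store ! {persist, Key, self()},       %% ...then notify asynchronously
        ok.

    read(Tab, Key) ->
        case ets:lookup(Tab, Key) of
            [{Key, Val}] -> {ok, Val};        %% hit: no round trip needed
            []           -> miss
        end.

    demo() ->
        Tab = ets:new(cache, [set, public]),
        Store = spawn_link(fun store_loop/0),
        ok = write(Store, Tab, my_key, my_msg),
        {ok, my_msg} = read(Tab, my_key),     %% readable before persistence
        receive {persisted, my_key} -> ok end.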
-read(Server, MsgId, CState = +read(Server, Guid, CState = #client_msstate { dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts }) -> %% 1. Check the dedup cache - case fetch_and_increment_cache(DedupCacheEts, MsgId) of + case fetch_and_increment_cache(DedupCacheEts, Guid) of not_found -> %% 2. Check the cur file cache - case ets:lookup(CurFileCacheEts, MsgId) of + case ets:lookup(CurFileCacheEts, Guid) of [] -> Defer = fun() -> {gen_server2:pcall( - Server, 2, {read, MsgId}, infinity), + Server, 2, {read, Guid}, infinity), CState} end, - case index_lookup(MsgId, CState) of + case index_lookup(Guid, CState) of not_found -> Defer(); MsgLocation -> client_read1(Server, MsgLocation, Defer, CState) end; - [{MsgId, Msg, _CacheRefCount}] -> + [{Guid, Msg, _CacheRefCount}] -> %% Although we've found it, we don't know the %% refcount, so can't insert into dedup cache {{ok, Msg}, CState} @@ -338,10 +338,10 @@ read(Server, MsgId, CState = {{ok, Msg}, CState} end. -contains(Server, MsgId) -> gen_server2:call(Server, {contains, MsgId}, infinity). -remove(Server, MsgIds) -> gen_server2:cast(Server, {remove, MsgIds}). -release(Server, MsgIds) -> gen_server2:cast(Server, {release, MsgIds}). -sync(Server, MsgIds, K) -> gen_server2:cast(Server, {sync, MsgIds, K}). +contains(Server, Guid) -> gen_server2:call(Server, {contains, Guid}, infinity). +remove(Server, Guids) -> gen_server2:cast(Server, {remove, Guids}). +release(Server, Guids) -> gen_server2:cast(Server, {release, Guids}). +sync(Server, Guids, K) -> gen_server2:cast(Server, {sync, Guids, K}). sync(Server) -> gen_server2:pcast(Server, 8, sync). %% internal gc_done(Server, Reclaimed, Source, Destination) -> @@ -381,37 +381,37 @@ clean(Server, BaseDir) -> %% Client-side-only helpers %%---------------------------------------------------------------------------- -add_to_cache(CurFileCacheEts, MsgId, Msg) -> - case ets:insert_new(CurFileCacheEts, {MsgId, Msg, 1}) of +add_to_cache(CurFileCacheEts, Guid, Msg) -> + case ets:insert_new(CurFileCacheEts, {Guid, Msg, 1}) of true -> ok; false -> try - ets:update_counter(CurFileCacheEts, MsgId, {3, +1}), + ets:update_counter(CurFileCacheEts, Guid, {3, +1}), ok - catch error:badarg -> add_to_cache(CurFileCacheEts, MsgId, Msg) + catch error:badarg -> add_to_cache(CurFileCacheEts, Guid, Msg) end end. -client_read1(Server, #msg_location { guid = MsgId, file = File } = +client_read1(Server, #msg_location { guid = Guid, file = File } = MsgLocation, Defer, CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> case ets:lookup(FileSummaryEts, File) of [] -> %% File has been GC'd and no longer exists. Go around again. - read(Server, MsgId, CState); + read(Server, Guid, CState); [#file_summary { locked = Locked, right = Right }] -> client_read2(Server, Locked, Right, MsgLocation, Defer, CState) end. 
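read/3 above is a three-tier lookup: the dedup cache first, then the current-file cache, and only then the index, with a server call as the last resort. Distilled (plain ets tables stand in for the caches, a fun for the index/server fallback):

    -module(tiered_read_sketch).
    -export([demo/0]).

    lookup(Dedup, CurFile, Fallback, Key) ->
        case ets:lookup(Dedup, Key) of
            [{Key, Msg}] -> {dedup_cache, Msg};
            [] -> case ets:lookup(CurFile, Key) of
                      [{Key, Msg}] -> {cur_file_cache, Msg};
                      []           -> {server, Fallback(Key)}
                  end
        end.

    demo() ->
        Dedup   = ets:new(dedup,    [set]),
        CurFile = ets:new(cur_file, [set]),
        true = ets:insert(CurFile, {k2, m2}),
        Fallback = fun (_Key) -> from_disk end,
        {cur_file_cache, m2} = lookup(Dedup, CurFile, Fallback, k2),
        {server, from_disk}  = lookup(Dedup, CurFile, Fallback, k3),
        ok.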
client_read2(_Server, false, undefined, - #msg_location { guid = MsgId, ref_count = RefCount }, Defer, + #msg_location { guid = Guid, ref_count = RefCount }, Defer, CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts, dedup_cache_ets = DedupCacheEts }) -> - case ets:lookup(CurFileCacheEts, MsgId) of + case ets:lookup(CurFileCacheEts, Guid) of [] -> Defer(); %% may have rolled over - [{MsgId, Msg, _CacheRefCount}] -> - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg), + [{Guid, Msg, _CacheRefCount}] -> + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), {{ok, Msg}, CState} end; client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> @@ -420,7 +420,7 @@ client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> %% the safest and simplest thing to do. Defer(); client_read2(Server, false, _Right, - #msg_location { guid = MsgId, ref_count = RefCount, file = File }, + #msg_location { guid = Guid, ref_count = RefCount, file = File }, Defer, CState = #client_msstate { file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, @@ -430,7 +430,7 @@ client_read2(Server, false, _Right, %% finished. try ets:update_counter(FileSummaryEts, File, {#file_summary.readers, +1}) catch error:badarg -> %% the File has been GC'd and deleted. Go around. - read(Server, MsgId, CState) + read(Server, Guid, CState) end, Release = fun() -> ets:update_counter(FileSummaryEts, File, {#file_summary.readers, -1}) @@ -452,7 +452,7 @@ client_read2(Server, false, _Right, %% readers, msg_store ets:deletes (and unlocks the dest) try Release(), Defer() - catch error:badarg -> read(Server, MsgId, CState) + catch error:badarg -> read(Server, Guid, CState) end; false -> %% Ok, we're definitely safe to continue - a GC can't @@ -468,7 +468,7 @@ client_read2(Server, false, _Right, %% badarg scenario above, but we don't have a missing file %% - we just have the /wrong/ file). - case index_lookup(MsgId, CState) of + case index_lookup(Guid, CState) of MsgLocation = #msg_location { file = File } -> %% Still the same file. %% This is fine to fail (already exists) @@ -476,7 +476,7 @@ client_read2(Server, false, _Right, CState1 = close_all_indicated(CState), {Msg, CState2} = read_from_disk(MsgLocation, CState1, DedupCacheEts), - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), Release(), %% this MUST NOT fail with badarg {{ok, Msg}, CState2}; @@ -589,12 +589,12 @@ init([Server, BaseDir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> gc_pid = GCPid }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -handle_call({read, MsgId}, From, State) -> - State1 = read_message(MsgId, From, State), +handle_call({read, Guid}, From, State) -> + State1 = read_message(Guid, From, State), noreply(State1); -handle_call({contains, MsgId}, From, State) -> - State1 = contains_message(MsgId, From, State), +handle_call({contains, Guid}, From, State) -> + State1 = contains_message(Guid, From, State), noreply(State1); handle_call({new_client_state, CRef}, _From, @@ -617,21 +617,21 @@ handle_call({delete_client, CRef}, _From, reply(ok, State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }). 
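client_read2 above leans on an idiom that recurs throughout this file: ets:insert_new/2 and ets:update_counter/3 are each atomic, and update_counter raising badarg is the signal that the row vanished between the two operations (a concurrent delete, or a file GC'd away), at which point the caller retries or falls back. add_to_cache and insert_into_cache are both instances; a minimal standalone version:

    -module(refcount_sketch).
    -export([demo/0]).

    %% insert {Key, Val, 1}, or atomically bump the count in slot 3;
    %% badarg means another process deleted the row under us, so retry.
    bump(Tab, Key, Val) ->
        case ets:insert_new(Tab, {Key, Val, 1}) of
            true  -> ok;
            false -> try ets:update_counter(Tab, Key, {3, 1}), ok
                     catch error:badarg -> bump(Tab, Key, Val)
                     end
        end.

    demo() ->
        Tab = ets:new(counts, [set, public]),
        ok = bump(Tab, k, v),
        ok = bump(Tab, k, v),
        [{k, v, 2}] = ets:lookup(Tab, k),
        ok.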
-handle_cast({write, MsgId, Msg}, +handle_cast({write, Guid, Msg}, State = #msstate { current_file_handle = CurHdl, current_file = CurFile, sum_valid_data = SumValid, sum_file_size = SumFileSize, file_summary_ets = FileSummaryEts, cur_file_cache_ets = CurFileCacheEts }) -> - true = 0 =< ets:update_counter(CurFileCacheEts, MsgId, {3, -1}), - case index_lookup(MsgId, State) of + true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}), + case index_lookup(Guid, State) of not_found -> %% New message, lots to do {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), - {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg), + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg), ok = index_insert(#msg_location { - guid = MsgId, ref_count = 1, file = CurFile, + guid = Guid, ref_count = 1, file = CurFile, offset = CurOffset, total_size = TotalSize }, State), [#file_summary { valid_total_size = ValidTotalSize, @@ -661,34 +661,34 @@ handle_cast({write, MsgId, Msg}, #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter. Only %% update field otherwise bad interaction with concurrent GC - ok = index_update_fields(MsgId, + ok = index_update_fields(Guid, {#msg_location.ref_count, RefCount + 1}, State), noreply(State) end; -handle_cast({remove, MsgIds}, State) -> +handle_cast({remove, Guids}, State) -> State1 = lists:foldl( - fun (MsgId, State2) -> remove_message(MsgId, State2) end, - State, MsgIds), + fun (Guid, State2) -> remove_message(Guid, State2) end, + State, Guids), noreply(maybe_compact(State1)); -handle_cast({release, MsgIds}, State = +handle_cast({release, Guids}, State = #msstate { dedup_cache_ets = DedupCacheEts }) -> lists:foreach( - fun (MsgId) -> decrement_cache(DedupCacheEts, MsgId) end, MsgIds), + fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids), noreply(State); -handle_cast({sync, MsgIds, K}, +handle_cast({sync, Guids, K}, State = #msstate { current_file = CurFile, current_file_handle = CurHdl, on_sync = Syncs }) -> {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), - case lists:any(fun (MsgId) -> + case lists:any(fun (Guid) -> #msg_location { file = File, offset = Offset } = - index_lookup(MsgId, State), + index_lookup(Guid, State), File =:= CurFile andalso Offset >= SyncOffset - end, MsgIds) of + end, Guids) of false -> K(), noreply(State); true -> noreply(State #msstate { on_sync = [K | Syncs] }) @@ -821,13 +821,13 @@ internal_sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. -read_message(MsgId, From, State = +read_message(Guid, From, State = #msstate { dedup_cache_ets = DedupCacheEts }) -> - case index_lookup(MsgId, State) of + case index_lookup(Guid, State) of not_found -> gen_server2:reply(From, not_found), State; MsgLocation -> - case fetch_and_increment_cache(DedupCacheEts, MsgId) of + case fetch_and_increment_cache(DedupCacheEts, Guid) of not_found -> read_message1(From, MsgLocation, State); Msg -> @@ -836,7 +836,7 @@ read_message(MsgId, From, State = end end. 
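The sync cast above defers the continuation K only when one of the guids still lies beyond the last-synced offset of the current file; otherwise K runs on the spot. Deferred continuations accumulate in on_sync and are all released after the next real fsync. In miniature (running order assumed oldest-first here, hence the reverse of the cons-built list):

    -module(on_sync_sketch).
    -export([demo/0]).

    defer(K, Syncs) -> [K | Syncs].           %% consed on as they arrive

    run_after_sync(Syncs) ->                  %% released together post-fsync
        lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)),
        [].

    demo() ->
        Self = self(),
        Syncs = defer(fun () -> Self ! second end,
                      defer(fun () -> Self ! first end, [])),
        [] = run_after_sync(Syncs),
        receive first -> ok end,
        receive second -> ok end,
        ok.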
-read_message1(From, #msg_location { guid = MsgId, ref_count = RefCount, +read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, file = File, offset = Offset } = MsgLoc, State = #msstate { current_file = CurFile, current_file_handle = CurHdl, @@ -847,7 +847,7 @@ read_message1(From, #msg_location { guid = MsgId, ref_count = RefCount, true -> {Msg, State1} = %% can return [] if msg in file existed on startup - case ets:lookup(CurFileCacheEts, MsgId) of + case ets:lookup(CurFileCacheEts, Guid) of [] -> ok = case {ok, Offset} >= file_handle_cache:current_raw_offset(CurHdl) of @@ -855,10 +855,10 @@ read_message1(From, #msg_location { guid = MsgId, ref_count = RefCount, false -> ok end, read_from_disk(MsgLoc, State, DedupCacheEts); - [{MsgId, Msg1, _CacheRefCount}] -> + [{Guid, Msg1, _CacheRefCount}] -> {Msg1, State} end, - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg), + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), gen_server2:reply(From, {ok, Msg}), State1; false -> @@ -866,7 +866,7 @@ read_message1(From, #msg_location { guid = MsgId, ref_count = RefCount, ets:lookup(FileSummaryEts, File), case Locked of true -> - add_to_pending_gc_completion({read, MsgId, From}, State); + add_to_pending_gc_completion({read, Guid, From}, State); false -> {Msg, State1} = read_from_disk(MsgLoc, State, DedupCacheEts), gen_server2:reply(From, {ok, Msg}), @@ -874,36 +874,36 @@ read_message1(From, #msg_location { guid = MsgId, ref_count = RefCount, end end. -read_from_disk(#msg_location { guid = MsgId, ref_count = RefCount, +read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize }, State, DedupCacheEts) -> {Hdl, State1} = get_read_handle(File, State), {ok, Offset} = file_handle_cache:position(Hdl, Offset), - {ok, {MsgId, Msg}} = + {ok, {Guid, Msg}} = case rabbit_msg_file:read(Hdl, TotalSize) of - {ok, {MsgId, _}} = Obj -> + {ok, {Guid, _}} = Obj -> Obj; Rest -> throw({error, {misread, [{old_state, State}, {file_num, File}, {offset, Offset}, - {guid, MsgId}, + {guid, Guid}, {read, Rest}, {proc_dict, get()} ]}}) end, - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg), + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), {Msg, State1}. -maybe_insert_into_cache(DedupCacheEts, RefCount, MsgId, Msg) +maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) when RefCount > 1 -> - insert_into_cache(DedupCacheEts, MsgId, Msg); -maybe_insert_into_cache(_DedupCacheEts, _RefCount, _MsgId, _Msg) -> + insert_into_cache(DedupCacheEts, Guid, Msg); +maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> ok. -contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> - case index_lookup(MsgId, State) of +contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> + case index_lookup(Guid, State) of not_found -> gen_server2:reply(From, false), State; @@ -911,34 +911,34 @@ contains_message(MsgId, From, State = #msstate { gc_active = GCActive }) -> case GCActive of {A, B} when File == A orelse File == B -> add_to_pending_gc_completion( - {contains, MsgId, From}, State); + {contains, Guid, From}, State); _ -> gen_server2:reply(From, true), State end end. 
-remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, +remove_message(Guid, State = #msstate { sum_valid_data = SumValid, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = - index_lookup(MsgId, State), + index_lookup(Guid, State), case RefCount of 1 -> %% don't remove from CUR_FILE_CACHE_ETS_NAME here because %% there may be further writes in the mailbox for the same %% msg. - ok = remove_cache_entry(DedupCacheEts, MsgId), + ok = remove_cache_entry(DedupCacheEts, Guid), [#file_summary { valid_total_size = ValidTotalSize, contiguous_top = ContiguousTop, locked = Locked }] = ets:lookup(FileSummaryEts, File), case Locked of true -> - add_to_pending_gc_completion({remove, MsgId}, State); + add_to_pending_gc_completion({remove, Guid}, State); false -> - ok = index_delete(MsgId, State), + ok = index_delete(Guid, State), ContiguousTop1 = lists:min([ContiguousTop, Offset]), ValidTotalSize1 = ValidTotalSize - TotalSize, true = ets:update_element( @@ -949,9 +949,9 @@ remove_message(MsgId, State = #msstate { sum_valid_data = SumValid, State1 #msstate { sum_valid_data = SumValid - TotalSize } end; _ when 1 < RefCount -> - ok = decrement_cache(DedupCacheEts, MsgId), + ok = decrement_cache(DedupCacheEts, Guid), %% only update field, otherwise bad interaction with concurrent GC - ok = index_update_fields(MsgId, + ok = index_update_fields(Guid, {#msg_location.ref_count, RefCount - 1}, State), State @@ -967,12 +967,12 @@ run_pending(State = #msstate { pending_gc_completion = Pending }) -> State1 = State #msstate { pending_gc_completion = [] }, lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)). -run_pending({read, MsgId, From}, State) -> - read_message(MsgId, From, State); -run_pending({contains, MsgId, From}, State) -> - contains_message(MsgId, From, State); -run_pending({remove, MsgId}, State) -> - remove_message(MsgId, State). +run_pending({read, Guid, From}, State) -> + read_message(Guid, From, State); +run_pending({contains, Guid, From}, State) -> + contains_message(Guid, From, State); +run_pending({remove, Guid}, State) -> + remove_message(Guid, State). open_file(Dir, FileName, Mode) -> file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, @@ -1091,45 +1091,45 @@ scan_file_for_valid_messages(Dir, FileName) -> %% message cache helper functions %%---------------------------------------------------------------------------- -remove_cache_entry(DedupCacheEts, MsgId) -> - true = ets:delete(DedupCacheEts, MsgId), +remove_cache_entry(DedupCacheEts, Guid) -> + true = ets:delete(DedupCacheEts, Guid), ok. -fetch_and_increment_cache(DedupCacheEts, MsgId) -> - case ets:lookup(DedupCacheEts, MsgId) of +fetch_and_increment_cache(DedupCacheEts, Guid) -> + case ets:lookup(DedupCacheEts, Guid) of [] -> not_found; - [{_MsgId, Msg, _RefCount}] -> + [{_Guid, Msg, _RefCount}] -> try - ets:update_counter(DedupCacheEts, MsgId, {3, 1}) + ets:update_counter(DedupCacheEts, Guid, {3, 1}) catch error:badarg -> %% someone has deleted us in the meantime, insert us - ok = insert_into_cache(DedupCacheEts, MsgId, Msg) + ok = insert_into_cache(DedupCacheEts, Guid, Msg) end, Msg end. 
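fetch_and_increment_cache above and the decrement/insert helpers that follow form a small lock-free refcounting protocol over ETS: insert_new claims a fresh entry, update_counter bumps or drops the count, and the badarg catch handles the race where another operation deleted the row in between. A self-contained sketch of the same protocol (module, table and guid names invented for illustration):

    -module(refcache_sketch).
    -export([demo/0]).

    demo() ->
        T = ets:new(cache, [set, public]),
        ok = insert(T, g1, <<"payload">>),
        ok = insert(T, g1, <<"payload">>),     %% second insert bumps the count
        [{g1, <<"payload">>, 2}] = ets:lookup(T, g1),
        ok = decrement(T, g1),
        ok = decrement(T, g1),                 %% count reaches 0: entry removed
        [] = ets:lookup(T, g1),
        ok.

    insert(T, Guid, Msg) ->
        case ets:insert_new(T, {Guid, Msg, 1}) of
            true  -> ok;
            false -> try ets:update_counter(T, Guid, {3, 1}), ok
                     catch error:badarg -> insert(T, Guid, Msg) %% deleted meanwhile: retry
                     end
        end.

    decrement(T, Guid) ->
        try case ets:update_counter(T, Guid, {3, -1}) of
                N when N =< 0 -> true = ets:delete(T, Guid);
                _N            -> true
            end
        catch error:badarg -> true             %% entry was never cached
        end,
        ok.
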
-decrement_cache(DedupCacheEts, MsgId) -> - true = try case ets:update_counter(DedupCacheEts, MsgId, {3, -1}) of - N when N =< 0 -> true = ets:delete(DedupCacheEts, MsgId); +decrement_cache(DedupCacheEts, Guid) -> + true = try case ets:update_counter(DedupCacheEts, Guid, {3, -1}) of + N when N =< 0 -> true = ets:delete(DedupCacheEts, Guid); _N -> true end catch error:badarg -> - %% MsgId is not in there because although it's been + %% Guid is not in there because although it's been %% delivered, it's never actually been read (think: %% persistent message held in RAM) true end, ok. -insert_into_cache(DedupCacheEts, MsgId, Msg) -> - case ets:insert_new(DedupCacheEts, {MsgId, Msg, 1}) of +insert_into_cache(DedupCacheEts, Guid, Msg) -> + case ets:insert_new(DedupCacheEts, {Guid, Msg, 1}) of true -> ok; false -> try - ets:update_counter(DedupCacheEts, MsgId, {3, 1}), + ets:update_counter(DedupCacheEts, Guid, {3, 1}), ok catch error:badarg -> - insert_into_cache(DedupCacheEts, MsgId, Msg) + insert_into_cache(DedupCacheEts, Guid, Msg) end end. @@ -1172,17 +1172,17 @@ count_msg_refs(true, _Gen, _Seed, _State) -> count_msg_refs(Gen, Seed, State) -> case Gen(Seed) of finished -> ok; - {_MsgId, 0, Next} -> count_msg_refs(Gen, Next, State); - {MsgId, Delta, Next} -> - ok = case index_lookup(MsgId, State) of + {_Guid, 0, Next} -> count_msg_refs(Gen, Next, State); + {Guid, Delta, Next} -> + ok = case index_lookup(Guid, State) of not_found -> - index_insert(#msg_location { guid = MsgId, + index_insert(#msg_location { guid = Guid, ref_count = Delta }, State); StoreEntry = #msg_location { ref_count = RefCount } -> NewRefCount = RefCount + Delta, case NewRefCount of - 0 -> index_delete(MsgId, State); + 0 -> index_delete(Guid, State); _ -> index_update(StoreEntry #msg_location { ref_count = NewRefCount }, State) @@ -1201,9 +1201,9 @@ recover_crashed_compactions(Dir, FileNames, TmpFileNames) -> recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFileName, FileNames), - {ok, UncorruptedMessagesTmp, MsgIdsTmp} = + {ok, UncorruptedMessagesTmp, GuidsTmp} = scan_file_for_valid_messages_guids(Dir, TmpFileName), - {ok, UncorruptedMessages, MsgIds} = + {ok, UncorruptedMessages, Guids} = scan_file_for_valid_messages_guids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ @@ -1232,7 +1232,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% back to before any of the files in the tmp file and copy %% them over again TmpPath = form_filename(Dir, TmpFileName), - case is_sublist(MsgIdsTmp, MsgIds) of + case is_sublist(GuidsTmp, Guids) of true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file %% note this also catches the case when the tmp file %% is empty @@ -1243,13 +1243,13 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% there are no msgs in the tmp file then we would be in %% the 'true' branch of this case, so we know the %% lists:last call is safe. 
- EldestTmpMsgId = lists:last(MsgIdsTmp), - {MsgIds1, UncorruptedMessages1} + EldestTmpGuid = lists:last(GuidsTmp), + {Guids1, UncorruptedMessages1} = case lists:splitwith( - fun (MsgId) -> MsgId /= EldestTmpMsgId end, MsgIds) of - {_MsgIds, []} -> %% no msgs from tmp in main - {MsgIds, UncorruptedMessages}; - {Dropped, [EldestTmpMsgId | Rest]} -> + fun (Guid) -> Guid /= EldestTmpGuid end, Guids) of + {_Guids, []} -> %% no msgs from tmp in main + {Guids, UncorruptedMessages}; + {Dropped, [EldestTmpGuid | Rest]} -> %% Msgs in Dropped are in tmp, so forget them. %% *cry*. Lists indexed from 1. {Rest, lists:sublist(UncorruptedMessages, @@ -1257,11 +1257,11 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> length(Rest))} end, %% The main file prefix should be contiguous - {Top, MsgIds1} = find_contiguous_block_prefix( + {Top, Guids1} = find_contiguous_block_prefix( lists:reverse(UncorruptedMessages1)), %% we should have that none of the messages in the prefix %% are in the tmp file - true = is_disjoint(MsgIds1, MsgIdsTmp), + true = is_disjoint(Guids1, GuidsTmp), %% must open with read flag, otherwise will stomp over contents {ok, MainHdl} = open_file( Dir, NonTmpRelatedFileName, [read | ?WRITE_MODE]), @@ -1281,13 +1281,13 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> ok = file_handle_cache:close(MainHdl), ok = file_handle_cache:delete(TmpHdl), - {ok, _MainMessages, MsgIdsMain} = + {ok, _MainMessages, GuidsMain} = scan_file_for_valid_messages_guids( Dir, NonTmpRelatedFileName), - %% check that everything in MsgIds1 is in MsgIdsMain - true = is_sublist(MsgIds1, MsgIdsMain), - %% check that everything in MsgIdsTmp is in MsgIdsMain - true = is_sublist(MsgIdsTmp, MsgIdsMain) + %% check that everything in Guids1 is in GuidsMain + true = is_sublist(Guids1, GuidsMain), + %% check that everything in GuidsTmp is in GuidsMain + true = is_sublist(GuidsTmp, GuidsMain) end, ok. @@ -1300,7 +1300,7 @@ is_disjoint(SmallerL, BiggerL) -> scan_file_for_valid_messages_guids(Dir, FileName) -> {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), - {ok, Messages, [MsgId || {MsgId, _TotalSize, _FileOffset} <- Messages]}. + {ok, Messages, [Guid || {Guid, _TotalSize, _FileOffset} <- Messages]}. %% Takes the list in *ascending* order (i.e. eldest message %% first). This is the opposite of what scan_file_for_valid_messages @@ -1309,14 +1309,14 @@ find_contiguous_block_prefix([]) -> {0, []}; find_contiguous_block_prefix(List) -> find_contiguous_block_prefix(List, 0, []). -find_contiguous_block_prefix([], ExpectedOffset, MsgIds) -> - {ExpectedOffset, MsgIds}; -find_contiguous_block_prefix([{MsgId, TotalSize, ExpectedOffset} | Tail], - ExpectedOffset, MsgIds) -> +find_contiguous_block_prefix([], ExpectedOffset, Guids) -> + {ExpectedOffset, Guids}; +find_contiguous_block_prefix([{Guid, TotalSize, ExpectedOffset} | Tail], + ExpectedOffset, Guids) -> ExpectedOffset1 = ExpectedOffset + TotalSize, - find_contiguous_block_prefix(Tail, ExpectedOffset1, [MsgId | MsgIds]); -find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, MsgIds) -> - {ExpectedOffset, MsgIds}. + find_contiguous_block_prefix(Tail, ExpectedOffset1, [Guid | Guids]); +find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, Guids) -> + {ExpectedOffset, Guids}. 
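To make the contiguous-prefix scan above concrete: given three messages at offsets 0, 10 and 25 with sizes 10, 5 and 7, the gap between offset 15 and offset 25 cuts the prefix off after the second message. With invented atom guids:

    Msgs = [{a, 10, 0}, {b, 5, 10}, {c, 7, 25}],
    {15, [b, a]} = find_contiguous_block_prefix(Msgs).

The returned offset (15) is where the file can safely be truncated to, and the guid list comes back newest-first because each match is consed onto the accumulator.
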
build_index(true, _Files, State = #msstate { file_summary_ets = FileSummaryEts }) -> @@ -1373,8 +1373,8 @@ build_index_worker( Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( - fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) -> - case index_lookup(MsgId, State) of + fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case index_lookup(Guid, State) of not_found -> {VMAcc, VTSAcc}; StoreEntry -> ok = index_update(StoreEntry #msg_location { @@ -1394,7 +1394,7 @@ build_index_worker( %% file size. [] -> {undefined, case ValidMessages of [] -> 0; - _ -> {_MsgId, TotalSize, Offset} = + _ -> {_Guid, TotalSize, Offset} = lists:last(ValidMessages), Offset + TotalSize end}; @@ -1649,8 +1649,8 @@ find_unremoved_messages_in_file(File, Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order lists:foldl( - fun ({MsgId, _TotalSize, _Offset}, Acc) -> - case Index:lookup(MsgId, IndexState) of + fun ({Guid, _TotalSize, _Offset}, Acc) -> + case Index:lookup(Guid, IndexState) of Entry = #msg_location { file = File } -> [ Entry | Acc ]; _ -> Acc end @@ -1660,13 +1660,13 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> {FinalOffset, BlockStart1, BlockEnd1} = lists:foldl( - fun (#msg_location { guid = MsgId, offset = Offset, + fun (#msg_location { guid = Guid, offset = Offset, total_size = TotalSize }, {CurOffset, BlockStart, BlockEnd}) -> %% CurOffset is in the DestinationFile. %% Offset, BlockStart and BlockEnd are in the SourceFile %% update MsgLocation to reflect change of file and offset - ok = Index:update_fields(MsgId, + ok = Index:update_fields(Guid, [{#msg_location.file, Destination}, {#msg_location.offset, CurOffset}], IndexState), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 556c6968..2d9b6673 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -103,7 +103,7 @@ %% and seeding the message store on start up. %% Note that in general, the representation of a message's state as -%% the tuple: {('no_pub'|{MsgId, IsPersistent}), ('del'|'no_del'), +%% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'), %% ('ack'|'no_ack')} is richer than strictly necessary for most %% operations. However, for startup, and to ensure the safe and %% correct combination of journal entries with entries read from the @@ -265,12 +265,12 @@ init(Name, MsgStoreRecovered) -> Segment2 = #segment { pubs = PubCount1, acks = AckCount1 } = array:sparse_foldl( - fun (RelSeq, {{MsgId, _IsPersistent}, Del, no_ack}, + fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment3) -> Segment4 = maybe_add_to_journal( rabbit_msg_store:contains( - ?PERSISTENT_MSG_STORE, MsgId), + ?PERSISTENT_MSG_STORE, Guid), CleanShutdown, Del, RelSeq, Segment3), Segment4 end, Segment1 #segment { pubs = PubCount, @@ -327,15 +327,15 @@ terminate_and_erase(State) -> ok = delete_queue_directory(State1 #qistate.dir), State1. -write_published(MsgId, SeqId, IsPersistent, State) when is_binary(MsgId) -> - ?GUID_BYTES = size(MsgId), +write_published(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> + ?GUID_BYTES = size(Guid), {JournalHdl, State1} = get_journal_handle(State), ok = file_handle_cache:append( JournalHdl, [<<(case IsPersistent of true -> ?PUB_PERSIST_JPREFIX; false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, MsgId]), - maybe_flush_journal(add_to_journal(SeqId, {MsgId, IsPersistent}, State1)). 
+ end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]), + maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). write_delivered(SeqId, State) -> {JournalHdl, State1} = get_journal_handle(State), @@ -396,8 +396,8 @@ read_segment_entries(InitSeqId, State = #qistate { segments = Segments, {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, {array:sparse_foldr( - fun (RelSeq, {{MsgId, IsPersistent}, IsDelivered, no_ack}, Acc) -> - [ {MsgId, reconstruct_seq_id(Seg, RelSeq), + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) -> + [ {Guid, reconstruct_seq_id(Seg, RelSeq), IsPersistent, IsDelivered == del} | Acc ] end, [], journal_plus_segment(JEntries, SegEntries)), State #qistate { segments = segment_store(Segment1, Segments) }}. @@ -492,7 +492,7 @@ queue_index_walker({[], Gatherer}) -> case gatherer:fetch(Gatherer) of finished -> rabbit_misc:unlink_and_capture_exit(Gatherer), finished; - {value, {MsgId, Count}} -> {MsgId, Count, {[], Gatherer}} + {value, {Guid, Count}} -> {Guid, Count, {[], Gatherer}} end; queue_index_walker({[QueueName | QueueNames], Gatherer}) -> Child = make_ref(), @@ -519,9 +519,9 @@ queue_index_walker_reader(Gatherer, Ref, State, [Seg | SegNums]) -> queue_index_walker_reader1(_Gatherer, State, []) -> State; queue_index_walker_reader1( - Gatherer, State, [{MsgId, _SeqId, IsPersistent, _IsDelivered} | Msgs]) -> + Gatherer, State, [{Guid, _SeqId, IsPersistent, _IsDelivered} | Msgs]) -> case IsPersistent of - true -> gatherer:produce(Gatherer, {MsgId, 1}); + true -> gatherer:produce(Gatherer, {Guid, 1}); false -> ok end, queue_index_walker_reader1(Gatherer, State, Msgs). @@ -684,17 +684,17 @@ get_journal_handle(State = #qistate { journal_handle = Hdl }) -> bool_to_int(true ) -> 1; bool_to_int(false) -> 0. -write_entry_to_segment(_RelSeq, {{_MsgId, _IsPersistent}, del, ack}, Hdl) -> +write_entry_to_segment(_RelSeq, {{_Guid, _IsPersistent}, del, ack}, Hdl) -> Hdl; write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> ok = case Pub of no_pub -> ok; - {MsgId, IsPersistent} -> + {Guid, IsPersistent} -> file_handle_cache:append( Hdl, [<>, MsgId]) + RelSeq:?REL_SEQ_BITS>>, Guid]) end, ok = case {Del, Ack} of {no_del, no_ack} -> @@ -775,10 +775,10 @@ load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. - {ok, MsgId} = file_handle_cache:read(Hdl, ?GUID_BYTES), + {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES), SegEntries1 = array:set(RelSeq, - {{MsgId, 1 == IsPersistentNum}, no_del, no_ack}, + {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, SegEntries), load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount + 1, AckCount); @@ -837,13 +837,13 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> load_journal_entries(add_to_journal(SeqId, ack, State)); _ -> case file_handle_cache:read(Hdl, ?GUID_BYTES) of - {ok, <>} -> + {ok, <>} -> %% work around for binary data %% fragmentation. 
See %% rabbit_msg_file:read_next/2 - <> = - <>, - Publish = {MsgId, case Prefix of + <> = + <>, + Publish = {Guid, case Prefix of ?PUB_PERSIST_JPREFIX -> true; ?PUB_TRANS_JPREFIX -> false end}, @@ -873,7 +873,7 @@ add_to_journal(RelSeq, Action, case Action of del -> Segment1; ack -> Segment1 #segment { acks = AckCount + 1 }; - {_MsgId, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } + {_Guid, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } end; %% This is a more relaxed version of deliver_or_ack_msg because we can @@ -912,30 +912,30 @@ journal_plus_segment(JEntries, SegEntries) -> %% Here, the Out is the Seg Array which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment). -journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, +journal_plus_segment(Obj = {{_Guid, _IsPersistent}, no_del, no_ack}, not_found, RelSeq, Out) -> array:set(RelSeq, Obj, Out); -journal_plus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, +journal_plus_segment(Obj = {{_Guid, _IsPersistent}, del, no_ack}, not_found, RelSeq, Out) -> array:set(RelSeq, Obj, Out); -journal_plus_segment({{_MsgId, _IsPersistent}, del, ack}, +journal_plus_segment({{_Guid, _IsPersistent}, del, ack}, not_found, RelSeq, Out) -> array:reset(RelSeq, Out); journal_plus_segment({no_pub, del, no_ack}, - {Pub = {_MsgId, _IsPersistent}, no_del, no_ack}, + {Pub = {_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out) -> array:set(RelSeq, {Pub, del, no_ack}, Out); journal_plus_segment({no_pub, del, ack}, - {{_MsgId, _IsPersistent}, no_del, no_ack}, + {{_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out) -> array:reset(RelSeq, Out); journal_plus_segment({no_pub, no_del, ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, + {{_Guid, _IsPersistent}, del, no_ack}, RelSeq, Out) -> array:reset(RelSeq, Out). @@ -958,77 +958,77 @@ journal_minus_segment(JEntries, SegEntries) -> %% publish or ack is in both the journal and the segment. %% Both the same. 
Must be at least the publish -journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, no_ack}, +journal_minus_segment(Obj, Obj = {{_Guid, _IsPersistent}, _Del, no_ack}, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved + 1, AcksRemoved}; -journal_minus_segment(Obj, Obj = {{_MsgId, _IsPersistent}, _Del, ack}, +journal_minus_segment(Obj, Obj = {{_Guid, _IsPersistent}, _Del, ack}, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved + 1, AcksRemoved + 1}; %% Just publish in journal -journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, no_del, no_ack}, +journal_minus_segment(Obj = {{_Guid, _IsPersistent}, no_del, no_ack}, not_found, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; %% Just deliver in journal journal_minus_segment(Obj = {no_pub, del, no_ack}, - {{_MsgId, _IsPersistent}, no_del, no_ack}, + {{_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, no_ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, + {{_Guid, _IsPersistent}, del, no_ack}, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved, AcksRemoved}; %% Just ack in journal journal_minus_segment(Obj = {no_pub, no_del, ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, + {{_Guid, _IsPersistent}, del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, no_del, ack}, - {{_MsgId, _IsPersistent}, del, ack}, + {{_Guid, _IsPersistent}, del, ack}, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved, AcksRemoved}; %% Publish and deliver in journal -journal_minus_segment(Obj = {{_MsgId, _IsPersistent}, del, no_ack}, +journal_minus_segment(Obj = {{_Guid, _IsPersistent}, del, no_ack}, not_found, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({Pub, del, no_ack}, - {Pub = {_MsgId, _IsPersistent}, no_del, no_ack}, + {Pub = {_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, del, no_ack}, Out), PubsRemoved + 1, AcksRemoved}; %% Deliver and ack in journal journal_minus_segment(Obj = {no_pub, del, ack}, - {{_MsgId, _IsPersistent}, no_del, no_ack}, + {{_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, - {{_MsgId, _IsPersistent}, del, no_ack}, + {{_Guid, _IsPersistent}, del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, no_del, ack}, Out), PubsRemoved, AcksRemoved}; journal_minus_segment({no_pub, del, ack}, - {{_MsgId, _IsPersistent}, del, ack}, + {{_Guid, _IsPersistent}, del, ack}, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved, AcksRemoved + 1}; %% Publish, deliver and ack in journal -journal_minus_segment({{_MsgId, _IsPersistent}, del, ack}, +journal_minus_segment({{_Guid, _IsPersistent}, del, ack}, not_found, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved, AcksRemoved}; journal_minus_segment({Pub, del, ack}, - {Pub = {_MsgId, _IsPersistent}, no_del, no_ack}, + {Pub = {_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, del, ack}, Out), PubsRemoved + 1, AcksRemoved}; journal_minus_segment({Pub, del, ack}, - {Pub = {_MsgId, _IsPersistent}, del, no_ack}, + {Pub = {_Guid, 
_IsPersistent}, del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, no_del, ack}, Out), PubsRemoved + 1, AcksRemoved}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 8eb12939..66f2d3cc 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1022,170 +1022,170 @@ stop_msg_store() -> guid_bin(X) -> erlang:md5(term_to_binary(X)). -msg_store_contains(Atom, MsgIds) -> +msg_store_contains(Atom, Guids) -> Atom = lists:foldl( - fun (MsgId, Atom1) when Atom1 =:= Atom -> - rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, MsgId) end, - Atom, MsgIds). + fun (Guid, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end, + Atom, Guids). -msg_store_sync(MsgIds) -> +msg_store_sync(Guids) -> Ref = make_ref(), Self = self(), - ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, MsgIds, + ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, Guids, fun () -> Self ! {sync, Ref} end), receive {sync, Ref} -> ok after 10000 -> - io:format("Sync from msg_store missing for guids ~p~n", [MsgIds]), + io:format("Sync from msg_store missing for guids ~p~n", [Guids]), throw(timeout) end. -msg_store_read(MsgIds, MSCState) -> +msg_store_read(Guids, MSCState) -> lists:foldl( - fun (MsgId, MSCStateM) -> - {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read( - ?PERSISTENT_MSG_STORE, MsgId, MSCStateM), + fun (Guid, MSCStateM) -> + {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, Guid, MSCStateM), MSCStateN end, - MSCState, MsgIds). + MSCState, Guids). -msg_store_write(MsgIds, MSCState) -> +msg_store_write(Guids, MSCState) -> lists:foldl( - fun (MsgId, {ok, MSCStateN}) -> - rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId, MsgId, MSCStateN) end, - {ok, MSCState}, MsgIds). + fun (Guid, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Guid, Guid, MSCStateN) end, + {ok, MSCState}, Guids). 
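Stepping back to the journal framing in the rabbit_queue_index hunks above: each publish record is a prefix and sequence id packed with bit syntax, followed by the raw 16-byte guid (guid_bin/1 below uses erlang:md5, and write_published asserts ?GUID_BYTES = size(Guid)). The real widths come from the ?JPREFIX_BITS and ?SEQ_BITS macros, which fall outside this excerpt; the 4- and 60-bit values and the prefix constant below are stand-ins chosen only so the header packs into whole bytes:

    -module(journal_frame_sketch).
    -export([demo/0]).

    -define(JPREFIX_BITS, 4).                  %% assumed width
    -define(SEQ_BITS, 60).                     %% assumed width
    -define(PUB_PERSIST_JPREFIX, 2#0010).      %% assumed value

    encode(SeqId, Guid) when is_binary(Guid), byte_size(Guid) =:= 16 ->
        <<(?PUB_PERSIST_JPREFIX):?JPREFIX_BITS, SeqId:?SEQ_BITS, Guid/binary>>.

    decode(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS, Guid:16/binary>>) ->
        {Prefix, SeqId, Guid}.

    demo() ->
        Guid = erlang:md5(term_to_binary(example)),
        {2, 42, Guid} = decode(encode(42, Guid)),
        ok.
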
test_msg_store() -> stop_msg_store(), ok = start_msg_store_empty(), Self = self(), - MsgIds = [guid_bin(M) || M <- lists:seq(1,100)], - {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), + Guids = [guid_bin(M) || M <- lists:seq(1,100)], + {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), %% check we don't contain any of the msgs we're about to publish - false = msg_store_contains(false, MsgIds), + false = msg_store_contains(false, Guids), Ref = rabbit_guid:guid(), MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), %% publish the first half - {ok, MSCState1} = msg_store_write(MsgIds1stHalf, MSCState), + {ok, MSCState1} = msg_store_write(Guids1stHalf, MSCState), %% sync on the first half - ok = msg_store_sync(MsgIds1stHalf), + ok = msg_store_sync(Guids1stHalf), %% publish the second half - {ok, MSCState2} = msg_store_write(MsgIds2ndHalf, MSCState1), + {ok, MSCState2} = msg_store_write(Guids2ndHalf, MSCState1), %% sync on the first half again - the msg_store will be dirty, but %% we won't need the fsync - ok = msg_store_sync(MsgIds1stHalf), + ok = msg_store_sync(Guids1stHalf), %% check they're all in there - true = msg_store_contains(true, MsgIds), + true = msg_store_contains(true, Guids), %% publish the latter half twice so we hit the caching and ref count code - {ok, MSCState3} = msg_store_write(MsgIds2ndHalf, MSCState2), + {ok, MSCState3} = msg_store_write(Guids2ndHalf, MSCState2), %% check they're still all in there - true = msg_store_contains(true, MsgIds), + true = msg_store_contains(true, Guids), %% sync on the 2nd half, but do lots of individual syncs to try %% and cause coalescing to happen ok = lists:foldl( - fun (MsgId, ok) -> rabbit_msg_store:sync( + fun (Guid, ok) -> rabbit_msg_store:sync( ?PERSISTENT_MSG_STORE, - [MsgId], fun () -> Self ! {sync, MsgId} end) - end, ok, MsgIds2ndHalf), + [Guid], fun () -> Self ! 
{sync, Guid} end) + end, ok, Guids2ndHalf), lists:foldl( - fun(MsgId, ok) -> + fun(Guid, ok) -> receive - {sync, MsgId} -> ok + {sync, Guid} -> ok after 10000 -> io:format("Sync from msg_store missing (guid: ~p)~n", - [MsgId]), + [Guid]), throw(timeout) end - end, ok, MsgIds2ndHalf), + end, ok, Guids2ndHalf), %% it's very likely we're not dirty here, so the 1st half sync %% should hit a different code path - ok = msg_store_sync(MsgIds1stHalf), + ok = msg_store_sync(Guids1stHalf), %% read them all - MSCState4 = msg_store_read(MsgIds, MSCState3), + MSCState4 = msg_store_read(Guids, MSCState3), %% read them all again - this will hit the cache, not disk - MSCState5 = msg_store_read(MsgIds, MSCState4), + MSCState5 = msg_store_read(Guids, MSCState4), %% remove them all - ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIds), + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids), %% check first half doesn't exist - false = msg_store_contains(false, MsgIds1stHalf), + false = msg_store_contains(false, Guids1stHalf), %% check second half does exist - true = msg_store_contains(true, MsgIds2ndHalf), + true = msg_store_contains(true, Guids2ndHalf), %% read the second half again - MSCState6 = msg_store_read(MsgIds2ndHalf, MSCState5), + MSCState6 = msg_store_read(Guids2ndHalf, MSCState5), %% release the second half, just for fun (aka code coverage) - ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, MsgIds2ndHalf), + ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, Guids2ndHalf), %% read the second half again, just for fun (aka code coverage) - MSCState7 = msg_store_read(MsgIds2ndHalf, MSCState6), + MSCState7 = msg_store_read(Guids2ndHalf, MSCState6), ok = rabbit_msg_store:client_terminate(MSCState7), %% stop and restart, preserving every other msg in 2nd half ok = stop_msg_store(), ok = start_msg_store(fun ([]) -> finished; - ([MsgId|MsgIdsTail]) - when length(MsgIdsTail) rem 2 == 0 -> - {MsgId, 1, MsgIdsTail}; - ([MsgId|MsgIdsTail]) -> - {MsgId, 0, MsgIdsTail} - end, MsgIds2ndHalf), + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf), %% check we have the right msgs left lists:foldl( - fun (MsgId, Bool) -> - not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, MsgId)) - end, false, MsgIds2ndHalf), + fun (Guid, Bool) -> + not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)) + end, false, Guids2ndHalf), %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), %% check we don't contain any of the msgs - false = msg_store_contains(false, MsgIds), + false = msg_store_contains(false, Guids), %% publish the first half again MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), - {ok, MSCState9} = msg_store_write(MsgIds1stHalf, MSCState8), + {ok, MSCState9} = msg_store_write(Guids1stHalf, MSCState8), %% this should force some sort of sync internally otherwise misread ok = rabbit_msg_store:client_terminate( - msg_store_read(MsgIds1stHalf, MSCState9)), - ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, MsgIds1stHalf), + msg_store_read(Guids1stHalf, MSCState9)), + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf), %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), %% now safe to reuse guids %% push a lot of msgs in... 
BigCount = 100000, - MsgIdsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], + GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], Payload = << 0:65536 >>, ok = rabbit_msg_store:client_terminate( lists:foldl( - fun (MsgId, MSCStateN) -> + fun (Guid, MSCStateN) -> {ok, MSCStateM} = - rabbit_msg_store:write(?PERSISTENT_MSG_STORE, MsgId, Payload, MSCStateN), + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Guid, Payload, MSCStateN), MSCStateM - end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), MsgIdsBig)), + end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), GuidsBig)), %% now read them to ensure we hit the fast client-side reading ok = rabbit_msg_store:client_terminate( lists:foldl( - fun (MsgId, MSCStateM) -> + fun (Guid, MSCStateM) -> {{ok, Payload}, MSCStateN} = - rabbit_msg_store:read(?PERSISTENT_MSG_STORE, MsgId, MSCStateM), + rabbit_msg_store:read(?PERSISTENT_MSG_STORE, Guid, MSCStateM), MSCStateN - end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), MsgIdsBig)), + end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), GuidsBig)), %% .., then 3s by 1... ok = lists:foldl( - fun (MsgId, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(MsgId)]) + fun (Guid, ok) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(Guid)]) end, ok, lists:seq(BigCount, 1, -3)), %% .., then remove 3s by 2, from the young end first. This hits %% GC (under 50% good data left, but no empty files. Must GC). ok = lists:foldl( - fun (MsgId, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(MsgId)]) + fun (Guid, ok) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(Guid)]) end, ok, lists:seq(BigCount-1, 1, -3)), %% .., then remove 3s by 3, from the young end first. This hits %% GC... ok = lists:foldl( - fun (MsgId, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(MsgId)]) + fun (Guid, ok) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(Guid)]) end, ok, lists:seq(BigCount-2, 1, -3)), %% ensure empty - false = msg_store_contains(false, MsgIdsBig), + false = msg_store_contains(false, GuidsBig), %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), @@ -1211,13 +1211,13 @@ queue_index_publish(SeqIds, Persistent, Qi) -> end, {A, B, MSCStateEnd} = lists:foldl( - fun (SeqId, {QiN, SeqIdsMsgIdsAcc, MSCStateN}) -> + fun (SeqId, {QiN, SeqIdsGuidsAcc, MSCStateN}) -> Guid = rabbit_guid:guid(), QiM = rabbit_queue_index:write_published(Guid, SeqId, Persistent, QiN), {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, Guid, MSCStateN), - {QiM, [{SeqId, Guid} | SeqIdsMsgIdsAcc], MSCStateM} + {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), ok = rabbit_msg_store:delete_client(MsgStore, Ref), ok = rabbit_msg_store:client_terminate(MSCStateEnd), @@ -1235,8 +1235,8 @@ queue_index_flush_journal(Qi) -> verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; verify_read_with_published(Delivered, Persistent, - [{MsgId, SeqId, Persistent, Delivered}|Read], - [{SeqId, MsgId}|Published]) -> + [{Guid, SeqId, Persistent, Delivered}|Read], + [{SeqId, Guid}|Published]) -> verify_read_with_published(Delivered, Persistent, Read, Published); verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. 
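verify_read_with_published/4 above walks the read results and the published {SeqId, Guid} pairs in lock-step, insisting every entry carries the expected persistence and delivery flags; any mismatch falls through to the final clause and yields ko. A toy call with invented guids that exercises the happy path:

    ok = verify_read_with_published(
           false, true,
           [{g1, 0, true, false}, {g2, 1, true, false}],
           [{0, g1}, {1, g2}]).
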
@@ -1251,12 +1251,12 @@ test_queue_index() -> {0, _PRef, _TRef, _Terms, Qi0} = rabbit_queue_index:init(test_queue(), false), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), - {Qi2, SeqIdsMsgIdsA} = queue_index_publish(SeqIdsA, false, Qi1), + {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2), {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3), ok = verify_read_with_published(false, false, ReadA, - lists:reverse(SeqIdsMsgIdsA)), + lists:reverse(SeqIdsGuidsA)), %% call terminate twice to prove it's idempotent _Qi5 = rabbit_queue_index:terminate([], rabbit_queue_index:terminate([], Qi4)), ok = stop_msg_store(), @@ -1265,12 +1265,12 @@ test_queue_index() -> {0, _PRef1, _TRef1, _Terms1, Qi6} = rabbit_queue_index:init(test_queue(), false), {0, 0, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), - {Qi8, SeqIdsMsgIdsB} = queue_index_publish(SeqIdsB, true, Qi7), + {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8), {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9), ok = verify_read_with_published(false, true, ReadB, - lists:reverse(SeqIdsMsgIdsB)), + lists:reverse(SeqIdsGuidsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), ok = stop_msg_store(), ok = rabbit_queue_index:start_msg_stores([test_queue()]), @@ -1282,7 +1282,7 @@ test_queue_index() -> Qi14 = queue_index_deliver(SeqIdsB, Qi13), {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14), ok = verify_read_with_published(true, true, ReadC, - lists:reverse(SeqIdsMsgIdsB)), + lists:reverse(SeqIdsGuidsB)), Qi16 = rabbit_queue_index:write_acks(SeqIdsB, Qi15), Qi17 = queue_index_flush_journal(Qi16), %% Everything will have gone now because #pubs == #acks @@ -1302,20 +1302,20 @@ test_queue_index() -> %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), {0, _PRef4, _TRef4, _Terms4, Qi22} = rabbit_queue_index:init(test_queue(), false), - {Qi23, _SeqIdsMsgIdsC} = queue_index_publish(SeqIdsC, false, Qi22), + {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = queue_index_deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), Qi26 = queue_index_flush_journal(Qi25), - {Qi27, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize], false, Qi26), + {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), _Qi28 = rabbit_queue_index:terminate_and_erase(Qi27), ok = stop_msg_store(), ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment {0, _PRef5, _TRef5, _Terms5, Qi29} = rabbit_queue_index:init(test_queue(), false), - {Qi30, _SeqIdsMsgIdsC2} = queue_index_publish(SeqIdsC, false, Qi29), + {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = queue_index_deliver(SeqIdsC, Qi30), - {Qi32, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi31), + {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), Qi33 = rabbit_queue_index:write_acks(SeqIdsC, Qi32), Qi34 = queue_index_flush_journal(Qi33), _Qi35 = rabbit_queue_index:terminate_and_erase(Qi34), @@ -1325,7 +1325,7 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), {0, _PRef6, _TRef6, _Terms6, Qi36} = rabbit_queue_index:init(test_queue(), false), - {Qi37, 
_SeqIdsMsgIdsD} = queue_index_publish(SeqIdsD, false, Qi36), + {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = queue_index_deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), Qi40 = queue_index_flush_journal(Qi39), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f2e9c19c..164533b7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -319,7 +319,7 @@ publish(Msg, State) -> {_SeqId, State2} = publish(Msg, false, false, State1), State2. -publish_delivered(Msg = #basic_message { guid = MsgId, +publish_delivered(Msg = #basic_message { guid = Guid, is_persistent = IsPersistent }, State = #vqstate { len = 0, index_state = IndexState, next_seq_id = SeqId, @@ -331,7 +331,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, State1 = State #vqstate { out_counter = OutCount + 1, in_counter = InCount + 1 }, MsgStatus = #msg_status { - msg = Msg, guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, + msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = true, msg_on_disk = false, index_on_disk = false }, {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), @@ -344,7 +344,7 @@ publish_delivered(Msg = #basic_message { guid = MsgId, true -> {#msg_status { index_on_disk = true }, IndexState1} = maybe_write_index_to_disk(false, MsgStatus1, IndexState), - {{ack_index_and_store, MsgId, SeqId, + {{ack_index_and_store, Guid, SeqId, find_msg_store(IsPersistent, PersistentStore)}, State2 #vqstate { index_state = IndexState1, next_seq_id = SeqId + 1 }}; @@ -411,7 +411,7 @@ fetch(State = {empty, _Q4} -> fetch_from_q3_or_delta(State); {{value, #msg_status { - msg = Msg, guid = MsgId, seq_id = SeqId, + msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Q4a} -> @@ -436,11 +436,11 @@ fetch(State = AckTag = case IsPersistent of true -> true = MsgOnDisk, %% ASSERTION - {ack_index_and_store, MsgId, SeqId, MsgStore}; + {ack_index_and_store, Guid, SeqId, MsgStore}; false -> ok = case MsgOnDisk of true -> rabbit_msg_store:remove( - MsgStore, [MsgId]); + MsgStore, [Guid]); false -> ok end, ack_not_on_disk @@ -457,22 +457,22 @@ ack([], State) -> ack(AckTags, State = #vqstate { index_state = IndexState, persistent_count = PCount, persistent_store = PersistentStore }) -> - {MsgIdsByStore, SeqIds} = + {GuidsByStore, SeqIds} = lists:foldl( fun (ack_not_on_disk, Acc) -> Acc; - ({ack_index_and_store, MsgId, SeqId, MsgStore}, {Dict, SeqIds}) -> - {rabbit_misc:dict_cons(MsgStore, MsgId, Dict), [SeqId | SeqIds]} + ({ack_index_and_store, Guid, SeqId, MsgStore}, {Dict, SeqIds}) -> + {rabbit_misc:dict_cons(MsgStore, Guid, Dict), [SeqId | SeqIds]} end, {dict:new(), []}, AckTags), IndexState1 = case SeqIds of [] -> IndexState; _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) end, - ok = dict:fold(fun (MsgStore, MsgIds, ok) -> - rabbit_msg_store:remove(MsgStore, MsgIds) - end, ok, MsgIdsByStore), - PCount1 = PCount - case dict:find(PersistentStore, MsgIdsByStore) of + ok = dict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case dict:find(PersistentStore, GuidsByStore) of error -> 0; - {ok, MsgIds} -> length(MsgIds) + {ok, Guids} -> length(Guids) end, State #vqstate { index_state = IndexState1, persistent_count = PCount1 }. 
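Both ack/2 above and requeue/2 below lean on rabbit_misc:dict_cons/3 to bucket guids by the message store they live in, so each store gets exactly one remove or release call. The function body falls outside this excerpt, but from its call sites it is presumably the standard cons-into-dict idiom:

    %% presumed shape of rabbit_misc:dict_cons/3, inferred from its use
    dict_cons(Key, Value, Dict) ->
        dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict).

    %% e.g. folding {Store, Guid} pairs into Store -> [Guid] buckets:
    %% lists:foldl(fun ({Store, Guid}, D) -> dict_cons(Store, Guid, D) end,
    %%             dict:new(),
    %%             [{persistent, g1}, {transient, g2}, {persistent, g3}])
    %% leaves persistent -> [g3, g1] and transient -> [g2].
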
@@ -533,20 +533,20 @@ delete_and_terminate(State) -> %% msg_store:release so that the cache isn't held full of msgs which %% are now at the tail of the queue. requeue(MsgsWithAckTags, State) -> - {SeqIds, MsgIdsByStore, + {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, persistent_count = PCount, persistent_store = PersistentStore }} = lists:foldl( - fun ({Msg = #basic_message { guid = MsgId }, AckTag}, + fun ({Msg = #basic_message { guid = Guid }, AckTag}, {SeqIdsAcc, Dict, StateN}) -> {SeqIdsAcc1, Dict1, MsgOnDisk} = case AckTag of ack_not_on_disk -> {SeqIdsAcc, Dict, false}; - {ack_index_and_store, MsgId, SeqId, MsgStore} -> + {ack_index_and_store, Guid, SeqId, MsgStore} -> {[SeqId | SeqIdsAcc], - rabbit_misc:dict_cons(MsgStore, MsgId, Dict), + rabbit_misc:dict_cons(MsgStore, Guid, Dict), true} end, {_SeqId, StateN1} = @@ -558,21 +558,21 @@ requeue(MsgsWithAckTags, State) -> [] -> IndexState; _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) end, - ok = dict:fold(fun (MsgStore, MsgIds, ok) -> - rabbit_msg_store:release(MsgStore, MsgIds) - end, ok, MsgIdsByStore), - PCount1 = PCount - case dict:find(PersistentStore, MsgIdsByStore) of + ok = dict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:release(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case dict:find(PersistentStore, GuidsByStore) of error -> 0; - {ok, MsgIds} -> length(MsgIds) + {ok, Guids} -> length(Guids) end, State1 #vqstate { index_state = IndexState1, persistent_count = PCount1 }. -tx_publish(Msg = #basic_message { is_persistent = true, guid = MsgId }, +tx_publish(Msg = #basic_message { is_persistent = true, guid = Guid }, State = #vqstate { msg_store_clients = MSCState, persistent_store = PersistentStore }) -> MsgStatus = #msg_status { - msg = Msg, guid = MsgId, seq_id = undefined, is_persistent = true, + msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, is_delivered = false, msg_on_disk = false, index_on_disk = false }, {#msg_status { msg_on_disk = true }, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), @@ -591,8 +591,8 @@ tx_commit(Pubs, AckTags, From, State = #vqstate { persistent_store = PersistentStore }) -> %% If we are a non-durable queue, or we have no persistent pubs, %% we can skip the msg_store loop. - PersistentMsgIds = persistent_guids(Pubs), - IsTransientPubs = [] == PersistentMsgIds, + PersistentGuids = persistent_guids(Pubs), + IsTransientPubs = [] == PersistentGuids, case IsTransientPubs orelse ?TRANSIENT_MSG_STORE == PersistentStore of true -> @@ -601,7 +601,7 @@ tx_commit(Pubs, AckTags, From, State = false -> Self = self(), ok = rabbit_msg_store:sync( - ?PERSISTENT_MSG_STORE, PersistentMsgIds, + ?PERSISTENT_MSG_STORE, PersistentGuids, fun () -> ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue( Self, fun (StateN) -> tx_commit_post_msg_store( @@ -636,7 +636,7 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, persistent_store = PersistentStore }) -> Acks = lists:flatten(SAcks), State1 = ack(Acks, State), - AckSeqIds = lists:foldl(fun ({ack_index_and_store, _MsgId, + AckSeqIds = lists:foldl(fun ({ack_index_and_store, _Guid, SeqId, ?PERSISTENT_MSG_STORE}, SeqIdsAcc) -> [SeqId | SeqIdsAcc]; (_, SeqIdsAcc) -> @@ -700,13 +700,13 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> {Avg, {Then, Count}}. persistent_guids(Pubs) -> - [MsgId || Obj = #basic_message { guid = MsgId } <- Pubs, + [Guid || Obj = #basic_message { guid = Guid } <- Pubs, Obj #basic_message.is_persistent]. 
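persistent_guids/1 above is a plain filter over the publish list:

    %% invented example: only the persistent message's guid survives
    %% persistent_guids([#basic_message{guid = g1, is_persistent = true},
    %%                   #basic_message{guid = g2, is_persistent = false}])
    %% =:= [g1]
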
betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> {Filtered, IndexState1} = lists:foldr( - fun ({MsgId, SeqId, IsPersistent, IsDelivered}, + fun ({Guid, SeqId, IsPersistent, IsDelivered}, {FilteredAcc, IndexStateAcc}) -> case SeqId < TransientThreshold andalso not IsPersistent of true -> @@ -722,7 +722,7 @@ betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> case SeqId < SeqIdLimit of true -> {[#msg_status { msg = undefined, - guid = MsgId, + guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, @@ -838,12 +838,12 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, end. remove_queue_entries(PersistentStore, Fold, Q, IndexState) -> - {_PersistentStore, Count, MsgIdsByStore, SeqIds, IndexState1} = + {_PersistentStore, Count, GuidsByStore, SeqIds, IndexState1} = Fold(fun remove_queue_entries1/2, {PersistentStore, 0, dict:new(), [], IndexState}, Q), - ok = dict:fold(fun (MsgStore, MsgIds, ok) -> - rabbit_msg_store:remove(MsgStore, MsgIds) - end, ok, MsgIdsByStore), + ok = dict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), IndexState2 = case SeqIds of [] -> IndexState1; @@ -852,18 +852,18 @@ remove_queue_entries(PersistentStore, Fold, Q, IndexState) -> {Count, IndexState2}. remove_queue_entries1( - #msg_status { guid = MsgId, seq_id = SeqId, + #msg_status { guid = Guid, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {PersistentStore, CountN, MsgIdsByStore, SeqIdsAcc, IndexStateN}) -> - MsgIdsByStore1 = + {PersistentStore, CountN, GuidsByStore, SeqIdsAcc, IndexStateN}) -> + GuidsByStore1 = case {MsgOnDisk, IsPersistent} of {true, true} -> - rabbit_misc:dict_cons(PersistentStore, MsgId, MsgIdsByStore); + rabbit_misc:dict_cons(PersistentStore, Guid, GuidsByStore); {true, false} -> - rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, MsgId, MsgIdsByStore); + rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, Guid, GuidsByStore); {false, _} -> - MsgIdsByStore + GuidsByStore end, SeqIdsAcc1 = case IndexOnDisk of true -> [SeqId | SeqIdsAcc]; @@ -874,7 +874,7 @@ remove_queue_entries1( SeqId, IndexStateN); false -> IndexStateN end, - {PersistentStore, CountN + 1, MsgIdsByStore1, SeqIdsAcc1, IndexStateN1}. + {PersistentStore, CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. fetch_from_q3_or_delta(State = #vqstate { q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, @@ -889,12 +889,12 @@ fetch_from_q3_or_delta(State = #vqstate { true = queue:is_empty(Q1), %% ASSERTION {empty, State}; {{value, IndexOnDisk, MsgStatus = #msg_status { - msg = undefined, guid = MsgId, + msg = undefined, guid = Guid, is_persistent = IsPersistent }}, Q3a} -> {{ok, Msg = #basic_message { is_persistent = IsPersistent, - guid = MsgId }}, MSCState1} = + guid = Guid }}, MSCState1} = read_from_msg_store( - PersistentStore, MSCState, IsPersistent, MsgId), + PersistentStore, MSCState, IsPersistent, Guid), Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), RamIndexCount1 = case IndexOnDisk of true -> RamIndexCount; @@ -978,12 +978,12 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, end end. 
-publish(Msg = #basic_message { is_persistent = IsPersistent, guid = MsgId }, +publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, IsDelivered, MsgOnDisk, State = #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount, persistent_count = PCount }) -> MsgStatus = #msg_status { - msg = Msg, guid = MsgId, seq_id = SeqId, is_persistent = IsPersistent, + msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = false }, PCount1 = PCount + case IsPersistent of @@ -1084,11 +1084,11 @@ with_msg_store_state(_PersistentStore, {MSCStateP, {MSCStateT, TRef}}, false, {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), {Result, {MSCStateP, {MSCStateT1, TRef}}}. -read_from_msg_store(PersistentStore, MSCState, IsPersistent, MsgId) -> +read_from_msg_store(PersistentStore, MSCState, IsPersistent, Guid) -> with_msg_store_state( PersistentStore, MSCState, IsPersistent, fun (MsgStore, MSCState1) -> - rabbit_msg_store:read(MsgStore, MsgId, MSCState1) + rabbit_msg_store:read(MsgStore, Guid, MSCState1) end). maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus = @@ -1096,7 +1096,7 @@ maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus = {MsgStatus, MSCState}; maybe_write_msg_to_disk(PersistentStore, Force, MsgStatus = #msg_status { - msg = Msg, guid = MsgId, + msg = Msg, guid = Guid, is_persistent = IsPersistent }, MSCState) when Force orelse IsPersistent -> {ok, MSCState1} = @@ -1104,7 +1104,7 @@ maybe_write_msg_to_disk(PersistentStore, Force, PersistentStore, MSCState, IsPersistent, fun (MsgStore, MSCState2) -> rabbit_msg_store:write( - MsgStore, MsgId, ensure_binary_properties(Msg), MSCState2) + MsgStore, Guid, ensure_binary_properties(Msg), MSCState2) end), {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus, MSCState) -> @@ -1115,13 +1115,13 @@ maybe_write_index_to_disk(_Force, MsgStatus = true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION {MsgStatus, IndexState}; maybe_write_index_to_disk(Force, MsgStatus = #msg_status { - guid = MsgId, seq_id = SeqId, + guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered }, IndexState) when Force orelse IsPersistent -> true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION IndexState1 = rabbit_queue_index:write_published( - MsgId, SeqId, IsPersistent, IndexState), + Guid, SeqId, IsPersistent, IndexState), {MsgStatus #msg_status { index_on_disk = true }, case IsDelivered of true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); -- cgit v1.2.1 From 785035da0b346c16ceb1f9586c6a6fcee9bca0b6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 12:30:37 +0100 Subject: Combine ram_duration and update_ram_duration --- include/rabbit_backing_queue_type_spec.hrl | 3 +-- src/rabbit_amqqueue_process.erl | 3 +-- src/rabbit_backing_queue_type.erl | 7 ++---- src/rabbit_tests.erl | 6 ++--- src/rabbit_variable_queue.erl | 40 ++++++++++++++---------------- 5 files changed, 26 insertions(+), 33 deletions(-) diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index ac47ccba..ec6db218 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -49,8 +49,7 @@ -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). 
--spec(update_ram_duration/1 :: (state()) -> state()). --spec(ram_duration/1 :: (state()) -> number()). +-spec(update_ram_duration/1 :: (state()) -> {number(), state()}). -spec(sync_callback/1 :: (state()) -> ('undefined' | (fun ((A) -> {boolean(), A})))). -spec(handle_pre_hibernate/1 :: (state()) -> state()). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a4d653e2..81bd37ae 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -932,8 +932,7 @@ handle_cast({flush, ChPid}, State) -> handle_cast(update_ram_duration, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - BQS1 = BQ:update_ram_duration(BQS), - RamDuration = BQ:ram_duration(BQS1), + {BQS1, RamDuration} = BQ:update_ram_duration(BQS), DesiredDuration = rabbit_memory_monitor:report_ram_duration(self(), RamDuration), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index 526152f1..8f77d5b1 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -105,13 +105,10 @@ behaviour_info(callbacks) -> {set_ram_duration_target, 2}, %% Recalculate the duration internally (likely to be just update - %% your internal rates). + %% your internal rates), and report how many seconds the messages + %% in RAM represent given the current rates of the queue. {update_ram_duration, 1}, - %% Report how many seconds the messages in RAM represent given - %% the current rates of the queue. - {ram_duration, 1}, - %% Can return 'undefined' or a thunk which will receive the %% state, and must return the state, as soon as the queue process %% can manage (either on an empty mailbox, or when a timer diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 66f2d3cc..2903a69b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1384,7 +1384,7 @@ test_variable_queue_dynamic_duration_change() -> %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, VQ1 = variable_queue_publish(false, Len1, VQ0), - VQ2 = rabbit_variable_queue:update_ram_duration(VQ1), + {_Duration, VQ2} = rabbit_variable_queue:update_ram_duration(VQ1), {ok, _TRef} = timer:send_after(1000, {duration, 60, fun (V) -> (V*0.75)-1 end}), VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), @@ -1420,7 +1420,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> _ -> Fun end, {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), - VQ4 = rabbit_variable_queue:update_ram_duration(VQ3), + {_Duration, VQ4} = rabbit_variable_queue:update_ram_duration(VQ3), VQ5 = %% /37 otherwise the duration is just to high to stress things rabbit_variable_queue:set_ram_duration_target(N/37, VQ4), io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), @@ -1434,7 +1434,7 @@ test_variable_queue_partial_segments_delta_thing() -> HalfSegment = SegmentSize div 2, VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), - VQ2 = rabbit_variable_queue:update_ram_duration(VQ1), + {_Duration, VQ2} = rabbit_variable_queue:update_ram_duration(VQ1), VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 164533b7..e929694f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -33,7 +33,7 @@ -export([init/2, terminate/1, 
publish/2, publish_delivered/2, set_ram_duration_target/2, update_ram_duration/1, - ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, + fetch/1, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, sync_callback/1, handle_pre_hibernate/1, status/1]). @@ -378,30 +378,28 @@ update_ram_duration(State = #vqstate { egress_rate = Egress, in_counter = InCount, out_counter = OutCount, ram_msg_count = RamMsgCount, - duration_target = DurationTarget }) -> + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> Now = now(), {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), - set_ram_duration_target( - DurationTarget, - State #vqstate { egress_rate = Egress1, - avg_egress_rate = AvgEgressRate, - ingress_rate = Ingress1, - avg_ingress_rate = AvgIngressRate, - rate_timestamp = Now, - ram_msg_count_prev = RamMsgCount, - out_counter = 0, in_counter = 0 }). - -ram_duration(#vqstate { avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate, - ram_msg_count = RamMsgCount, - ram_msg_count_prev = RamMsgCountPrev }) -> - %% msgs / (msgs/sec) == sec - case AvgEgressRate == 0 andalso AvgIngressRate == 0 of - true -> infinity; - false -> (RamMsgCountPrev + RamMsgCount) / (2 * (AvgEgressRate + AvgIngressRate)) - end. + Duration = %% msgs / (msgs/sec) == sec + case AvgEgressRate == 0 andalso AvgIngressRate == 0 of + true -> infinity; + false -> (RamMsgCountPrev + RamMsgCount) / + (2 * (AvgEgressRate + AvgIngressRate)) + end, + + {Duration, set_ram_duration_target( + DurationTarget, + State #vqstate { egress_rate = Egress1, + avg_egress_rate = AvgEgressRate, + ingress_rate = Ingress1, + avg_ingress_rate = AvgIngressRate, + rate_timestamp = Now, + ram_msg_count_prev = RamMsgCount, + out_counter = 0, in_counter = 0 })}. fetch(State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, -- cgit v1.2.1 From 5f383986b4860997a26f1b3b6921e414cdeb53a5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 12:40:20 +0100 Subject: s/(rabbit_variable_queue:)update_/\1/g. Also, fix in amqqueue_process where the result tuple was the wrong way around --- include/rabbit_backing_queue_type_spec.hrl | 2 +- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_backing_queue_type.erl | 9 +++++---- src/rabbit_tests.erl | 6 +++--- src/rabbit_variable_queue.erl | 18 +++++++++--------- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl index ec6db218..afb54918 100644 --- a/include/rabbit_backing_queue_type_spec.hrl +++ b/include/rabbit_backing_queue_type_spec.hrl @@ -49,7 +49,7 @@ -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). --spec(update_ram_duration/1 :: (state()) -> {number(), state()}). +-spec(ram_duration/1 :: (state()) -> {number(), state()}). -spec(sync_callback/1 :: (state()) -> ('undefined' | (fun ((A) -> {boolean(), A})))). -spec(handle_pre_hibernate/1 :: (state()) -> state()). 
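The merged update_ram_duration/1 above (renamed ram_duration/1 in the follow-up commit) returns the duration alongside the updated state. The formula is just messages divided by message rate: the RAM backlog is averaged over the previous and current samples ((RamPrev + RamNow) / 2) and divided by the combined ingress-plus-egress rate, with infinity reported when the queue is idle. A worked restatement with invented figures:

    %% same shape as the case expression above; demo/0 values are invented
    duration(RamPrev, RamNow, AvgEgress, AvgIngress) ->
        case AvgEgress == 0 andalso AvgIngress == 0 of
            true  -> infinity;
            false -> (RamPrev + RamNow) / (2 * (AvgEgress + AvgIngress))
        end.

    demo() ->
        5.0      = duration(1500, 2500, 100, 300), %% 4000 msgs / 800 msg/s
        infinity = duration(0, 0, 0, 0),           %% idle queue: no estimate
        ok.
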
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 81bd37ae..9697cc13 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -932,7 +932,7 @@ handle_cast({flush, ChPid}, State) -> handle_cast(update_ram_duration, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - {BQS1, RamDuration} = BQ:update_ram_duration(BQS), + {RamDuration, BQS1} = BQ:ram_duration(BQS), DesiredDuration = rabbit_memory_monitor:report_ram_duration(self(), RamDuration), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index 8f77d5b1..8e3cce14 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -104,10 +104,11 @@ behaviour_info(callbacks) -> %% by the duration and the current queue rates. {set_ram_duration_target, 2}, - %% Recalculate the duration internally (likely to be just update - %% your internal rates), and report how many seconds the messages - %% in RAM represent given the current rates of the queue. - {update_ram_duration, 1}, + %% Optionally recalculate the duration internally (likely to be + %% just update your internal rates), and report how many seconds + %% the messages in RAM represent given the current rates of the + %% queue. + {ram_duration, 1}, %% Can return 'undefined' or a thunk which will receive the %% state, and must return the state, as soon as the queue process diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 2903a69b..b186538b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1384,7 +1384,7 @@ test_variable_queue_dynamic_duration_change() -> %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, VQ1 = variable_queue_publish(false, Len1, VQ0), - {_Duration, VQ2} = rabbit_variable_queue:update_ram_duration(VQ1), + {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), {ok, _TRef} = timer:send_after(1000, {duration, 60, fun (V) -> (V*0.75)-1 end}), VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), @@ -1420,7 +1420,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> _ -> Fun end, {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), - {_Duration, VQ4} = rabbit_variable_queue:update_ram_duration(VQ3), + {_Duration, VQ4} = rabbit_variable_queue:ram_duration(VQ3), VQ5 = %% /37 otherwise the duration is just to high to stress things rabbit_variable_queue:set_ram_duration_target(N/37, VQ4), io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), @@ -1434,7 +1434,7 @@ test_variable_queue_partial_segments_delta_thing() -> HalfSegment = SegmentSize div 2, VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), - {_Duration, VQ2} = rabbit_variable_queue:update_ram_duration(VQ1), + {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index e929694f..bbf78bb7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,7 +32,7 @@ -module(rabbit_variable_queue). 
-export([init/2, terminate/1, publish/2, publish_delivered/2, - set_ram_duration_target/2, update_ram_duration/1, + set_ram_duration_target/2, ram_duration/1, fetch/1, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, tx_commit/4, sync_callback/1, handle_pre_hibernate/1, status/1]). @@ -372,14 +372,14 @@ set_ram_duration_target( false -> reduce_memory_use(State1) end. -update_ram_duration(State = #vqstate { egress_rate = Egress, - ingress_rate = Ingress, - rate_timestamp = Timestamp, - in_counter = InCount, - out_counter = OutCount, - ram_msg_count = RamMsgCount, - duration_target = DurationTarget, - ram_msg_count_prev = RamMsgCountPrev }) -> +ram_duration(State = #vqstate { egress_rate = Egress, + ingress_rate = Ingress, + rate_timestamp = Timestamp, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> Now = now(), {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), -- cgit v1.2.1 From 846195a8a44a8fed5685ae9bf7f297a2090deb1a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 12:43:37 +0100 Subject: cosmetic - reduce diff w default --- include/rabbit.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 982d90e9..51c8b35e 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -89,7 +89,6 @@ %% this is really an abstract type, but dialyzer does not support them -type(guid() :: binary()). --type(msg_id() :: non_neg_integer()). -type(txn() :: guid()). -type(pkey() :: guid()). -type(r(Kind) :: @@ -158,6 +157,7 @@ sender :: pid(), message :: message()}). %% this really should be an abstract type +-type(msg_id() :: non_neg_integer()). -type(qmsg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). -type(listener() :: #listener{node :: erlang_node(), -- cgit v1.2.1 From cdb56ae79191a34fb01341a8edbdabd0051b9071 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 13:12:52 +0100 Subject: refactor: inline rabbit_misc:random_geometric it's a tiny fun and is only used in one place --- src/rabbit_misc.erl | 6 ------ src/rabbit_msg_store.erl | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 6be36a0d..136ca6fd 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -60,7 +60,6 @@ -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). -export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). --export([random_geometric/1]). -import(mnesia). -import(lists). @@ -141,7 +140,6 @@ -spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). --spec(random_geometric/1 :: (float()) -> non_neg_integer()). -endif. @@ -641,7 +639,3 @@ unlink_and_capture_exit(Pid) -> receive {'EXIT', Pid, _} -> ok after 0 -> ok end. - -random_geometric(P) when 0.0 < P andalso P < 1.0 -> - U = 1.0 - random:uniform(), - ceil(math:log(U) / math:log(1.0 - P)). 
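The removed random_geometric/1 is a standard inverse-transform sampler: for U
uniform strictly between 0.0 and 1.0, N = ceil(ln(U) / ln(1-P)) satisfies
P(N =< n) = 1 - (1-P)^n, i.e. N is the number of Bernoulli(P) trials up to
and including the first success. The next hunk inlines exactly this
expression at its only call site. For reference, the sampler in isolation (a
sketch, not code from the tree):

    geometric(P) when 0.0 < P andalso P < 1.0 ->
        U = 1.0 - random:uniform(),  %% keeps the argument of log/1 off 0.0
        rabbit_misc:ceil(math:log(U) / math:log(1.0 - P)).
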
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b6d6c5da..086f26b8 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1444,7 +1444,8 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, file_summary_ets = FileSummaryEts }) when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> First = ets:first(FileSummaryEts), - N = rabbit_misc:random_geometric(?GEOMETRIC_P), + N = rabbit_misc:ceil(math:log(1.0 - random:uniform()) / + math:log(1.0 - ?GEOMETRIC_P)), case find_files_to_gc(FileSummaryEts, N, First) of undefined -> State; -- cgit v1.2.1 From 20fdbfe70237382018970e434ef744b759875506 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 13:22:21 +0100 Subject: cosmetic --- src/rabbit_amqqueue.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 6fe60bba..29b253d1 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -162,9 +162,9 @@ recover_durable_queues(DurableQueues) -> [] -> false end end) of - true -> [Q | Acc]; - false -> exit(Q#amqqueue.pid, shutdown), - Acc + true -> [Q | Acc]; + false -> exit(Q#amqqueue.pid, shutdown), + Acc end end, [], DurableQueues), %% Issue inits to *all* the queues so that they all init at the same time -- cgit v1.2.1 From c03a73313acc605fe3009f9d90cb6c1d6ac26065 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 13:23:16 +0100 Subject: Fixed recursive delete - it now takes a list of paths, and it deletes directories it comes across --- src/rabbit_misc.erl | 31 +++++++++++++++++++++---------- src/rabbit_mnesia.erl | 3 +-- src/rabbit_msg_store.erl | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 136ca6fd..c75c48d9 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -137,7 +137,7 @@ -spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt'). -spec(version_compare/3 :: (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()). --spec(recursive_delete/1 :: (string()) -> 'ok' | {'error', any()}). +-spec(recursive_delete/1 :: ([string()]) -> 'ok' | {'error', any()}). -spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). @@ -609,23 +609,34 @@ version_compare(A, B) -> ANum > BNum -> gt end. -recursive_delete(Path) -> +recursive_delete(Files) -> + lists:foldl( + fun (Path, ok ) -> recursive_delete1(Path); + (_Path, {error, _Err} = Error) -> Error + end, ok, Files). 
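Note that lists:foldl/3 always traverses the whole list: once the accumulator
has become {error, _}, the second clause merely threads it through, so no
further deletions are attempted but the iteration itself is not cut short. An
explicit early-exit equivalent, for comparison only (recursive_delete1/1 is
the helper defined in the next hunk):

    recursive_delete([])            -> ok;
    recursive_delete([Path | Rest]) ->
        case recursive_delete1(Path) of
            ok             -> recursive_delete(Rest);
            {error, _} = E -> E
        end.
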
+ +recursive_delete1(Path) -> case filelib:is_dir(Path) of false -> case file:delete(Path) of - ok -> ok; + ok -> ok; %% Path doesn't exist anyway - {error, enoent} -> ok + {error, enoent} -> ok; + {error, _Err} = Error -> Error end; true -> case file:list_dir(Path) of {ok, FileNames} -> - lists:foldl( - fun (FileName, ok) -> - recursive_delete(filename:join(Path, FileName)); - (_FileName, Error) -> - Error - end, ok, FileNames); + case lists:foldl( + fun (FileName, ok) -> + recursive_delete1( + filename:join(Path, FileName)); + (_FileName, Error) -> + Error + end, ok, FileNames) of + ok -> file:del_dir(Path); + {error, _Err} = Error -> Error + end; {error, Error} -> {error, {Path, Error}} end diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 6ec3cf74..e11f27af 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -425,8 +425,7 @@ reset(Force) -> end, ok = delete_cluster_nodes_config(), %% remove persistet messages and any other garbage we find - lists:foreach(fun file:delete/1, - filelib:wildcard(dir() ++ "/*")), + ok = rabbit_misc:recursive_delete(filelib:wildcard(dir() ++ "/*")), ok. leave_cluster([], _) -> ok; diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 086f26b8..800b2061 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -375,7 +375,7 @@ successfully_recovered_state(Server) -> clean(Server, BaseDir) -> Dir = filename:join(BaseDir, atom_to_list(Server)), - ok = rabbit_misc:recursive_delete(Dir). + ok = rabbit_misc:recursive_delete([Dir]). %%---------------------------------------------------------------------------- %% Client-side-only helpers -- cgit v1.2.1 From f04992235396908da859bbd7a00caf0dd1b0f8e6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 13:41:31 +0100 Subject: Be more consistent about errors --- src/rabbit_misc.erl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index c75c48d9..65e1d56d 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -619,10 +619,10 @@ recursive_delete1(Path) -> case filelib:is_dir(Path) of false -> case file:delete(Path) of - ok -> ok; + ok -> ok; %% Path doesn't exist anyway - {error, enoent} -> ok; - {error, _Err} = Error -> Error + {error, enoent} -> ok; + {error, Err} -> {error, {Path, Err}} end; true -> case file:list_dir(Path) of @@ -634,11 +634,16 @@ recursive_delete1(Path) -> (_FileName, Error) -> Error end, ok, FileNames) of - ok -> file:del_dir(Path); - {error, _Err} = Error -> Error + ok -> + case file:del_dir(Path) of + ok -> ok; + {error, Err} -> {error, {Path, Err}} + end; + {error, _Err} = Error -> + Error end; - {error, Error} -> - {error, {Path, Error}} + {error, Err} -> + {error, {Path, Err}} end end. -- cgit v1.2.1 From 684182ff7795989271fe8effbf1f131b137bebd1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 13:45:11 +0100 Subject: Refine the type of recursive delete --- src/rabbit_misc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 65e1d56d..d31ca653 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -137,7 +137,7 @@ -spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt'). -spec(version_compare/3 :: (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()). --spec(recursive_delete/1 :: ([string()]) -> 'ok' | {'error', any()}). +-spec(recursive_delete/1 :: ([string()]) -> 'ok' | {'error', {string(), any()}}). 
-spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). -- cgit v1.2.1 From b4385c036f4701005b2ebc97e2ba6958e7e93f44 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 15:27:36 +0100 Subject: Reworked msg_store startup sequencing so that qi need not know about the msg_store --- include/rabbit_msg_store.hrl | 2 ++ src/rabbit_msg_store.erl | 15 +++++++-------- src/rabbit_queue_index.erl | 33 +++++++++++++-------------------- src/rabbit_tests.erl | 37 ++++++++++++++++++++++--------------- src/rabbit_variable_queue.erl | 16 ++++++++++++++-- 5 files changed, 58 insertions(+), 45 deletions(-) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index d96fa758..a392a6f4 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -34,6 +34,8 @@ -ifdef(use_specs). -type(msg() :: any()). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). -endif. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 800b2061..4ac4a16e 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/5, write/4, read/3, contains/2, remove/2, release/2, +-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, sync/3, client_init/2, client_terminate/1, delete_client/2, clean/2, successfully_recovered_state/1]). @@ -119,10 +119,9 @@ dedup_cache_ets :: tid(), cur_file_cache_ets :: tid() }). --spec(start_link/5 :: - (atom(), file_path(), [binary()] | 'undefined', - (fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A) -> - {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/4 :: + (atom(), file_path(), [binary()] | 'undefined', startup_fun_state()) -> + {'ok', pid()} | 'ignore' | {'error', any()}). -spec(write/4 :: (server(), guid(), msg(), client_msstate()) -> {'ok', client_msstate()}). -spec(read/3 :: (server(), guid(), client_msstate()) -> @@ -302,9 +301,9 @@ %% public API %%---------------------------------------------------------------------------- -start_link(Server, Dir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit) -> +start_link(Server, Dir, ClientRefs, StartupFunState) -> gen_server2:start_link({local, Server}, ?MODULE, - [Server, Dir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit], + [Server, Dir, ClientRefs, StartupFunState], [{timeout, infinity}]). write(Server, Guid, Msg, CState = @@ -498,7 +497,7 @@ close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = %% gen_server callbacks %%---------------------------------------------------------------------------- -init([Server, BaseDir, ClientRefs, MsgRefDeltaGen, MsgRefDeltaGenInit]) -> +init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> process_flag(trap_exit, true), ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 2d9b6673..4a54fda8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,11 +31,11 @@ -module(rabbit_queue_index). --export([init/2, terminate/2, terminate_and_erase/1, write_published/4, +-export([init/3, terminate/2, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, - start_msg_stores/1]). + prepare_msg_store_seed_funs/1]). 
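The startup_fun_state() pair is an unfold: during init the msg store calls
the fun repeatedly, threading the second element through, until it returns
'finished'; each step appears to yield a guid plus a reference-count
increment for it. The degenerate generator used when there is nothing to
seed is {fun (ok) -> finished end, ok}, as above. A trivial generator of the
same shape (hypothetical, seeding each guid in a list with a count of 1):

    seed_fun([])             -> finished;
    seed_fun([Guid | Guids]) -> {Guid, 1, Guids}.
    %% used as: {fun seed_fun/1, [GuidA, GuidB, GuidC]}
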
-export([queue_index_walker_reader/3]). %% for internal use only @@ -171,8 +171,7 @@ num }). --include("rabbit.hrl"). --include("rabbit_variable_queue.hrl"). +-include("rabbit_msg_store.hrl"). %%---------------------------------------------------------------------------- @@ -195,7 +194,7 @@ dirty_count :: integer() }). --spec(init/2 :: (queue_name(), boolean()) -> +-spec(init/3 :: (queue_name(), boolean(), fun ((guid()) -> boolean())) -> {'undefined' | non_neg_integer(), binary(), binary(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). @@ -211,7 +210,10 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). --spec(start_msg_stores/1 :: ([queue_name()]) -> 'ok'). +-spec(prepare_msg_store_seed_funs/1 :: + ([queue_name()]) -> + {{[binary()] | 'undefined', startup_fun_state()}, + {[binary()] | 'undefined', startup_fun_state()}}). -endif. @@ -220,7 +222,7 @@ %% Public API %%---------------------------------------------------------------------------- -init(Name, MsgStoreRecovered) -> +init(Name, MsgStoreRecovered, ContainsCheckFun) -> State = blank_state(Name), {PRef, TRef, Terms} = case read_shutdown_terms(State #qistate.dir) of @@ -269,8 +271,7 @@ init(Name, MsgStoreRecovered) -> Segment3) -> Segment4 = maybe_add_to_journal( - rabbit_msg_store:contains( - ?PERSISTENT_MSG_STORE, Guid), + ContainsCheckFun(Guid), CleanShutdown, Del, RelSeq, Segment3), Segment4 end, Segment1 #segment { pubs = PubCount, @@ -428,12 +429,7 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> end, {LowSeqIdSeg, NextSeqId, State}. -start_msg_stores(DurableQueues) -> - ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), - ok = rabbit_sup:start_child( - ?TRANSIENT_MSG_STORE, rabbit_msg_store, - [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, - fun (ok) -> finished end, ok]), +prepare_msg_store_seed_funs(DurableQueues) -> DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || Queue <- DurableQueues ]), @@ -470,15 +466,12 @@ start_msg_stores(DurableQueues) -> {DurableAcc, [QueueDir | TransientAcc], RefsAcc} end end, {[], [], []}, Directories), - ok = rabbit_sup:start_child( - ?PERSISTENT_MSG_STORE, rabbit_msg_store, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), DurableRefs, - fun queue_index_walker/1, DurableQueueNames]), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), ok = delete_queue_directory(Dir) end, TransientDirs), - ok. + {{undefined, {fun (ok) -> finished end, ok}}, + {DurableRefs, {fun queue_index_walker/1, DurableQueueNames}}}. %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b186538b..a826cc62 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -999,7 +999,7 @@ start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> ok = rabbit_sup:start_child( ?PERSISTENT_MSG_STORE, rabbit_msg_store, [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), undefined, - MsgRefDeltaGen, MsgRefDeltaGenInit]), + {MsgRefDeltaGen, MsgRefDeltaGenInit}]), start_transient_msg_store(). start_transient_msg_store() -> @@ -1007,7 +1007,7 @@ start_transient_msg_store() -> ok = rabbit_sup:start_child( ?TRANSIENT_MSG_STORE, rabbit_msg_store, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, - fun (ok) -> finished end, ok]). 
+ {fun (ok) -> finished end, ok}]). stop_msg_store() -> case supervisor:terminate_child(rabbit_sup, ?PERSISTENT_MSG_STORE) of @@ -1198,8 +1198,8 @@ test_queue() -> queue_name(test). empty_test_queue() -> - ok = rabbit_queue_index:start_msg_stores([]), - {0, _PRef, _TRef, _Terms, Qi1} = rabbit_queue_index:init(test_queue(), false), + ok = rabbit_variable_queue:start([]), + {0, _PRef, _TRef, _Terms, Qi1} = test_queue_init(), _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), ok. @@ -1241,6 +1241,13 @@ verify_read_with_published(Delivered, Persistent, verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. +test_queue_init() -> + rabbit_queue_index:init( + test_queue(), false, + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end). + test_queue_index() -> SegmentSize = rabbit_queue_index:segment_size(), TwoSegs = SegmentSize + SegmentSize, @@ -1248,7 +1255,7 @@ test_queue_index() -> ok = empty_test_queue(), SeqIdsA = lists:seq(0,9999), SeqIdsB = lists:seq(10000,19999), - {0, _PRef, _TRef, _Terms, Qi0} = rabbit_queue_index:init(test_queue(), false), + {0, _PRef, _TRef, _Terms, Qi0} = test_queue_init(), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), @@ -1260,9 +1267,9 @@ test_queue_index() -> %% call terminate twice to prove it's idempotent _Qi5 = rabbit_queue_index:terminate([], rabbit_queue_index:terminate([], Qi4)), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([test_queue()]), + ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0, as all the msgs were transient - {0, _PRef1, _TRef1, _Terms1, Qi6} = rabbit_queue_index:init(test_queue(), false), + {0, _PRef1, _TRef1, _Terms1, Qi6} = test_queue_init(), {0, 0, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), @@ -1273,10 +1280,10 @@ test_queue_index() -> lists:reverse(SeqIdsGuidsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([test_queue()]), + ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 10000 LenB = length(SeqIdsB), - {LenB, _PRef2, _TRef2, _Terms2, Qi12} = rabbit_queue_index:init(test_queue(), false), + {LenB, _PRef2, _TRef2, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), @@ -1290,9 +1297,9 @@ test_queue_index() -> rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), _Qi19 = rabbit_queue_index:terminate([], Qi18), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([test_queue()]), + ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked - {0, _PRef3, _TRef3, _Terms3, Qi20} = rabbit_queue_index:init(test_queue(), false), + {0, _PRef3, _TRef3, _Terms3, Qi20} = test_queue_init(), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1301,7 +1308,7 @@ test_queue_index() -> %% First, partials: %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - {0, _PRef4, _TRef4, _Terms4, Qi22} = rabbit_queue_index:init(test_queue(), false), + {0, _PRef4, _TRef4, _Terms4, Qi22} = test_queue_init(), {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = 
queue_index_deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), @@ -1312,7 +1319,7 @@ test_queue_index() -> ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment - {0, _PRef5, _TRef5, _Terms5, Qi29} = rabbit_queue_index:init(test_queue(), false), + {0, _PRef5, _TRef5, _Terms5, Qi29} = test_queue_init(), {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = queue_index_deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), @@ -1324,14 +1331,14 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), - {0, _PRef6, _TRef6, _Terms6, Qi36} = rabbit_queue_index:init(test_queue(), false), + {0, _PRef6, _TRef6, _Terms6, Qi36} = test_queue_init(), {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = queue_index_deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), Qi40 = queue_index_flush_journal(Qi39), _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40), ok = stop_msg_store(), - ok = rabbit_queue_index:start_msg_stores([]), + ok = rabbit_variable_queue:start([]), ok = stop_msg_store(), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index bbf78bb7..72fba950 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -253,7 +253,15 @@ %%---------------------------------------------------------------------------- start(DurableQueues) -> - rabbit_queue_index:start_msg_stores(DurableQueues). + ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), + {{TransRefs, TransStartFunState}, {PersistRefs, PersistStartFunState}} + = rabbit_queue_index:prepare_msg_store_seed_funs(DurableQueues), + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + TransRefs, TransStartFunState]), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + PersistRefs, PersistStartFunState]). init(QueueName, IsDurable) -> PersistentStore = case IsDurable of @@ -262,8 +270,12 @@ init(QueueName, IsDurable) -> end, MsgStoreRecovered = rabbit_msg_store:successfully_recovered_state(PersistentStore), + ContainsCheckFun = + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end, {DeltaCount, PRef, TRef, Terms, IndexState} = - rabbit_queue_index:init(QueueName, MsgStoreRecovered), + rabbit_queue_index:init(QueueName, MsgStoreRecovered, ContainsCheckFun), {DeltaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), -- cgit v1.2.1 From e66ae561addd8573140edcfaf57f73d22b4c3235 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 15:33:14 +0100 Subject: Removed rabbit_variable_queue.hrl --- include/rabbit_variable_queue.hrl | 33 --------------------------------- src/rabbit_tests.erl | 4 +++- 2 files changed, 3 insertions(+), 34 deletions(-) delete mode 100644 include/rabbit_variable_queue.hrl diff --git a/include/rabbit_variable_queue.hrl b/include/rabbit_variable_queue.hrl deleted file mode 100644 index 2e43a8fa..00000000 --- a/include/rabbit_variable_queue.hrl +++ /dev/null @@ -1,33 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. 
You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2010 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2010 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --define(PERSISTENT_MSG_STORE, msg_store_persistent). --define(TRANSIENT_MSG_STORE, msg_store_transient). diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a826cc62..c772271f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -42,9 +42,11 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --include("rabbit_variable_queue.hrl"). -include_lib("kernel/include/file.hrl"). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + test_content_prop_roundtrip(Datum, Binary) -> Types = [element(1, E) || E <- Datum], Values = [element(2, E) || E <- Datum], -- cgit v1.2.1 From f56ab0c669bd3c033e69de5a56ee9b79d87a8bd2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 12 Apr 2010 15:41:31 +0100 Subject: Whoops --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 72fba950..ad3cb5c0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -187,9 +187,10 @@ %% write more - we can always come back on the next publish to do %% more. -define(RAM_INDEX_BATCH_SIZE, 64). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). -include("rabbit.hrl"). --include("rabbit_variable_queue.hrl"). %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 579d13f724e2ecf6d48e6c519809ef3dd018f281 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 19:59:14 +0100 Subject: fix typo in docs --- src/rabbit_backing_queue_type.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl index 8e3cce14..5f8319dc 100644 --- a/src/rabbit_backing_queue_type.erl +++ b/src/rabbit_backing_queue_type.erl @@ -84,7 +84,7 @@ behaviour_info(callbacks) -> {tx_commit, 4}, %% Reinsert messages into the queue which have already been - %% delivered and were (likely) pending acks.q + %% delivered and were pending acknowledgement. {requeue, 2}, %% How long is my queue? 
-- cgit v1.2.1 From 55f5fc73d21e0289b3d63430ef4b7d8fabb1dfc4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 12 Apr 2010 20:24:14 +0100 Subject: rename backing_queue_type to backing_queue --- include/rabbit_backing_queue_spec.hrl | 56 +++++++++++++ include/rabbit_backing_queue_type_spec.hrl | 56 ------------- src/rabbit_backing_queue.erl | 127 +++++++++++++++++++++++++++++ src/rabbit_backing_queue_type.erl | 127 ----------------------------- src/rabbit_variable_queue.erl | 4 +- 5 files changed, 185 insertions(+), 185 deletions(-) create mode 100644 include/rabbit_backing_queue_spec.hrl delete mode 100644 include/rabbit_backing_queue_type_spec.hrl create mode 100644 src/rabbit_backing_queue.erl delete mode 100644 src/rabbit_backing_queue_type.erl diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl new file mode 100644 index 00000000..afb54918 --- /dev/null +++ b/include/rabbit_backing_queue_spec.hrl @@ -0,0 +1,56 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-spec(start/1 :: ([queue_name()]) -> 'ok'). +-spec(init/2 :: (queue_name(), boolean()) -> state()). +-spec(terminate/1 :: (state()) -> state()). +-spec(delete_and_terminate/1 :: (state()) -> state()). +-spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). +-spec(publish/2 :: (basic_message(), state()) -> state()). +-spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). +-spec(fetch/1 :: (state()) -> + {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), + state()}). +-spec(ack/2 :: ([ack()], state()) -> state()). +-spec(tx_publish/2 :: (basic_message(), state()) -> state()). +-spec(tx_rollback/2 :: ([guid()], state()) -> state()). +-spec(tx_commit/4 :: ([guid()], [ack()], {pid(), any()}, state()) -> + {boolean(), state()}). +-spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). +-spec(len/1 :: (state()) -> non_neg_integer()). +-spec(is_empty/1 :: (state()) -> boolean()). +-spec(set_ram_duration_target/2 :: + (('undefined' | 'infinity' | number()), state()) -> state()). +-spec(ram_duration/1 :: (state()) -> {number(), state()}). +-spec(sync_callback/1 :: (state()) -> + ('undefined' | (fun ((A) -> {boolean(), A})))). 
+-spec(handle_pre_hibernate/1 :: (state()) -> state()). +-spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_backing_queue_type_spec.hrl b/include/rabbit_backing_queue_type_spec.hrl deleted file mode 100644 index afb54918..00000000 --- a/include/rabbit_backing_queue_type_spec.hrl +++ /dev/null @@ -1,56 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --spec(start/1 :: ([queue_name()]) -> 'ok'). --spec(init/2 :: (queue_name(), boolean()) -> state()). --spec(terminate/1 :: (state()) -> state()). --spec(delete_and_terminate/1 :: (state()) -> state()). --spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). --spec(publish/2 :: (basic_message(), state()) -> state()). --spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). --spec(fetch/1 :: (state()) -> - {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), - state()}). --spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/2 :: (basic_message(), state()) -> state()). --spec(tx_rollback/2 :: ([guid()], state()) -> state()). --spec(tx_commit/4 :: ([guid()], [ack()], {pid(), any()}, state()) -> - {boolean(), state()}). --spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). --spec(len/1 :: (state()) -> non_neg_integer()). --spec(is_empty/1 :: (state()) -> boolean()). --spec(set_ram_duration_target/2 :: - (('undefined' | 'infinity' | number()), state()) -> state()). --spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(sync_callback/1 :: (state()) -> - ('undefined' | (fun ((A) -> {boolean(), A})))). --spec(handle_pre_hibernate/1 :: (state()) -> state()). --spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl new file mode 100644 index 00000000..cc6fda55 --- /dev/null +++ b/src/rabbit_backing_queue.erl @@ -0,0 +1,127 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2009 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_backing_queue). + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [ + %% Called on startup with a list of durable queue names. The + %% queues aren't being started at this point, but this call + %% allows the backing queue to perform any checking necessary for + %% the consistency of those queues, or initialise any other + %% shared resources. + {start, 1}, + + %% Called with queue name and a boolean to indicate whether or + %% not the queue is durable. + {init, 2}, + + %% Called on queue shutdown when queue isn't being deleted + {terminate, 1}, + + %% Called when the queue is terminating and needs to delete all + %% its content. + {delete_and_terminate, 1}, + + %% Remove all messages in the queue, but not messages which have + %% been fetched and are pending acks. + {purge, 1}, + + %% Publish a message + {publish, 2}, + + %% Called for messages which have already been passed straight + %% out to a client. The queue will be empty for these calls + %% (i.e. saves the round trip through the backing queue). + {publish_delivered, 2}, + + %% Produce the next message + {fetch, 1}, + + %% Acktags supplied are for messages which can now be forgotten + %% about + {ack, 2}, + + %% A publish, but in the context of a transaction. + {tx_publish, 2}, + + %% Undo anything which has been done by the tx_publish of the + %% indicated messages. + {tx_rollback, 2}, + + %% Commit these publishes and acktags. The publishes you will + %% have previously seen in calls to tx_publish. + {tx_commit, 4}, + + %% Reinsert messages into the queue which have already been + %% delivered and were pending acknowledgement. + {requeue, 2}, + + %% How long is my queue? + {len, 1}, + + %% Is my queue empty? + {is_empty, 1}, + + %% For the next three functions, the assumption is that you're + %% monitoring something like the ingress and egress rates of the + %% queue. The RAM duration is thus the length of time represented + %% by the messages held in RAM given the current rates. If you + %% want to ignore all of this stuff, then do so, and return 0 in + %% ram_duration/1. + + %% The target is to have no more messages in RAM than indicated + %% by the duration and the current queue rates. + {set_ram_duration_target, 2}, + + %% Optionally recalculate the duration internally (likely to be + %% just update your internal rates), and report how many seconds + %% the messages in RAM represent given the current rates of the + %% queue. 
+ {ram_duration, 1}, + + %% Can return 'undefined' or a thunk which will receive the + %% state, and must return the state, as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires). + {sync_callback, 1}, + + %% Called immediately before the queue hibernates + {handle_pre_hibernate, 1}, + + %% Exists for debugging purposes, to be able to expose state via + %% rabbitmqctl list_queues backing_queue_status + {status, 1} + ]; +behaviour_info(_Other) -> + undefined. diff --git a/src/rabbit_backing_queue_type.erl b/src/rabbit_backing_queue_type.erl deleted file mode 100644 index 5f8319dc..00000000 --- a/src/rabbit_backing_queue_type.erl +++ /dev/null @@ -1,127 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developers of the Original Code are LShift Ltd, -%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, -%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd -%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial -%% Technologies LLC, and Rabbit Technologies Ltd. -%% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift -%% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies -%% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. -%% -%% All Rights Reserved. -%% -%% Contributor(s): ______________________________________. -%% - --module(rabbit_backing_queue_type). - --export([behaviour_info/1]). - -behaviour_info(callbacks) -> - [ - %% Called on startup with a list of durable queue names. The - %% queues aren't being started at this point, but this call - %% allows the backing queue to perform any checking necessary for - %% the consistency of those queues, or initialise any other - %% shared resources. - {start, 1}, - - %% Called with queue name and a boolean to indicate whether or - %% not the queue is durable. - {init, 2}, - - %% Called on queue shutdown when queue isn't being deleted - {terminate, 1}, - - %% Called when the queue is terminating and needs to delete all - %% its content. - {delete_and_terminate, 1}, - - %% Remove all messages in the queue, but not messages which have - %% been fetched and are pending acks. - {purge, 1}, - - %% Publish a message - {publish, 2}, - - %% Called for messages which have already been passed straight - %% out to a client. The queue will be empty for these calls - %% (i.e. saves the round trip through the backing queue). - {publish_delivered, 2}, - - %% Produce the next message - {fetch, 1}, - - %% Acktags supplied are for messages which can now be forgotten - %% about - {ack, 2}, - - %% A publish, but in the context of a transaction. - {tx_publish, 2}, - - %% Undo anything which has been done by the tx_publish of the - %% indicated messages. - {tx_rollback, 2}, - - %% Commit these publishes and acktags. 
The publishes you will - %% have previously seen in calls to tx_publish. - {tx_commit, 4}, - - %% Reinsert messages into the queue which have already been - %% delivered and were pending acknowledgement. - {requeue, 2}, - - %% How long is my queue? - {len, 1}, - - %% Is my queue empty? - {is_empty, 1}, - - %% For the next three functions, the assumption is that you're - %% monitoring something like the ingress and egress rates of the - %% queue. The RAM duration is thus the length of time represented - %% by the messages held in RAM given the current rates. If you - %% want to ignore all of this stuff, then do so, and return 0 in - %% ram_duration/1. - - %% The target is to have no more messages in RAM than indicated - %% by the duration and the current queue rates. - {set_ram_duration_target, 2}, - - %% Optionally recalculate the duration internally (likely to be - %% just update your internal rates), and report how many seconds - %% the messages in RAM represent given the current rates of the - %% queue. - {ram_duration, 1}, - - %% Can return 'undefined' or a thunk which will receive the - %% state, and must return the state, as soon as the queue process - %% can manage (either on an empty mailbox, or when a timer - %% fires). - {sync_callback, 1}, - - %% Called immediately before the queue hibernates - {handle_pre_hibernate, 1}, - - %% Exists for debugging purposes, to be able to expose state via - %% rabbitmqctl list_queues backing_queue_status - {status, 1} - ]; -behaviour_info(_Other) -> - undefined. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ad3cb5c0..60e50800 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -133,7 +133,7 @@ %%---------------------------------------------------------------------------- --behaviour(rabbit_backing_queue_type). +-behaviour(rabbit_backing_queue). -record(vqstate, { q1, @@ -238,7 +238,7 @@ {boolean(), state()}). -spec(tx_commit_index/1 :: (state()) -> {boolean(), state()}). --include("rabbit_backing_queue_type_spec.hrl"). +-include("rabbit_backing_queue_spec.hrl"). -endif. -- cgit v1.2.1 From a57093eb531a2ad987a6bc68264dfdb9da1457a5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Apr 2010 11:55:56 +0100 Subject: queue_index doesn't need to reimplement recursive delete --- src/rabbit_queue_index.erl | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4a54fda8..f6784339 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -325,7 +325,7 @@ terminate(Terms, State) -> terminate_and_erase(State) -> State1 = terminate(false, [], State), - ok = delete_queue_directory(State1 #qistate.dir), + ok = rabbit_misc:recursive_delete([State1 #qistate.dir]), State1. write_published(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> @@ -468,7 +468,7 @@ prepare_msg_store_seed_funs(DurableQueues) -> end, {[], [], []}, Directories), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), - ok = delete_queue_directory(Dir) + ok = rabbit_misc:recursive_delete([Dir]) end, TransientDirs), {{undefined, {fun (ok) -> finished end, ok}}, {DurableRefs, {fun queue_index_walker/1, DurableQueueNames}}}. @@ -590,13 +590,6 @@ queue_name_to_dir_name(Name = #resource { kind = queue }) -> queues_dir() -> filename:join(rabbit_mnesia:dir(), "queues"). 
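The delete_queue_directory/1 helper removed below only coped with a flat
directory: file:delete/1 fails on a directory, so any nested layout under a
queue dir would have broken it. rabbit_misc:recursive_delete/1 descends into
subdirectories, and since it now takes a list of paths, single paths are
wrapped as [Dir] at the call sites above (paths here are illustrative):

    %% both of these layouts are now handled by the same call:
    ok = rabbit_misc:recursive_delete(["/path/flat_dir",
                                       "/path/tree/with/subdirs"]).
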
-delete_queue_directory(Dir) -> - {ok, Entries} = file:list_dir(Dir), - ok = lists:foldl(fun (Entry, ok) -> - file:delete(filename:join(Dir, Entry)) - end, ok, Entries), - ok = file:del_dir(Dir). - get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> {ok, Hdl} = file_handle_cache:open(Path, [binary, raw, read, write, -- cgit v1.2.1 From 7baf450d8a45c00593482af90e98e8ebdb0e78df Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Apr 2010 12:32:32 +0100 Subject: Whoops - redeliver was only used by the old persister and was removed from _process months ago --- src/rabbit_amqqueue.erl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 29b253d1..6125fddc 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -36,7 +36,7 @@ set_ram_duration_target/2, set_maximum_since_use/2]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, - stat/1, stat_all/0, deliver/2, redeliver/2, requeue/3, ack/4]). + stat/1, stat_all/0, deliver/2, requeue/3, ack/4]). -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). -export([consumers/1, consumers_all/1]). -export([claim_queue/2]). @@ -90,7 +90,6 @@ {'error', 'not_empty'}). -spec(purge/1 :: (amqqueue()) -> qlen()). -spec(deliver/2 :: (pid(), delivery()) -> boolean()). --spec(redeliver/2 :: (pid(), [{message(), boolean()}]) -> 'ok'). -spec(requeue/3 :: (pid(), [msg_id()], pid()) -> 'ok'). -spec(ack/4 :: (pid(), maybe(txn()), [msg_id()], pid()) -> 'ok'). -spec(commit_all/2 :: ([pid()], txn()) -> ok_or_errors()). @@ -293,9 +292,6 @@ deliver(QPid, #delivery{txn = Txn, sender = ChPid, message = Message}) -> gen_server2:cast(QPid, {deliver, Txn, Message, ChPid}), true. -redeliver(QPid, Messages) -> - gen_server2:cast(QPid, {redeliver, Messages}). - requeue(QPid, MsgIds, ChPid) -> gen_server2:cast(QPid, {requeue, MsgIds, ChPid}). -- cgit v1.2.1 From 15164b641ea8fa5900a8d9eae81c74d250f81f67 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 13 Apr 2010 15:47:20 +0100 Subject: tx_commit takes a list of publishes, not a list of guids --- include/rabbit_backing_queue_spec.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index afb54918..3a0f701b 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -42,7 +42,7 @@ -spec(ack/2 :: ([ack()], state()) -> state()). -spec(tx_publish/2 :: (basic_message(), state()) -> state()). -spec(tx_rollback/2 :: ([guid()], state()) -> state()). --spec(tx_commit/4 :: ([guid()], [ack()], {pid(), any()}, state()) -> +-spec(tx_commit/4 :: ([basic_message()], [ack()], {pid(), any()}, state()) -> {boolean(), state()}). -spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). 
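Behaviours of this vintage are declared via behaviour_info/1: the renamed
rabbit_backing_queue module simply publishes its callback list, and the
compiler warns when a module declaring -behaviour(rabbit_backing_queue)
fails to export one of the pairs. Roughly, the check amounts to (a sketch;
MyMod stands for any loaded implementation module):

    Missing = [{F, A} ||
                  {F, A} <- rabbit_backing_queue:behaviour_info(callbacks),
                  not erlang:function_exported(MyMod, F, A)].
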
-- cgit v1.2.1 From 2b643b0b50c621fa5349573c11708edc5fc2b005 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Apr 2010 14:03:05 +0100 Subject: cosmetic and minor refactoring --- src/file_handle_cache.erl | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 59bb01bf..929671cd 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -332,19 +332,18 @@ truncate(Ref) -> with_flushed_handles( [Ref], fun ([Handle1 = #handle { hdl = Hdl, offset = Offset, - trusted_offset = TrustedOffset }]) -> + trusted_offset = TOffset }]) -> case file:truncate(Hdl) of - ok -> TrustedOffset1 = lists:min([Offset, TrustedOffset]), - {ok, [Handle1 #handle { - at_eof = true, - trusted_offset = TrustedOffset1 }]}; + ok -> TOffset1 = lists:min([Offset, TOffset]), + {ok, [Handle1 #handle {at_eof = true, + trusted_offset = TOffset1 }]}; Error -> {Error, [Handle1]} end end). last_sync_offset(Ref) -> - with_handles([Ref], fun ([#handle { trusted_offset = TrustedOffset }]) -> - {ok, TrustedOffset} + with_handles([Ref], fun ([#handle { trusted_offset = TOffset }]) -> + {ok, TOffset} end). current_virtual_offset(Ref) -> @@ -476,7 +475,7 @@ with_handles(Refs, Fun) -> end, {ok, []}, Refs), case ResHandles of {ok, Handles} -> - case erlang:apply(Fun, [lists:reverse(Handles)]) of + case Fun(lists:reverse(Handles)) of {Result, Handles1} when is_list(Handles1) -> lists:zipwith(fun put_handle/2, Refs, Handles1), Result; @@ -499,7 +498,7 @@ with_flushed_handles(Refs, Fun) -> {Error, [Handle | HandlesAcc]} end, {ok, []}, Handles) of {ok, Handles1} -> - erlang:apply(Fun, [lists:reverse(Handles1)]); + Fun(lists:reverse(Handles1)); {Error, Handles1} -> {Error, lists:reverse(Handles1)} end @@ -545,14 +544,14 @@ open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> N when is_integer(N) -> N end, Now = now(), - Handle = - #handle { hdl = Hdl, offset = 0, trusted_offset = 0, - write_buffer_size = 0, options = Options, - write_buffer_size_limit = WriteBufferSize, - write_buffer = [], at_eof = false, mode = Mode, - is_write = is_writer(Mode), is_read = is_reader(Mode), - path = Path, last_used_at = Now, - is_dirty = false }, + Handle = #handle { hdl = Hdl, offset = 0, trusted_offset = 0, + write_buffer_size = 0, options = Options, + write_buffer_size_limit = WriteBufferSize, + write_buffer = [], at_eof = false, mode = Mode, + is_write = is_writer(Mode), + is_read = is_reader(Mode), + path = Path, last_used_at = Now, + is_dirty = false }, {{ok, Offset1}, Handle1} = maybe_seek(Offset, Handle), Handle2 = Handle1 #handle { trusted_offset = Offset1 }, put({Ref, fhc_handle}, Handle2), @@ -844,5 +843,6 @@ ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> ensure_mref(Pid, ClientMRefs) -> case dict:find(Pid, ClientMRefs) of {ok, _MRef} -> ClientMRefs; - error -> dict:store(Pid, erlang:monitor(process, Pid), ClientMRefs) + error -> dict:store(Pid, erlang:monitor(process, Pid), + ClientMRefs) end. -- cgit v1.2.1 From 999c216b9978d74bf64aff7546776929ab7ffc75 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 16 Apr 2010 09:58:24 +0100 Subject: cosmetic --- src/supervisor2.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 978d30f9..55753512 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -5,8 +5,8 @@ %% %% 2) there is a new strategy called %% simple_one_for_one_terminate. 
This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly killed as -%% per the shutdown component of the child_spec. +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. %% %% All modifications are (C) 2010 LShift Ltd. %% -- cgit v1.2.1 From db0f89ea5aa1167479c6b0d89a7536aaf9efca86 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 11:14:08 +0100 Subject: Thorough reworking of API - the BQ is now responsible for hanging onto unacked msgs and all details of transactions --- include/rabbit_backing_queue_spec.hrl | 21 +- src/rabbit_amqqueue.erl | 17 +- src/rabbit_amqqueue_process.erl | 356 +++++++----------- src/rabbit_backing_queue.erl | 11 +- src/rabbit_channel.erl | 4 +- src/rabbit_msg_store.erl | 4 +- src/rabbit_queue_index.erl | 2 + src/rabbit_tests.erl | 10 +- src/rabbit_variable_queue.erl | 660 +++++++++++++++++++--------------- 9 files changed, 526 insertions(+), 559 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 3a0f701b..d86a5382 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -29,22 +29,25 @@ %% Contributor(s): ______________________________________. %% +-type(fetch_result() :: + %% Message, IsDelivered, AckTag, Remaining_Len + ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})). + -spec(start/1 :: ([queue_name()]) -> 'ok'). -spec(init/2 :: (queue_name(), boolean()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). -spec(publish/2 :: (basic_message(), state()) -> state()). --spec(publish_delivered/2 :: (basic_message(), state()) -> {ack(), state()}). --spec(fetch/1 :: (state()) -> - {('empty'|{basic_message(), boolean(), ack(), non_neg_integer()}), - state()}). +-spec(publish_delivered/3 :: + (boolean(), basic_message(), state()) -> {ack(), state()}). +-spec(fetch/2 :: (boolean(), state()) -> {fetch_result(), state()}). -spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/2 :: (basic_message(), state()) -> state()). --spec(tx_rollback/2 :: ([guid()], state()) -> state()). --spec(tx_commit/4 :: ([basic_message()], [ack()], {pid(), any()}, state()) -> - {boolean(), state()}). --spec(requeue/2 :: ([{basic_message(), ack()}], state()) -> state()). +-spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()). +-spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()). +-spec(tx_rollback/2 :: (txn(), state()) -> {[ack()], state()}). +-spec(tx_commit/3 :: (txn(), {pid(), any()}, state()) -> {[ack()], state()}). +-spec(requeue/2 :: ([ack()], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 6125fddc..cc6f08b7 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -43,7 +43,7 @@ -export([basic_get/3, basic_consume/8, basic_cancel/4]). -export([notify_sent/2, unblock/2, maybe_run_queue_via_backing_queue/2, flush_all/2]). --export([commit_all/2, rollback_all/2, notify_down_all/2, limit_all/3]). +-export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). -import(mnesia). @@ -92,8 +92,8 @@ -spec(deliver/2 :: (pid(), delivery()) -> boolean()). 
-spec(requeue/3 :: (pid(), [msg_id()], pid()) -> 'ok'). -spec(ack/4 :: (pid(), maybe(txn()), [msg_id()], pid()) -> 'ok'). --spec(commit_all/2 :: ([pid()], txn()) -> ok_or_errors()). --spec(rollback_all/2 :: ([pid()], txn()) -> ok_or_errors()). +-spec(commit_all/3 :: ([pid()], txn(), pid()) -> ok_or_errors()). +-spec(rollback_all/3 :: ([pid()], txn(), pid()) -> ok_or_errors()). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). -spec(claim_queue/2 :: (amqqueue(), pid()) -> 'ok' | 'locked'). @@ -107,8 +107,7 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(maybe_run_queue_via_backing_queue/2 :: - (pid(), (fun ((A) -> {boolean(), A}))) -> 'ok'). +-spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). @@ -298,16 +297,16 @@ requeue(QPid, MsgIds, ChPid) -> ack(QPid, Txn, MsgIds, ChPid) -> gen_server2:pcast(QPid, 7, {ack, Txn, MsgIds, ChPid}). -commit_all(QPids, Txn) -> +commit_all(QPids, Txn, ChPid) -> safe_pmap_ok( fun (QPid) -> exit({queue_disappeared, QPid}) end, - fun (QPid) -> gen_server2:call(QPid, {commit, Txn}, infinity) end, + fun (QPid) -> gen_server2:call(QPid, {commit, Txn, ChPid}, infinity) end, QPids). -rollback_all(QPids, Txn) -> +rollback_all(QPids, Txn, ChPid) -> safe_pmap_ok( fun (QPid) -> exit({queue_disappeared, QPid}) end, - fun (QPid) -> gen_server2:cast(QPid, {rollback, Txn}) end, + fun (QPid) -> gen_server2:cast(QPid, {rollback, Txn, ChPid}) end, QPids). notify_down_all(QPids, ChPid) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 9697cc13..efbc2766 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -56,7 +56,6 @@ backing_queue, backing_queue_state, backing_queue_timeout_fun, - next_msg_id, active_consumers, blocked_consumers, sync_timer_ref, @@ -65,8 +64,6 @@ -record(consumer, {tag, ack_required}). --record(tx, {ch_pid, pending_messages, pending_acks}). - %% These are held in our process dictionary -record(cr, {consumer_count, ch_pid, @@ -88,9 +85,7 @@ exclusive_consumer_tag, messages_ready, messages_unacknowledged, - messages_uncommitted, messages, - acks_uncommitted, consumers, transactions, memory, @@ -122,7 +117,6 @@ init([Q, InitBQ]) -> backing_queue = BQ, backing_queue_state = maybe_init_backing_queue(InitBQ, BQ, Q), backing_queue_timeout_fun = undefined, - next_msg_id = 1, active_consumers = queue:new(), blocked_consumers = queue:new(), sync_timer_ref = undefined, @@ -135,49 +129,39 @@ maybe_init_backing_queue( maybe_init_backing_queue(false, _BQ, _Q) -> undefined. 
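With transactions now held inside the backing queue per channel, commit and
rollback have to name the channel as well as the transaction, hence the
extra ChPid argument threaded through above. On the caller's side this looks
roughly like (a sketch; QPids, Txn and ChPid assumed in scope, and the error
shape is a guess at ok_or_errors()):

    case rabbit_amqqueue:commit_all(QPids, Txn, ChPid) of
        ok             -> ok;
        {error, _} = E -> E  %% per-queue failures
    end
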
-terminate(shutdown, #q{backing_queue_state = BQS, - backing_queue = BQ}) -> - ok = rabbit_memory_monitor:deregister(self()), - case BQS of - undefined -> ok; - _ -> BQ:terminate(BQS) - end; -terminate({shutdown, _}, #q{backing_queue_state = BQS, - backing_queue = BQ}) -> - ok = rabbit_memory_monitor:deregister(self()), - case BQS of - undefined -> ok; - _ -> BQ:terminate(BQS) - end; -terminate(_Reason, State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +terminate(shutdown, State) -> + terminate_shutdown(terminate, State); +terminate({shutdown, _}, State) -> + terminate_shutdown(terminate, State); +terminate(_Reason, State) -> ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? %% Ensure that any persisted tx messages are removed. %% TODO: wait for all in flight tx_commits to complete - case BQS of - undefined -> - ok; - _ -> - BQS1 = BQ:tx_rollback( - lists:concat([PM || #tx { pending_messages = PM } <- - all_tx_record()]), BQS), - %% Delete from disk first. If we crash at this point, when - %% a durable queue, we will be recreated at startup, - %% possibly with partial content. The alternative is much - %% worse however - if we called internal_delete first, we - %% would then have a race between the disk delete and a - %% new queue with the same name being created and - %% published to. - BQ:delete_and_terminate(BQS1) - end, - ok = rabbit_amqqueue:internal_delete(qname(State)). + State1 = terminate_shutdown(delete_and_terminate, State), + ok = rabbit_amqqueue:internal_delete(qname(State1)). code_change(_OldVsn, State, _Extra) -> {ok, State}. %%---------------------------------------------------------------------------- +terminate_shutdown(Fun, State = + #q{backing_queue = BQ, backing_queue_state = BQS}) -> + ok = rabbit_memory_monitor:deregister(self()), + case BQS of + undefined -> State; + _ -> BQS1 = lists:foldl( + fun (#cr{txn = none}, BQSN) -> + BQSN; + (#cr{txn = Txn}, BQSN) -> + {_AckTags, BQSN1} = + BQ:tx_rollback(Txn, BQSN), + BQSN1 + end, BQS, all_ch_record()), + State#q{backing_queue_state = BQ:Fun(BQS1)} + end. 
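Note that terminate_shutdown/2 receives the backing-queue callback to run (terminate or delete_and_terminate) as a plain atom and applies it with Erlang's dynamic Module:Function(Arg) call syntax in BQ:Fun(BQS1). A minimal, self-contained sketch of that idiom follows; the module and function names are illustrative and not part of the patch.

    -module(dyncall_sketch).
    -export([demo/0]).

    %% Both the module and the function are ordinary variables bound to
    %% atoms; Mod:Fun(State) is equivalent to erlang:apply(Mod, Fun, [State]).
    cleanup(Mod, Fun, State) ->
        Mod:Fun(State).

    demo() ->
        %% lists:reverse/1 stands in for BQ:terminate/1 or
        %% BQ:delete_and_terminate/1.
        [3,2,1] = cleanup(lists, reverse, [1,2,3]),
        ok.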
+ reply(Reply, NewState) -> assert_invariant(NewState), {NewState1, Timeout} = next_state(NewState), @@ -248,7 +232,7 @@ ch_record(ChPid) -> C = #cr{consumer_count = 0, ch_pid = ChPid, monitor_ref = MonitorRef, - unacked_messages = dict:new(), + unacked_messages = [], is_limit_active = false, txn = none, unsent_message_count = 0}, @@ -282,8 +266,7 @@ record_current_channel_tx(ChPid, Txn) -> deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, State = #q{q = #amqqueue{name = QName}, active_consumers = ActiveConsumers, - blocked_consumers = BlockedConsumers, - next_msg_id = NextId}) -> + blocked_consumers = BlockedConsumers}) -> case queue:out(ActiveConsumers) of {{value, QEntry = {ChPid, #consumer{tag = ConsumerTag, ack_required = AckRequired}}}, @@ -299,12 +282,11 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, DeliverFun(AckRequired, FunAcc, State), rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, - {QName, self(), NextId, IsDelivered, Message}), - NewUAM = - case AckRequired of - true -> dict:store(NextId, {Message, AckTag}, UAM); - false -> UAM - end, + {QName, self(), AckTag, IsDelivered, Message}), + NewUAM = case AckRequired of + true -> [AckTag|UAM]; + false -> UAM + end, NewC = C#cr{unsent_message_count = Count + 1, unacked_messages = NewUAM}, store_ch_record(NewC), @@ -322,8 +304,7 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, end, State2 = State1#q{ active_consumers = NewActiveConsumers, - blocked_consumers = NewBlockedConsumers, - next_msg_id = NextId + 1}, + blocked_consumers = NewBlockedConsumers}, deliver_msgs_to_consumers(Funs, FunAcc1, State2); %% if IsMsgReady then we've hit the limiter false when IsMsgReady -> @@ -344,50 +325,39 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, {FunAcc, State} end. -deliver_from_queue_pred({IsEmpty, _AutoAcks}, _State) -> +deliver_from_queue_pred(IsEmpty, _State) -> not IsEmpty. -deliver_from_queue_deliver(AckRequired, {false, AutoAcks}, + +deliver_from_queue_deliver(AckRequired, false, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - {{Message, IsDelivered, AckTag, Remaining}, BQS1} = BQ:fetch(BQS), - AutoAcks1 = case AckRequired of - true -> AutoAcks; - false -> [AckTag | AutoAcks] - end, - {{Message, IsDelivered, AckTag}, {0 == Remaining, AutoAcks1}, + {{Message, IsDelivered, AckTag, Remaining}, BQS1} = + BQ:fetch(AckRequired, BQS), + {{Message, IsDelivered, AckTag}, 0 == Remaining, State #q { backing_queue_state = BQS1 }}. run_message_queue(State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - Funs = { fun deliver_from_queue_pred/2, - fun deliver_from_queue_deliver/3 }, + Funs = {fun deliver_from_queue_pred/2, + fun deliver_from_queue_deliver/3}, IsEmpty = BQ:is_empty(BQS), - {{_IsEmpty1, AutoAcks}, State1} = - deliver_msgs_to_consumers(Funs, {IsEmpty, []}, State), - BQS1 = BQ:ack(AutoAcks, State1 #q.backing_queue_state), - State1 #q { backing_queue_state = BQS1 }. + {_IsEmpty1, State1} = deliver_msgs_to_consumers(Funs, IsEmpty, State), + State1. 
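run_message_queue/1 still drives deliver_msgs_to_consumers/3 with a {PredFun, DeliverFun} pair plus an accumulator, but with acks now owned by the backing queue the accumulator shrinks to the single IsEmpty boolean. A toy model of that control flow over a plain list, with all names illustrative:

    -module(funs_pair_sketch).
    -export([drain/1]).

    %% PredFun: keep delivering while the source is non-empty.
    pred(IsEmpty) -> not IsEmpty.

    %% DeliverFun: pop one element, report whether we are now empty.
    deliver({[H | T], Out}) ->
        {H, T == [], {T, [H | Out]}}.

    loop(IsEmpty, Acc) ->
        case pred(IsEmpty) of
            false -> Acc;
            true  -> {_Item, IsEmpty1, Acc1} = deliver(Acc),
                     loop(IsEmpty1, Acc1)
        end.

    drain(Items) ->
        {_, Out} = loop(Items == [], {Items, []}),
        lists:reverse(Out).    %% drain([a,b,c]) -> [a,b,c]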
attempt_delivery(none, _ChPid, Message, State = #q{backing_queue = BQ}) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = - fun (AckRequired, false, State1) -> - {AckTag, State2} = - case AckRequired of - true -> - {AckTag1, BQS} = - BQ:publish_delivered( - Message, State1 #q.backing_queue_state), - {AckTag1, State1 #q { backing_queue_state = BQS }}; - false -> - {noack, State1} - end, - {{Message, false, AckTag}, true, State2} + fun (AckRequired, false, State1 = #q{backing_queue_state = BQS}) -> + {AckTag, BQS1} = + BQ:publish_delivered(AckRequired, Message, BQS), + {{Message, false, AckTag}, true, + State1#q{backing_queue_state = BQS1}} end, deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State); -attempt_delivery(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> - BQS = BQ:tx_publish(Message, State #q.backing_queue_state), - record_pending_message(Txn, ChPid, Message), - {true, State #q { backing_queue_state = BQS }}. +attempt_delivery(Txn, ChPid, Message, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + record_current_channel_tx(ChPid, Txn), + {true, State#q{backing_queue_state = BQ:tx_publish(Txn, Message, BQS)}}. deliver_or_enqueue(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> case attempt_delivery(Txn, ChPid, Message, State) of @@ -396,49 +366,22 @@ deliver_or_enqueue(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> {false, NewState} -> %% Txn is none and no unblocked channels with consumers BQS = BQ:publish(Message, State #q.backing_queue_state), - {false, NewState #q { backing_queue_state = BQS }} + {false, NewState#q{backing_queue_state = BQS}} end. -%% all these messages have already been delivered at least once and -%% not ack'd, but need to be either redelivered or requeued -deliver_or_requeue_n([], State) -> - State; -deliver_or_requeue_n(MsgsWithAcks, State = #q{backing_queue = BQ}) -> - Funs = { fun deliver_or_requeue_msgs_pred/2, - fun deliver_or_requeue_msgs_deliver/3 }, - {{_RemainingLengthMinusOne, AutoAcks, OutstandingMsgs}, NewState} = - deliver_msgs_to_consumers( - Funs, {length(MsgsWithAcks), [], MsgsWithAcks}, State), - BQS = BQ:ack(AutoAcks, NewState #q.backing_queue_state), - case OutstandingMsgs of - [] -> NewState #q { backing_queue_state = BQS }; - _ -> BQS1 = BQ:requeue(OutstandingMsgs, BQS), - NewState #q { backing_queue_state = BQS1 } - end. - -deliver_or_requeue_msgs_pred({Len, _AcksAcc, _MsgsWithAcks}, _State) -> - 0 < Len. -deliver_or_requeue_msgs_deliver( - false, {Len, AcksAcc, [{Message, AckTag} | MsgsWithAcks]}, State) -> - {{Message, true, noack}, {Len - 1, [AckTag | AcksAcc], MsgsWithAcks}, - State}; -deliver_or_requeue_msgs_deliver( - true, {Len, AcksAcc, [{Message, AckTag} | MsgsWithAcks]}, State) -> - {{Message, true, AckTag}, {Len - 1, AcksAcc, MsgsWithAcks}, State}. +requeue_and_run(AckTags, State = #q{backing_queue = BQ}) -> + maybe_run_queue_via_backing_queue( + fun (BQS) -> BQ:requeue(AckTags, BQS) end, State). add_consumer(ChPid, Consumer, Queue) -> queue:in({ChPid, Consumer}, Queue). remove_consumer(ChPid, ConsumerTag, Queue) -> - %% TODO: replace this with queue:filter/2 once we move to R12 - queue:from_list(lists:filter( - fun ({CP, #consumer{tag = CT}}) -> - (CP /= ChPid) or (CT /= ConsumerTag) - end, queue:to_list(Queue))). + queue:filter(fun ({CP, #consumer{tag = CT}}) -> + (CP /= ChPid) or (CT /= ConsumerTag) + end, Queue). 
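The queue:from_list/queue:to_list round-trips disappear here because queue:filter/2 is available from OTP R12 onwards, which is exactly what the removed TODO comments were waiting for. For reference, its behaviour on a plain queue:

    1> Q = queue:from_list([1,2,3,4]).
    2> queue:to_list(queue:filter(fun (X) -> X rem 2 =:= 0 end, Q)).
    [2,4]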
remove_consumers(ChPid, Queue) -> - %% TODO: replace this with queue:filter/2 once we move to R12 - queue:from_list(lists:filter(fun ({CP, _}) -> CP /= ChPid end, - queue:to_list(Queue))). + queue:filter(fun ({CP, _}) -> CP /= ChPid end, Queue). move_consumers(ChPid, From, To) -> {Kept, Removed} = lists:partition(fun ({CP, _}) -> CP /= ChPid end, @@ -489,12 +432,10 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> true -> {stop, State1}; false -> State2 = case Txn of none -> State1; - _ -> rollback_transaction(Txn, State1) + _ -> rollback_transaction(Txn, ChPid, + State1) end, - {ok, deliver_or_requeue_n( - [MsgWithAck || - {_MsgId, MsgWithAck} <- dict:to_list(UAM)], - State2)} + {ok, requeue_and_run(UAM, State2)} end end. @@ -526,72 +467,34 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) -> - {RunQueue, BQS1} = Fun(BQS), - State1 = State#q{backing_queue_state = BQS1}, - case RunQueue of - true -> run_message_queue(State1); - false -> State1 - end. + run_message_queue(State#q{backing_queue_state = Fun(BQS)}). -lookup_tx(Txn) -> - case get({txn, Txn}) of - undefined -> #tx{ch_pid = none, - pending_messages = [], - pending_acks = []}; - V -> V - end. - -store_tx(Txn, Tx) -> - put({txn, Txn}, Tx). - -erase_tx(Txn) -> - erase({txn, Txn}). - -all_tx_record() -> - [T || {{txn, _}, T} <- get()]. +commit_transaction(Txn, From, ChPid, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {AckTags, BQS1} = BQ:tx_commit(Txn, From, BQS), + case lookup_ch(ChPid) of + not_found -> + []; + C = #cr{unacked_messages = UAM} -> + Remaining = ordsets:to_list(ordsets:subtract( + ordsets:from_list(UAM), + ordsets:from_list(AckTags))), + store_ch_record(C#cr{unacked_messages = Remaining, txn = none}) + end, + State#q{backing_queue_state = BQS1}. -record_pending_message(Txn, ChPid, Message) -> - Tx = #tx{pending_messages = Pending} = lookup_tx(Txn), - record_current_channel_tx(ChPid, Txn), - store_tx(Txn, Tx#tx{pending_messages = [Message | Pending], - ch_pid = ChPid}). +rollback_transaction(Txn, _ChPid, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {_AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), + %% Iff we removed acktags from the channel record on ack+txn then + %% we would add them back in here (would also require ChPid) + State#q{backing_queue_state = BQS1}. -record_pending_acks(Txn, ChPid, MsgIds) -> - Tx = #tx{pending_acks = Pending} = lookup_tx(Txn), - record_current_channel_tx(ChPid, Txn), - store_tx(Txn, Tx#tx{pending_acks = [MsgIds | Pending], - ch_pid = ChPid}). - -commit_transaction(Txn, From, State = #q{backing_queue = BQ}) -> - #tx{ch_pid = ChPid, pending_messages = PendingMessages, - pending_acks = PendingAcks} = lookup_tx(Txn), - PendingMessagesOrdered = lists:reverse(PendingMessages), - PendingAcksOrdered = lists:append(PendingAcks), - Acks = - case lookup_ch(ChPid) of - not_found -> - []; - C = #cr{unacked_messages = UAM} -> - {MsgsWithAcks, Remaining} = - collect_messages(PendingAcksOrdered, UAM), - store_ch_record(C#cr{unacked_messages = Remaining}), - [AckTag || {_Message, AckTag} <- MsgsWithAcks] - end, - {RunQueue, BQS} = BQ:tx_commit(PendingMessagesOrdered, Acks, From, - State#q.backing_queue_state), - erase_tx(Txn), - {RunQueue, State#q{backing_queue_state = BQS}}. 
- -rollback_transaction(Txn, State = #q{backing_queue = BQ}) -> - #tx{pending_messages = PendingMessages} = lookup_tx(Txn), - BQS = BQ:tx_rollback(PendingMessages, State #q.backing_queue_state), - erase_tx(Txn), - State#q{backing_queue_state = BQS}. - -collect_messages(MsgIds, UAM) -> - lists:mapfoldl( - fun (MsgId, D) -> {dict:fetch(MsgId, D), dict:erase(MsgId, D)} end, - UAM, MsgIds). +collect_messages(AckTags, UAM) -> + AckTagsSet = ordsets:from_list(AckTags), + UAMSet = ordsets:from_list(UAM), + {ordsets:to_list(ordsets:intersection(AckTagsSet, UAMSet)), + ordsets:to_list(ordsets:subtract(UAMSet, AckTagsSet))}. infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. @@ -616,22 +519,15 @@ i(exclusive_consumer_tag, #q{exclusive_consumer = {_ChPid, ConsumerTag}}) -> i(messages_ready, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:len(BQS); i(messages_unacknowledged, _) -> - lists:sum([dict:size(UAM) || + lists:sum([ordsets:size(UAM) || #cr{unacked_messages = UAM} <- all_ch_record()]); -i(messages_uncommitted, _) -> - lists:sum([length(Pending) || - #tx{pending_messages = Pending} <- all_tx_record()]); i(messages, State) -> lists:sum([i(Item, State) || Item <- [messages_ready, - messages_unacknowledged, - messages_uncommitted]]); -i(acks_uncommitted, _) -> - lists:sum([length(Pending) || - #tx{pending_acks = Pending} <- all_tx_record()]); + messages_unacknowledged]]); i(consumers, State) -> queue:len(State#q.active_consumers) + queue:len(State#q.blocked_consumers); i(transactions, _) -> - length(all_tx_record()); + length([ok || #cr{txn = Txn} <- all_ch_record(), Txn =/= none]); i(memory, _) -> {memory, M} = process_info(self(), memory), M; @@ -685,12 +581,9 @@ handle_call({deliver, Txn, Message, ChPid}, _From, State) -> {Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), reply(Delivered, NewState); -handle_call({commit, Txn}, From, State) -> - {RunQueue, NewState} = commit_transaction(Txn, From, State), - noreply(case RunQueue of - true -> run_message_queue(NewState); - false -> NewState - end); +handle_call({commit, Txn, ChPid}, From, State) -> + NewState = commit_transaction(Txn, From, ChPid, State), + noreply(run_message_queue(NewState)); handle_call({notify_down, ChPid}, _From, State) -> %% we want to do this synchronously, so that auto_deleted queues @@ -704,25 +597,19 @@ handle_call({notify_down, ChPid}, _From, State) -> end; handle_call({basic_get, ChPid, NoAck}, _From, - State = #q{q = #amqqueue{name = QName}, next_msg_id = NextId, + State = #q{q = #amqqueue{name = QName}, backing_queue_state = BQS, backing_queue = BQ}) -> - case BQ:fetch(BQS) of - {empty, BQS1} -> reply(empty, State #q { backing_queue_state = BQS1 }); + AckRequired = not NoAck, + case BQ:fetch(AckRequired, BQS) of + {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1}); {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> - AckRequired = not(NoAck), - BQS2 = - case AckRequired of - true -> - C = #cr{unacked_messages = UAM} = ch_record(ChPid), - NewUAM = dict:store(NextId, {Message, AckTag}, UAM), - store_ch_record(C#cr{unacked_messages = NewUAM}), - BQS1; - false -> - BQ:ack([AckTag], BQS1) - end, - Msg = {QName, self(), NextId, IsDelivered, Message}, - reply({ok, Remaining, Msg}, - State #q { next_msg_id = NextId + 1, backing_queue_state = BQS2 }) + case AckRequired of + true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), + store_ch_record(C#cr{unacked_messages = [AckTag|UAM]}); + false -> ok + end, + reply({ok, Remaining, {QName, self(), AckTag, 
IsDelivered, Message}}, + State#q{backing_queue_state = BQS1}) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -740,7 +627,7 @@ handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, ok -> C = #cr{consumer_count = ConsumerCount} = ch_record(ChPid), Consumer = #consumer{tag = ConsumerTag, - ack_required = not(NoAck)}, + ack_required = not NoAck}, store_ch_record(C#cr{consumer_count = ConsumerCount +1, limiter_pid = LimiterPid}), case ConsumerCount of @@ -862,37 +749,36 @@ handle_cast({deliver, Txn, Message, ChPid}, State) -> {_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), noreply(NewState); -handle_cast({ack, Txn, MsgIds, ChPid}, State = #q{backing_queue = BQ}) -> +handle_cast({ack, Txn, AckTags, ChPid}, State = #q{backing_queue_state = BQS, + backing_queue = BQ}) -> case lookup_ch(ChPid) of not_found -> noreply(State); C = #cr{unacked_messages = UAM} -> - case Txn of - none -> - {MsgWithAcks, Remaining} = collect_messages(MsgIds, UAM), - BQS = BQ:ack([AckTag || {_Message, AckTag} <- MsgWithAcks], - State #q.backing_queue_state), - store_ch_record(C#cr{unacked_messages = Remaining}), - noreply(State #q { backing_queue_state = BQS }); - _ -> - record_pending_acks(Txn, ChPid, MsgIds), - noreply(State) - end + {AckTags1, Remaining} = collect_messages(AckTags, UAM), + {C1, BQS1} = + case Txn of + none -> {C#cr{unacked_messages = Remaining}, + BQ:ack(AckTags1, BQS)}; + _ -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags1, BQS)} + end, + store_ch_record(C1), + noreply(State #q { backing_queue_state = BQS1 }) end; -handle_cast({rollback, Txn}, State) -> - noreply(rollback_transaction(Txn, State)); +handle_cast({rollback, Txn, ChPid}, State) -> + noreply(rollback_transaction(Txn, ChPid, State)); -handle_cast({requeue, MsgIds, ChPid}, State) -> +handle_cast({requeue, AckTags, ChPid}, State) -> case lookup_ch(ChPid) of not_found -> rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", [ChPid]), noreply(State); C = #cr{unacked_messages = UAM} -> - {MsgWithAcks, NewUAM} = collect_messages(MsgIds, UAM), - store_ch_record(C#cr{unacked_messages = NewUAM}), - noreply(deliver_or_requeue_n(MsgWithAcks, State)) + {AckTags1, Remaining} = collect_messages(AckTags, UAM), + store_ch_record(C#cr{unacked_messages = Remaining}), + noreply(requeue_and_run(AckTags1, State)) end; handle_cast({unblock, ChPid}, State) -> diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index cc6fda55..8e7de95e 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -63,17 +63,20 @@ behaviour_info(callbacks) -> %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). - {publish_delivered, 2}, + {publish_delivered, 3}, %% Produce the next message - {fetch, 1}, + {fetch, 2}, %% Acktags supplied are for messages which can now be forgotten %% about {ack, 2}, %% A publish, but in the context of a transaction. - {tx_publish, 2}, + {tx_publish, 3}, + + %% Acks, but in the context of a transaction. + {tx_ack, 3}, %% Undo anything which has been done by the tx_publish of the %% indicated messages. @@ -81,7 +84,7 @@ behaviour_info(callbacks) -> %% Commit these publishes and acktags. The publishes you will %% have previously seen in calls to tx_publish. - {tx_commit, 4}, + {tx_commit, 3}, %% Reinsert messages into the queue which have already been %% delivered and were pending acknowledgement. 
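Taken together, the new arities give a transaction API in which the backing queue owns all transaction state. The following skeleton shows just the four tx callbacks, buffering publishes and acks per transaction in a dict; it is an illustrative sketch, not a real backing queue (a real module must implement every callback listed above):

    -module(bq_tx_sketch).
    -export([tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3]).

    %% Here the state is simply a dict of Txn -> {Pubs, AckTags}.
    lookup(Txn, Dict) ->
        case dict:find(Txn, Dict) of
            {ok, V} -> V;
            error   -> {[], []}
        end.

    tx_publish(Txn, Msg, Dict) ->
        {Pubs, Acks} = lookup(Txn, Dict),
        dict:store(Txn, {[Msg | Pubs], Acks}, Dict).

    tx_ack(Txn, AckTags, Dict) ->
        {Pubs, Acks} = lookup(Txn, Dict),
        dict:store(Txn, {Pubs, AckTags ++ Acks}, Dict).

    tx_rollback(Txn, Dict) ->
        {_Pubs, Acks} = lookup(Txn, Dict),
        %% hand the acks back so the caller can requeue or forget them
        {Acks, dict:erase(Txn, Dict)}.

    tx_commit(Txn, _From, Dict) ->
        {Pubs, Acks} = lookup(Txn, Dict),
        %% a real implementation would publish lists:reverse(Pubs) and
        %% discard the acked messages here
        _ = lists:reverse(Pubs),
        {Acks, dict:erase(Txn, Dict)}.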
diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 9aeb4623..7d3cd722 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -928,7 +928,7 @@ new_tx(State) -> internal_commit(State = #ch{transaction_id = TxnKey, tx_participants = Participants}) -> case rabbit_amqqueue:commit_all(sets:to_list(Participants), - TxnKey) of + TxnKey, self()) of ok -> ok = notify_limiter(State#ch.limiter_pid, State#ch.uncommitted_ack_q), new_tx(State); @@ -945,7 +945,7 @@ internal_rollback(State = #ch{transaction_id = TxnKey, queue:len(UAQ), queue:len(UAMQ)]), case rabbit_amqqueue:rollback_all(sets:to_list(Participants), - TxnKey) of + TxnKey, self()) of ok -> NewUAMQ = queue:join(UAQ, UAMQ), new_tx(State#ch{unacked_message_q = NewUAMQ}); {error, Errors} -> rabbit_misc:protocol_error( diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 4ac4a16e..74fa0980 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -338,10 +338,12 @@ read(Server, Guid, CState = end. contains(Server, Guid) -> gen_server2:call(Server, {contains, Guid}, infinity). +remove(_Server, []) -> ok; remove(Server, Guids) -> gen_server2:cast(Server, {remove, Guids}). +release(_Server, []) -> ok; release(Server, Guids) -> gen_server2:cast(Server, {release, Guids}). sync(Server, Guids, K) -> gen_server2:cast(Server, {sync, Guids, K}). -sync(Server) -> gen_server2:pcast(Server, 8, sync). %% internal +sync(Server) -> gen_server2:pcast(Server, 8, sync). %% internal gc_done(Server, Reclaimed, Source, Destination) -> gen_server2:pcast(Server, 8, {gc_done, Reclaimed, Source, Destination}). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f6784339..d6ef0cb8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -344,6 +344,8 @@ write_delivered(SeqId, State) -> JournalHdl, <>), maybe_flush_journal(add_to_journal(SeqId, del, State1)). +write_acks([], State) -> + State; write_acks(SeqIds, State) -> {JournalHdl, State1} = get_journal_handle(State), ok = file_handle_cache:append( diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c772271f..4bef8435 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1362,7 +1362,7 @@ variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> Rem = Len - N, {{#basic_message { is_persistent = IsPersistent }, IsDelivered, AckTagN, Rem}, VQM} = - rabbit_variable_queue:fetch(VQN), + rabbit_variable_queue:fetch(true, VQN), {VQM, [AckTagN | AckTagsAcc]} end, {VQ, []}, lists:seq(1, Count)). 
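Every fetch in the tests now states explicitly whether an ack will follow. The drain-and-ack pattern the tests exercise looks like this when written out; a sketch, assuming VQ0 is an initialised rabbit_variable_queue state:

    drain(VQ0) ->
        drain(rabbit_variable_queue:fetch(true, VQ0), []).

    drain({empty, VQ}, AckTags) ->
        %% acknowledge everything fetched, in one call
        rabbit_variable_queue:ack(AckTags, VQ);
    drain({{_Msg, _IsDelivered, AckTag, _Remaining}, VQ}, AckTags) ->
        drain(rabbit_variable_queue:fetch(true, VQ), [AckTag | AckTags]).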
@@ -1399,7 +1399,7 @@ test_variable_queue_dynamic_duration_change() -> VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), {VQ4, AckTags} = variable_queue_fetch(Len1, false, false, Len1, VQ3), VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), - {empty, VQ6} = rabbit_variable_queue:fetch(VQ5), + {empty, VQ6} = rabbit_variable_queue:fetch(true, VQ5), %% just publish and fetch some persistent msgs, this hits the the %% partial segment path in queue_index due to the period when @@ -1408,7 +1408,7 @@ test_variable_queue_dynamic_duration_change() -> {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), VQ10 = rabbit_variable_queue:handle_pre_hibernate(VQ9), - {empty, VQ11} = rabbit_variable_queue:fetch(VQ10), + {empty, VQ11} = rabbit_variable_queue:fetch(true, VQ10), rabbit_variable_queue:terminate(VQ11), @@ -1416,7 +1416,7 @@ test_variable_queue_dynamic_duration_change() -> test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), - {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(VQ1), + {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), VQ3 = rabbit_variable_queue:ack([AckTag], VQ2), receive {duration, _, stop} -> @@ -1475,7 +1475,7 @@ test_variable_queue_partial_segments_delta_thing() -> HalfSegment + 1, VQ6), VQ8 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ7), %% should be empty now - {empty, VQ9} = rabbit_variable_queue:fetch(VQ8), + {empty, VQ9} = rabbit_variable_queue:fetch(true, VQ8), rabbit_variable_queue:terminate(VQ9), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 60e50800..90e1eb6c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,11 +31,11 @@ -module(rabbit_variable_queue). --export([init/2, terminate/1, publish/2, publish_delivered/2, - set_ram_duration_target/2, ram_duration/1, - fetch/1, ack/2, len/1, is_empty/1, purge/1, - delete_and_terminate/1, requeue/2, tx_publish/2, tx_rollback/2, - tx_commit/4, sync_callback/1, handle_pre_hibernate/1, status/1]). +-export([init/2, terminate/1, publish/2, publish_delivered/3, + set_ram_duration_target/2, ram_duration/1, fetch/2, ack/2, len/1, + is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/3, + tx_ack/3, tx_rollback/2, tx_commit/3, sync_callback/1, + handle_pre_hibernate/1, status/1]). -export([start/1]). @@ -160,13 +160,14 @@ msg_store_clients, persistent_store, persistent_count, - transient_threshold + transient_threshold, + pending_ack }). -record(msg_status, - { msg, + { seq_id, guid, - seq_id, + msg, is_persistent, is_delivered, msg_on_disk, @@ -179,6 +180,8 @@ end_seq_id %% note the end_seq_id is always >, not >= }). +-record(tx, {pending_messages, pending_acks}). + %% When we discover, on publish, that we should write some indices to %% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of %% betas that we must be due to write indices for before we do any @@ -198,8 +201,7 @@ -type(bpqueue() :: any()). -type(seq_id() :: non_neg_integer()). --type(ack() :: {'ack_index_and_store', guid(), seq_id(), atom() | pid()} - | 'ack_not_on_disk'). +-type(ack() :: {'ack', seq_id(), guid(), boolean()} | 'blank_ack'). -type(delta() :: #delta { start_seq_id :: non_neg_integer(), count :: non_neg_integer (), @@ -234,9 +236,8 @@ }). -spec(tx_commit_post_msg_store/5 :: - (boolean(), [guid()], [ack()], {pid(), any()}, state()) -> - {boolean(), state()}). 
--spec(tx_commit_index/1 :: (state()) -> {boolean(), state()}). + (boolean(), [guid()], [ack()], {pid(), any()}, state()) -> state()). +-spec(tx_commit_index/1 :: (state()) -> state()). -include("rabbit_backing_queue_spec.hrl"). @@ -313,7 +314,8 @@ init(QueueName, IsDurable) -> {rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), TRef}}, persistent_store = PersistentStore, persistent_count = DeltaCount1, - transient_threshold = NextSeqId + transient_threshold = NextSeqId, + pending_ack = dict:new() }, maybe_deltas_to_betas(State). @@ -327,157 +329,185 @@ terminate(State = #vqstate { {persistent_count, PCount}], State #vqstate { index_state = rabbit_queue_index:terminate(Terms, IndexState) }. +%% the only difference between purge and delete is that delete also +%% needs to delete everything that's been delivered and not ack'd. +delete_and_terminate(State) -> + {_PurgeCount, State1 = #vqstate { + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, + persistent_store = PersistentStore, + transient_threshold = TransientThreshold }} = + purge(State), + %% flushing here is good because it deletes all full segments, + %% leaving only partial segments around. + IndexState1 = rabbit_queue_index:flush_journal(IndexState), + IndexState2 = + case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( + IndexState1) of + {N, N, IndexState3} -> + IndexState3; + {DeltaSeqId, NextSeqId, IndexState3} -> + {_DeleteCount, IndexState4} = + delete1(PersistentStore, TransientThreshold, NextSeqId, 0, + DeltaSeqId, IndexState3), + IndexState4 + end, + IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2), + rabbit_msg_store:delete_client(PersistentStore, PRef), + rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), + rabbit_msg_store:client_terminate(MSCStateP), + rabbit_msg_store:client_terminate(MSCStateT), + State1 #vqstate { index_state = IndexState5 }. + +purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len, + persistent_store = PersistentStore }) -> + {Q4Count, IndexState1} = + remove_queue_entries(PersistentStore, fun rabbit_misc:queue_fold/3, + Q4, IndexState), + {Len, State1} = + purge1(Q4Count, State #vqstate { index_state = IndexState1, + q4 = queue:new() }), + {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0, + persistent_count = 0 }}. + publish(Msg, State) -> State1 = limit_ram_index(State), {_SeqId, State2} = publish(Msg, false, false, State1), State2. 
-publish_delivered(Msg = #basic_message { guid = Guid, - is_persistent = IsPersistent }, +publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> + {blank_ack, State}; +publish_delivered(true, Msg = #basic_message { guid = Guid, + is_persistent = IsPersistent }, State = #vqstate { len = 0, index_state = IndexState, next_seq_id = SeqId, out_counter = OutCount, in_counter = InCount, msg_store_clients = MSCState, persistent_store = PersistentStore, - persistent_count = PCount }) -> - State1 = State #vqstate { out_counter = OutCount + 1, - in_counter = InCount + 1 }, + persistent_count = PCount, + pending_ack = PA }) -> MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = true, msg_on_disk = false, index_on_disk = false }, {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), - State2 = State1 #vqstate { msg_store_clients = MSCState1, - persistent_count = PCount + case IsPersistent of - true -> 1; - false -> 0 - end }, - case MsgStatus1 #msg_status.msg_on_disk of - true -> - {#msg_status { index_on_disk = true }, IndexState1} = - maybe_write_index_to_disk(false, MsgStatus1, IndexState), - {{ack_index_and_store, Guid, SeqId, - find_msg_store(IsPersistent, PersistentStore)}, - State2 #vqstate { index_state = IndexState1, - next_seq_id = SeqId + 1 }}; - false -> - {ack_not_on_disk, State2} - end. - -set_ram_duration_target( - DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate, - target_ram_msg_count = TargetRamMsgCount - }) -> - Rate = AvgEgressRate + AvgIngressRate, - TargetRamMsgCount1 = - case DurationTarget of - infinity -> undefined; - undefined -> undefined; - _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec - end, - State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, - duration_target = DurationTarget }, - case TargetRamMsgCount1 == undefined orelse - TargetRamMsgCount1 >= TargetRamMsgCount of - true -> State1; - false -> reduce_memory_use(State1) - end. - -ram_duration(State = #vqstate { egress_rate = Egress, - ingress_rate = Ingress, - rate_timestamp = Timestamp, - in_counter = InCount, - out_counter = OutCount, - ram_msg_count = RamMsgCount, - duration_target = DurationTarget, - ram_msg_count_prev = RamMsgCountPrev }) -> - Now = now(), - {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), - {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), - - Duration = %% msgs / (msgs/sec) == sec - case AvgEgressRate == 0 andalso AvgIngressRate == 0 of - true -> infinity; - false -> (RamMsgCountPrev + RamMsgCount) / - (2 * (AvgEgressRate + AvgIngressRate)) - end, - - {Duration, set_ram_duration_target( - DurationTarget, - State #vqstate { egress_rate = Egress1, - avg_egress_rate = AvgEgressRate, - ingress_rate = Ingress1, - avg_ingress_rate = AvgIngressRate, - rate_timestamp = Now, - ram_msg_count_prev = RamMsgCount, - out_counter = 0, in_counter = 0 })}. 
- -fetch(State = + State1 = State #vqstate { msg_store_clients = MSCState1, + persistent_count = PCount + case IsPersistent of + true -> 1; + false -> 0 + end, + next_seq_id = SeqId + 1, + out_counter = OutCount + 1, + in_counter = InCount + 1 }, + AckTag = {ack, SeqId, Guid, IsPersistent}, + {AckTag, + case MsgStatus1 #msg_status.msg_on_disk of + true -> + {#msg_status { index_on_disk = true }, IndexState1} = + maybe_write_index_to_disk(false, MsgStatus1, IndexState), + State1 #vqstate { index_state = IndexState1 }; + false -> + State1 #vqstate { pending_ack = + dict:store(AckTag, MsgStatus1, PA) } + end}. + +fetch(AckRequired, State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, index_state = IndexState, len = Len, - persistent_store = PersistentStore }) -> + persistent_store = PersistentStore, pending_ack = PA }) -> case queue:out(Q4) of {empty, _Q4} -> - fetch_from_q3_or_delta(State); - {{value, #msg_status { + fetch_from_q3_or_delta(AckRequired, State); + {{value, MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Q4a} -> - {IndexState1, IsPersistent} = - case IndexOnDisk of - true -> - IndexState2 = - case IsDelivered of - false -> rabbit_queue_index:write_delivered( - SeqId, IndexState); - true -> IndexState - end, - {case IsPersistent of - true -> IndexState2; - false -> rabbit_queue_index:write_acks( - [SeqId], IndexState2) - end, IsPersistent}; - false -> %% If index isn't on disk, we can't be persistent - {IndexState, false} - end, + + AckTag = case AckRequired of + true -> {ack, SeqId, Guid, IsPersistent}; + false -> blank_ack + end, + + %% 1. Mark it delivered if necessary + IndexState1 = case IndexOnDisk andalso not IsDelivered of + true -> rabbit_queue_index:write_delivered( + SeqId, IndexState); + false -> IndexState + end, + + %% 2. If it's on disk and there's no Ack required, remove it MsgStore = find_msg_store(IsPersistent, PersistentStore), - AckTag = - case IsPersistent of - true -> true = MsgOnDisk, %% ASSERTION - {ack_index_and_store, Guid, SeqId, MsgStore}; - false -> ok = case MsgOnDisk of - true -> - rabbit_msg_store:remove( - MsgStore, [Guid]); - false -> ok - end, - ack_not_on_disk + IndexState2 = + case MsgOnDisk andalso not AckRequired of + true -> %% Remove from disk now + case IndexOnDisk of + true -> + ok = rabbit_msg_store:remove(MsgStore, [Guid]), + rabbit_queue_index:write_acks([SeqId], + IndexState1); + false -> + ok = case MsgOnDisk of + true -> rabbit_msg_store:remove( + MsgStore, [Guid]); + false -> ok + end, + IndexState1 + end; + false -> + IndexState1 + end, + + %% 3. If it's on disk, not persistent and an ack's + %% required then remove it from the queue index only. + IndexState3 = + case IndexOnDisk andalso AckRequired andalso not IsPersistent of + true -> rabbit_queue_index:write_acks([SeqId], IndexState2); + false -> IndexState2 end, + + %% 4. If it's not on disk and we need an Ack, add it to PA + PA1 = case AckRequired andalso not MsgOnDisk of + true -> dict:store(AckTag, MsgStatus #msg_status { + is_delivered = true }, PA); + false -> PA + end, + Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, ram_msg_count = RamMsgCount - 1, - index_state = IndexState1, len = Len1 }} + index_state = IndexState3, len = Len1, + pending_ack = PA1 }} end. 
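The four numbered steps in fetch/2 reduce to a decision over the msg_status flags. Restated as a pure function for clarity (an illustrative sketch only; the real code interleaves these decisions with the queue-index and msg_store calls):

    %% Returns which of the four actions apply to the fetched message.
    fetch_actions(MsgOnDisk, IndexOnDisk, IsPersistent, IsDelivered,
                  AckRequired) ->
        MarkDelivered   = IndexOnDisk andalso not IsDelivered,      %% 1
        RemoveFromStore = MsgOnDisk andalso not AckRequired,        %% 2
        AckIndexNow     = (RemoveFromStore andalso IndexOnDisk)     %% 2
                  orelse (IndexOnDisk andalso AckRequired
                                      andalso not IsPersistent),    %% 3
        StorePendingAck = AckRequired andalso not MsgOnDisk,        %% 4
        {MarkDelivered, RemoveFromStore, AckIndexNow, StorePendingAck}.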
ack([], State) -> State; ack(AckTags, State = #vqstate { index_state = IndexState, persistent_count = PCount, - persistent_store = PersistentStore }) -> - {GuidsByStore, SeqIds} = + persistent_store = PersistentStore, + pending_ack = PA }) -> + {GuidsByStore, SeqIds, PA1} = lists:foldl( - fun (ack_not_on_disk, Acc) -> Acc; - ({ack_index_and_store, Guid, SeqId, MsgStore}, {Dict, SeqIds}) -> - {rabbit_misc:dict_cons(MsgStore, Guid, Dict), [SeqId | SeqIds]} - end, {dict:new(), []}, AckTags), - IndexState1 = case SeqIds of - [] -> IndexState; - _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) - end, + fun (blank_ack, Acc) -> Acc; + ({ack, SeqId, Guid, true}, {Dict, SeqIds, PAN}) -> + {rabbit_misc:dict_cons(PersistentStore, Guid, Dict), + [SeqId | SeqIds], PAN}; + ({ack, _SeqId, Guid, false} = AckTag, {Dict, SeqIds, PAN}) -> + case dict:find(AckTag, PAN) of + error -> + %% must be in the transient store and won't + %% be in the queue index. + {rabbit_misc:dict_cons( + ?TRANSIENT_MSG_STORE, Guid, Dict), SeqIds, PAN}; + {ok, #msg_status { index_on_disk = false, %% ASSERTIONS + msg_on_disk = false, + is_persistent = false }} -> + {Dict, SeqIds, dict:erase(AckTag, PAN)} + end + end, {dict:new(), [], PA}, AckTags), + IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), @@ -485,90 +515,104 @@ ack(AckTags, State = #vqstate { index_state = IndexState, error -> 0; {ok, Guids} -> length(Guids) end, - State #vqstate { index_state = IndexState1, persistent_count = PCount1 }. + State #vqstate { index_state = IndexState1, persistent_count = PCount1, + pending_ack = PA1 }. -len(#vqstate { len = Len }) -> - Len. +tx_publish(Txn, + Msg = #basic_message { is_persistent = true, guid = Guid }, + State = #vqstate { msg_store_clients = MSCState, + persistent_store = PersistentStore }) -> + MsgStatus = #msg_status { + msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, + is_delivered = false, msg_on_disk = false, index_on_disk = false }, + {#msg_status { msg_on_disk = true }, MSCState1} = + maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), + publish_in_tx(Txn, Msg), + State #vqstate { msg_store_clients = MSCState1 }; +tx_publish(Txn, Msg, State) -> + publish_in_tx(Txn, Msg), + State. -is_empty(State) -> - 0 == len(State). +tx_ack(Txn, AckTags, State) -> + ack_in_tx(Txn, AckTags), + State. -purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len, - persistent_store = PersistentStore }) -> - {Q4Count, IndexState1} = - remove_queue_entries(PersistentStore, fun rabbit_misc:queue_fold/3, - Q4, IndexState), - {Len, State1} = - purge1(Q4Count, State #vqstate { index_state = IndexState1, - q4 = queue:new() }), - {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0, - persistent_count = 0 }}. +tx_rollback(Txn, State = #vqstate { persistent_store = PersistentStore }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + ok = rabbit_msg_store:remove(PersistentStore, persistent_guids(Pubs)), + {lists:flatten(AckTags), State}. -%% the only difference between purge and delete is that delete also -%% needs to delete everything that's been delivered and not ack'd. 
-delete_and_terminate(State) -> - {_PurgeCount, State1 = #vqstate { - index_state = IndexState, - msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, - persistent_store = PersistentStore, - transient_threshold = TransientThreshold }} = - purge(State), - %% flushing here is good because it deletes all full segments, - %% leaving only partial segments around. - IndexState1 = rabbit_queue_index:flush_journal(IndexState), - IndexState2 = - case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( - IndexState1) of - {N, N, IndexState3} -> - IndexState3; - {DeltaSeqId, NextSeqId, IndexState3} -> - {_DeleteCount, IndexState4} = - delete1(PersistentStore, TransientThreshold, NextSeqId, 0, - DeltaSeqId, IndexState3), - IndexState4 - end, - IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2), - rabbit_msg_store:delete_client(PersistentStore, PRef), - rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), - rabbit_msg_store:client_terminate(MSCStateP), - rabbit_msg_store:client_terminate(MSCStateT), - State1 #vqstate { index_state = IndexState5 }. +tx_commit(Txn, From, State = #vqstate { persistent_store = PersistentStore }) -> + %% If we are a non-durable queue, or we have no persistent pubs, + %% we can skip the msg_store loop. + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + PubsOrdered = lists:reverse(Pubs), + AckTags1 = lists:flatten(AckTags), + PersistentGuids = persistent_guids(PubsOrdered), + IsTransientPubs = [] == PersistentGuids, + {AckTags1, + case IsTransientPubs orelse + ?TRANSIENT_MSG_STORE == PersistentStore of + true -> + tx_commit_post_msg_store( + IsTransientPubs, PubsOrdered, AckTags1, From, State); + false -> + Self = self(), + ok = + rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + fun () -> + ok = + rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, + fun (StateN) -> tx_commit_post_msg_store( + IsTransientPubs, PubsOrdered, + AckTags1, From, StateN) + end) + end), + State + end}. -%% [{Msg, AckTag}] -%% We guarantee that after fetch, only persistent msgs are left on -%% disk. This means that in a requeue, we set MsgOnDisk to true, thus -%% avoiding calls to msg_store:write for persistent msgs. It also -%% means that we don't need to worry about calling msg_store:remove -%% (as ack would do) because transient msgs won't be on disk anyway, -%% thus they won't need to be removed. However, we do call -%% msg_store:release so that the cache isn't held full of msgs which -%% are now at the tail of the queue. 
-requeue(MsgsWithAckTags, State) -> +requeue(AckTags, State = #vqstate { persistent_store = PersistentStore, + pending_ack = PA }) -> {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, - persistent_count = PCount, - persistent_store = PersistentStore }} = + persistent_count = PCount }} = lists:foldl( - fun ({Msg = #basic_message { guid = Guid }, AckTag}, - {SeqIdsAcc, Dict, StateN}) -> - {SeqIdsAcc1, Dict1, MsgOnDisk} = - case AckTag of - ack_not_on_disk -> - {SeqIdsAcc, Dict, false}; - {ack_index_and_store, Guid, SeqId, MsgStore} -> - {[SeqId | SeqIdsAcc], - rabbit_misc:dict_cons(MsgStore, Guid, Dict), - true} - end, - {_SeqId, StateN1} = - publish(Msg, true, MsgOnDisk, StateN), - {SeqIdsAcc1, Dict1, StateN1} - end, {[], dict:new(), State}, MsgsWithAckTags), - IndexState1 = - case SeqIds of - [] -> IndexState; - _ -> rabbit_queue_index:write_acks(SeqIds, IndexState) - end, + fun ({ack, SeqId, Guid, IsPersistent} = AckTag, + {SeqIdsAcc, Dict, StateN = #vqstate { + msg_store_clients = MSCStateN }}) -> + case dict:find(AckTag, PA) of + error -> + {{ok, Msg = #basic_message{}}, MSCStateN1} = + read_from_msg_store(PersistentStore, MSCStateN, + IsPersistent, Guid), + StateN1 = StateN #vqstate { + msg_store_clients = MSCStateN1 }, + {_SeqId, StateN2} = publish(Msg, true, true, StateN1), + {SeqIdsAcc1, MsgStore} = + case IsPersistent of + true -> + {[SeqId | SeqIdsAcc], PersistentStore}; + false -> + {SeqIdsAcc, ?TRANSIENT_MSG_STORE} + end, + {SeqIdsAcc1, + rabbit_misc:dict_cons(MsgStore, Guid, Dict), + StateN2}; + {ok, #msg_status { index_on_disk = false, + msg_on_disk = false, + is_persistent = false, + msg = Msg }} -> + {_SeqId, StateN1} = publish(Msg, true, false, StateN), + {SeqIdsAcc, Dict, + StateN1 #vqstate { + pending_ack = dict:erase(AckTag, PA) }} + end + end, {[], dict:new(), State}, AckTags), + IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:release(MsgStore, Guids) end, ok, GuidsByStore), @@ -579,98 +623,60 @@ requeue(MsgsWithAckTags, State) -> State1 #vqstate { index_state = IndexState1, persistent_count = PCount1 }. -tx_publish(Msg = #basic_message { is_persistent = true, guid = Guid }, - State = #vqstate { msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> - MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, - is_delivered = false, msg_on_disk = false, index_on_disk = false }, - {#msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), - State #vqstate { msg_store_clients = MSCState1 }; -tx_publish(_Msg, State) -> - State. +len(#vqstate { len = Len }) -> + Len. -tx_rollback(Pubs, State = #vqstate { persistent_store = PersistentStore }) -> - ok = case persistent_guids(Pubs) of - [] -> ok; - PP -> rabbit_msg_store:remove(PersistentStore, PP) - end, - State. +is_empty(State) -> + 0 == len(State). -tx_commit(Pubs, AckTags, From, State = - #vqstate { persistent_store = PersistentStore }) -> - %% If we are a non-durable queue, or we have no persistent pubs, - %% we can skip the msg_store loop. 
- PersistentGuids = persistent_guids(Pubs), - IsTransientPubs = [] == PersistentGuids, - case IsTransientPubs orelse - ?TRANSIENT_MSG_STORE == PersistentStore of - true -> - tx_commit_post_msg_store( - IsTransientPubs, Pubs, AckTags, From, State); - false -> - Self = self(), - ok = rabbit_msg_store:sync( - ?PERSISTENT_MSG_STORE, PersistentGuids, - fun () -> ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, - fun (StateN) -> tx_commit_post_msg_store( - IsTransientPubs, Pubs, - AckTags, From, StateN) - end) - end), - {false, State} +set_ram_duration_target( + DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, + avg_ingress_rate = AvgIngressRate, + target_ram_msg_count = TargetRamMsgCount + }) -> + Rate = AvgEgressRate + AvgIngressRate, + TargetRamMsgCount1 = + case DurationTarget of + infinity -> undefined; + undefined -> undefined; + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec + end, + State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, + duration_target = DurationTarget }, + case TargetRamMsgCount1 == undefined orelse + TargetRamMsgCount1 >= TargetRamMsgCount of + true -> State1; + false -> reduce_memory_use(State1) end. -tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State = - #vqstate { on_sync = OnSync = {SAcks, SPubs, SFroms}, - persistent_store = PersistentStore }) -> - %% If we are a non-durable queue, or (no persisent pubs, and no - %% persistent acks) then we can skip the queue_index loop. - DiskAcks = - lists:filter(fun (AckTag) -> AckTag /= ack_not_on_disk end, AckTags), - case PersistentStore == ?TRANSIENT_MSG_STORE orelse - (IsTransientPubs andalso [] == DiskAcks) of - true -> {Res, State1} = - tx_commit_index(State #vqstate { - on_sync = {[], [Pubs], [From]} }), - {Res, State1 #vqstate { on_sync = OnSync }}; - false -> {false, State #vqstate { on_sync = { [DiskAcks | SAcks], - [Pubs | SPubs], - [From | SFroms] }}} - end. +ram_duration(State = #vqstate { egress_rate = Egress, + ingress_rate = Ingress, + rate_timestamp = Timestamp, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> + Now = now(), + {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), + {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), -tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> - {false, State}; -tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, - persistent_store = PersistentStore }) -> - Acks = lists:flatten(SAcks), - State1 = ack(Acks, State), - AckSeqIds = lists:foldl(fun ({ack_index_and_store, _Guid, - SeqId, ?PERSISTENT_MSG_STORE}, SeqIdsAcc) -> - [SeqId | SeqIdsAcc]; - (_, SeqIdsAcc) -> - SeqIdsAcc - end, [], Acks), - IsPersistentStore = ?PERSISTENT_MSG_STORE == PersistentStore, - Pubs = lists:flatten(lists:reverse(SPubs)), - {SeqIds, State2 = #vqstate { index_state = IndexState }} = - lists:foldl( - fun (Msg = #basic_message { is_persistent = IsPersistent }, - {SeqIdsAcc, StateN}) -> - {SeqId, StateN1} = - publish(Msg, false, IsPersistent, StateN), - {case IsPersistentStore andalso IsPersistent of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, StateN1} - end, {AckSeqIds, State1}, Pubs), - IndexState1 = - rabbit_queue_index:sync_seq_ids(SeqIds, IndexState), - [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ], - {Pubs /= [], - State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }}. 
+ Duration = %% msgs / (msgs/sec) == sec + case AvgEgressRate == 0 andalso AvgIngressRate == 0 of + true -> infinity; + false -> (RamMsgCountPrev + RamMsgCount) / + (2 * (AvgEgressRate + AvgIngressRate)) + end, + + {Duration, set_ram_duration_target( + DurationTarget, + State #vqstate { egress_rate = Egress1, + avg_egress_rate = AvgEgressRate, + ingress_rate = Ingress1, + avg_ingress_rate = AvgIngressRate, + rate_timestamp = Now, + ram_msg_count_prev = RamMsgCount, + out_counter = 0, in_counter = 0 })}. sync_callback(#vqstate { on_sync = {_, _, []} }) -> undefined; sync_callback(_) -> fun tx_commit_index/1. @@ -705,6 +711,27 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, %% Minor helpers %%---------------------------------------------------------------------------- +lookup_tx(Txn) -> + case get({txn, Txn}) of + undefined -> #tx { pending_messages = [], + pending_acks = [] }; + V -> V + end. + +store_tx(Txn, Tx) -> + put({txn, Txn}, Tx). + +erase_tx(Txn) -> + erase({txn, Txn}). + +publish_in_tx(Txn, Msg) -> + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }). + +ack_in_tx(Txn, AckTags) -> + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }). + update_rate(Now, Then, Count, {OThen, OCount}) -> %% form the avg over the current period and the previous Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), @@ -712,7 +739,7 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> persistent_guids(Pubs) -> [Guid || Obj = #basic_message { guid = Guid } <- Pubs, - Obj #basic_message.is_persistent]. + Obj #basic_message.is_persistent]. betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> {Filtered, IndexState1} = @@ -805,6 +832,50 @@ should_force_index_to_disk(State = %% Internal major helpers for Public API %%---------------------------------------------------------------------------- +tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State = + #vqstate { on_sync = OnSync = {SAcks, SPubs, SFroms}, + persistent_store = PersistentStore }) -> + %% If we are a non-durable queue, or (no persisent pubs, and no + %% persistent acks) then we can skip the queue_index loop. + case PersistentStore == ?TRANSIENT_MSG_STORE orelse + (IsTransientPubs andalso [] == AckTags) of %%% AGH FIX ME + true -> State1 = tx_commit_index(State #vqstate { + on_sync = {[], [Pubs], [From]} }), + State1 #vqstate { on_sync = OnSync }; + false -> State #vqstate { on_sync = { [AckTags | SAcks], + [Pubs | SPubs], + [From | SFroms] }} + end. 
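With transactions now recorded in the variable queue's process dictionary via lookup_tx/store_tx/erase_tx, a transaction's life cycle from the caller's side reads as follows. This is a sketch: Txn, Msg, AckTags, From and VQ0 are assumed to be in scope, and tx_commit may defer its work behind a msg_store sync as shown above.

    %% accumulate publishes and acks under Txn
    VQ1 = rabbit_variable_queue:tx_publish(Txn, Msg, VQ0),
    VQ2 = rabbit_variable_queue:tx_ack(Txn, AckTags, VQ1),

    %% commit: the pending acks come back, the publishes get applied
    {AckTags1, VQ3} = rabbit_variable_queue:tx_commit(Txn, From, VQ2),

    %% or roll back instead: the pending acks come back untouched so the
    %% caller can decide what to do with them
    %% {AckTags1, VQ3} = rabbit_variable_queue:tx_rollback(Txn, VQ2)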
+ +tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> + State; +tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, + persistent_store = PersistentStore }) -> + Acks = lists:flatten(SAcks), + State1 = ack(Acks, State), + AckSeqIds = lists:foldl(fun ({ack, SeqId, _Guid, true}, SeqIdsAcc) -> + [SeqId | SeqIdsAcc]; + (_, SeqIdsAcc) -> + SeqIdsAcc + end, [], Acks), + IsPersistentStore = ?PERSISTENT_MSG_STORE == PersistentStore, + Pubs = lists:flatten(lists:reverse(SPubs)), + {SeqIds, State2 = #vqstate { index_state = IndexState }} = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, + {SeqIdsAcc, StateN}) -> + {SeqId, StateN1} = + publish(Msg, false, IsPersistent, StateN), + {case IsPersistentStore andalso IsPersistent of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, StateN1} + end, {AckSeqIds, State1}, Pubs), + IndexState1 = + rabbit_queue_index:sync_seq_ids(SeqIds, IndexState), + [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ], + State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. + delete1(_PersistentStore, _TransientThreshold, NextSeqId, Count, DeltaSeqId, IndexState) when DeltaSeqId >= NextSeqId -> {Count, IndexState}; @@ -887,7 +958,8 @@ remove_queue_entries1( end, {PersistentStore, CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. -fetch_from_q3_or_delta(State = #vqstate { +fetch_from_q3_or_delta(AckRequired, + State = #vqstate { q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, @@ -933,7 +1005,7 @@ fetch_from_q3_or_delta(State = #vqstate { %% delta and q3 are maintained State1 end, - fetch(State2) + fetch(AckRequired, State2) end. reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, -- cgit v1.2.1 From 6f95ee1292bc734334579f9bcf3cc2ea1bbf4df7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 11:21:25 +0100 Subject: Updated documentation --- docs/rabbitmqctl.1.xml | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index 7634b2d2..3d503593 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -714,20 +714,11 @@ messages_unacknowledged Number of messages delivered to clients but not yet acknowledged. - - messages_uncommitted - Number of messages published in as yet uncommitted transactions - messages - Sum of ready, unacknowledged and uncommitted messages + Sum of ready and unacknowledged messages (queue depth). - - acks_uncommitted - Number of acknowledgements received in as yet uncommitted - transactions. - consumers Number of consumers. @@ -945,7 +936,7 @@ The channelinfoitem parameter is used to indicate which channel information items to include in the results. The column order in the results will match the - order of the parameters. + order of the parameters. 
channelinfoitem can take any value from the list that follows: -- cgit v1.2.1 From ab914a4d3328c5f27086279656608ec72ee95742 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 11:55:02 +0100 Subject: Cope with queue death even though there are in-flight commits going on --- src/rabbit_amqqueue.erl | 3 ++- src/rabbit_amqqueue_process.erl | 10 +++----- src/rabbit_variable_queue.erl | 57 ++++++++++++++++++++++++++--------------- 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index cc6f08b7..3ca39e10 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -346,7 +346,8 @@ unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 7, {unblock, ChPid}). maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:pcast(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}). + gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, + infinity). flush_all(QPids, ChPid) -> safe_pmap_ok( diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index efbc2766..5fda6935 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -136,8 +136,6 @@ terminate({shutdown, _}, State) -> terminate(_Reason, State) -> ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? - %% Ensure that any persisted tx messages are removed. - %% TODO: wait for all in flight tx_commits to complete State1 = terminate_shutdown(delete_and_terminate, State), ok = rabbit_amqqueue:internal_delete(qname(State1)). @@ -733,7 +731,10 @@ handle_call({claim_queue, ReaderPid}, _From, reply(ok, State); _ -> reply(locked, State) - end. + end; + +handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> + reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). handle_cast(init_backing_queue, State = #q{backing_queue_state = undefined, @@ -793,9 +794,6 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> - noreply(maybe_run_queue_via_backing_queue(Fun, State)); - handle_cast({limit, ChPid, LimiterPid}, State) -> noreply( possibly_unblock( diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 90e1eb6c..aa1589a6 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -319,15 +319,17 @@ init(QueueName, IsDurable) -> }, maybe_deltas_to_betas(State). -terminate(State = #vqstate { - persistent_count = PCount, - index_state = IndexState, - msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} }) -> +terminate(State) -> + State1 = #vqstate { + persistent_count = PCount, index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} } = + tx_commit_index(State), rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), Terms = [{persistent_ref, PRef}, {transient_ref, TRef}, {persistent_count, PCount}], - State #vqstate { index_state = rabbit_queue_index:terminate(Terms, IndexState) }. + State1 #vqstate { index_state = + rabbit_queue_index:terminate(Terms, IndexState) }. %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. 
@@ -559,19 +561,10 @@ tx_commit(Txn, From, State = #vqstate { persistent_store = PersistentStore }) -> tx_commit_post_msg_store( IsTransientPubs, PubsOrdered, AckTags1, From, State); false -> - Self = self(), - ok = - rabbit_msg_store:sync( - ?PERSISTENT_MSG_STORE, PersistentGuids, - fun () -> - ok = - rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, - fun (StateN) -> tx_commit_post_msg_store( - IsTransientPubs, PubsOrdered, - AckTags1, From, StateN) - end) - end), + ok = rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + msg_store_callback(PersistentGuids, IsTransientPubs, + PubsOrdered, AckTags1, From)), State end}. @@ -832,13 +825,37 @@ should_force_index_to_disk(State = %% Internal major helpers for Public API %%---------------------------------------------------------------------------- +msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, From) -> + Self = self(), + fun() -> + spawn( + fun() -> + ok = rabbit_misc:with_exit_handler( + fun() -> rabbit_msg_store:remove( + ?PERSISTENT_MSG_STORE, + PersistentGuids) + end, + fun() -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, fun (StateN) -> + tx_commit_post_msg_store( + IsTransientPubs, Pubs, + AckTags, From, StateN) + end) + end) + end) + end. + tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State = #vqstate { on_sync = OnSync = {SAcks, SPubs, SFroms}, - persistent_store = PersistentStore }) -> + persistent_store = PersistentStore, + pending_ack = PA }) -> %% If we are a non-durable queue, or (no persisent pubs, and no %% persistent acks) then we can skip the queue_index loop. case PersistentStore == ?TRANSIENT_MSG_STORE orelse - (IsTransientPubs andalso [] == AckTags) of %%% AGH FIX ME + (IsTransientPubs andalso + lists:foldl(fun (AckTag, true ) -> dict:is_key(AckTag, PA); + (_AckTag, false) -> false + end, true, AckTags)) of true -> State1 = tx_commit_index(State #vqstate { on_sync = {[], [Pubs], [From]} }), State1 #vqstate { on_sync = OnSync }; -- cgit v1.2.1 From 4ed329aa3974bcc64ffab758e2b806d67d5f0616 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 14:02:22 +0100 Subject: Changes to the way we track messages that are pending acks to ensure that on queue deletion we remove everything from the message stores that shouldn't be there --- src/rabbit_variable_queue.erl | 166 +++++++++++++++++++++++++++--------------- 1 file changed, 107 insertions(+), 59 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index aa1589a6..7cebf2b1 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -201,7 +201,7 @@ -type(bpqueue() :: any()). -type(seq_id() :: non_neg_integer()). --type(ack() :: {'ack', seq_id(), guid(), boolean()} | 'blank_ack'). +-type(ack() :: {'ack', seq_id()} | 'blank_ack'). 
-type(delta() :: #delta { start_seq_id :: non_neg_integer(), count :: non_neg_integer (), @@ -229,7 +229,7 @@ rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), on_sync :: {[[ack()]], [[guid()]], [{pid(), any()}]}, - msg_store_clients :: {{any(), binary()}, {any(), binary()}}, + msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), transient_threshold :: non_neg_integer() @@ -323,23 +323,25 @@ terminate(State) -> State1 = #vqstate { persistent_count = PCount, index_state = IndexState, msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} } = - tx_commit_index(State), + remove_pending_ack(true, tx_commit_index(State)), rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), Terms = [{persistent_ref, PRef}, {transient_ref, TRef}, {persistent_count, PCount}], State1 #vqstate { index_state = - rabbit_queue_index:terminate(Terms, IndexState) }. + rabbit_queue_index:terminate(Terms, IndexState), + msg_store_clients = undefined }. %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. delete_and_terminate(State) -> - {_PurgeCount, State1 = #vqstate { - index_state = IndexState, - msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, - persistent_store = PersistentStore, - transient_threshold = TransientThreshold }} = - purge(State), + {_PurgeCount, State1} = purge(State), + State2 = #vqstate { + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, + persistent_store = PersistentStore, + transient_threshold = TransientThreshold } = + remove_pending_ack(false, State1), %% flushing here is good because it deletes all full segments, %% leaving only partial segments around. IndexState1 = rabbit_queue_index:flush_journal(IndexState), @@ -359,7 +361,8 @@ delete_and_terminate(State) -> rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), - State1 #vqstate { index_state = IndexState5 }. + State2 #vqstate { index_state = IndexState5, + msg_store_clients = undefined }. purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len, persistent_store = PersistentStore }) -> @@ -402,16 +405,16 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, next_seq_id = SeqId + 1, out_counter = OutCount + 1, in_counter = InCount + 1 }, - AckTag = {ack, SeqId, Guid, IsPersistent}, - {AckTag, + {{ack, SeqId}, case MsgStatus1 #msg_status.msg_on_disk of true -> {#msg_status { index_on_disk = true }, IndexState1} = maybe_write_index_to_disk(false, MsgStatus1, IndexState), - State1 #vqstate { index_state = IndexState1 }; + State1 #vqstate { index_state = IndexState1, + pending_ack = dict:store(SeqId, {true, Guid}, + PA) }; false -> - State1 #vqstate { pending_ack = - dict:store(AckTag, MsgStatus1, PA) } + State1 #vqstate { pending_ack = dict:store(SeqId, MsgStatus1, PA) } end}. 
fetch(AckRequired, State = @@ -428,7 +431,7 @@ fetch(AckRequired, State = Q4a} -> AckTag = case AckRequired of - true -> {ack, SeqId, Guid, IsPersistent}; + true -> {ack, SeqId}; false -> blank_ack end, @@ -444,17 +447,17 @@ fetch(AckRequired, State = IndexState2 = case MsgOnDisk andalso not AckRequired of true -> %% Remove from disk now + ok = case MsgOnDisk of + true -> + rabbit_msg_store:remove(MsgStore, [Guid]); + false -> + ok + end, case IndexOnDisk of true -> - ok = rabbit_msg_store:remove(MsgStore, [Guid]), rabbit_queue_index:write_acks([SeqId], IndexState1); false -> - ok = case MsgOnDisk of - true -> rabbit_msg_store:remove( - MsgStore, [Guid]); - false -> ok - end, IndexState1 end; false -> @@ -469,10 +472,16 @@ fetch(AckRequired, State = false -> IndexState2 end, - %% 4. If it's not on disk and we need an Ack, add it to PA - PA1 = case AckRequired andalso not MsgOnDisk of - true -> dict:store(AckTag, MsgStatus #msg_status { - is_delivered = true }, PA); + %% 4. If an ack is required, add something sensible to PA + PA1 = case AckRequired of + true -> + Entry = + case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus #msg_status { + is_delivered = true } + end, + dict:store(SeqId, Entry, PA); false -> PA end, @@ -492,21 +501,19 @@ ack(AckTags, State = #vqstate { index_state = IndexState, pending_ack = PA }) -> {GuidsByStore, SeqIds, PA1} = lists:foldl( - fun (blank_ack, Acc) -> Acc; - ({ack, SeqId, Guid, true}, {Dict, SeqIds, PAN}) -> - {rabbit_misc:dict_cons(PersistentStore, Guid, Dict), - [SeqId | SeqIds], PAN}; - ({ack, _SeqId, Guid, false} = AckTag, {Dict, SeqIds, PAN}) -> - case dict:find(AckTag, PAN) of - error -> - %% must be in the transient store and won't - %% be in the queue index. - {rabbit_misc:dict_cons( - ?TRANSIENT_MSG_STORE, Guid, Dict), SeqIds, PAN}; + fun ({ack, SeqId}, {Dict, SeqIds, PAN}) -> + PAN1 = dict:erase(SeqId, PAN), + case dict:find(SeqId, PAN) of {ok, #msg_status { index_on_disk = false, %% ASSERTIONS msg_on_disk = false, is_persistent = false }} -> - {Dict, SeqIds, dict:erase(AckTag, PAN)} + {Dict, SeqIds, PAN1}; + {ok, {false, Guid}} -> + {rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, Guid, + Dict), SeqIds, PAN1}; + {ok, {true, Guid}} -> + {rabbit_misc:dict_cons(PersistentStore, Guid, Dict), + [SeqId | SeqIds], PAN1} end end, {dict:new(), [], PA}, AckTags), IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), @@ -568,23 +575,31 @@ tx_commit(Txn, From, State = #vqstate { persistent_store = PersistentStore }) -> State end}. 
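The pending-ack dictionary above now holds one of two shapes per seq id: a full #msg_status record when the message exists only in RAM, or a compact {IsPersistent, Guid} pair once the message is safely in a message store. A hedged sketch of dispatching on the two shapes (assumes it lives where #msg_status is in scope; the function name is illustrative):

    classify_pending_ack(SeqId, PA) ->
        case dict:find(SeqId, PA) of
            {ok, {true, Guid}}   -> {on_disk_persistent, Guid};
            {ok, {false, Guid}}  -> {on_disk_transient, Guid};
            {ok, #msg_status {}} -> ram_only;
            error                -> not_pending
        end.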
-requeue(AckTags, State = #vqstate { persistent_store = PersistentStore, - pending_ack = PA }) -> +requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) -> {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( - fun ({ack, SeqId, Guid, IsPersistent} = AckTag, - {SeqIdsAcc, Dict, StateN = #vqstate { - msg_store_clients = MSCStateN }}) -> - case dict:find(AckTag, PA) of - error -> + fun ({ack, SeqId}, + {SeqIdsAcc, Dict, StateN = + #vqstate { msg_store_clients = MSCStateN, + pending_ack = PAN}}) -> + PAN1 = dict:erase(SeqId, PAN), + StateN1 = StateN #vqstate { pending_ack = PAN1 }, + case dict:find(SeqId, PAN) of + {ok, #msg_status { index_on_disk = false, + msg_on_disk = false, + is_persistent = false, + msg = Msg }} -> + {_SeqId, StateN2} = publish(Msg, true, false, StateN1), + {SeqIdsAcc, Dict, StateN2}; + {ok, {IsPersistent, Guid}} -> {{ok, Msg = #basic_message{}}, MSCStateN1} = - read_from_msg_store(PersistentStore, MSCStateN, - IsPersistent, Guid), - StateN1 = StateN #vqstate { + read_from_msg_store( + PersistentStore, MSCStateN, IsPersistent, Guid), + StateN2 = StateN1 #vqstate { msg_store_clients = MSCStateN1 }, - {_SeqId, StateN2} = publish(Msg, true, true, StateN1), + {_SeqId, StateN3} = publish(Msg, true, true, StateN2), {SeqIdsAcc1, MsgStore} = case IsPersistent of true -> @@ -594,15 +609,7 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore, end, {SeqIdsAcc1, rabbit_misc:dict_cons(MsgStore, Guid, Dict), - StateN2}; - {ok, #msg_status { index_on_disk = false, - msg_on_disk = false, - is_persistent = false, - msg = Msg }} -> - {_SeqId, StateN1} = publish(Msg, true, false, StateN), - {SeqIdsAcc, Dict, - StateN1 #vqstate { - pending_ack = dict:erase(AckTag, PA) }} + StateN3} end end, {[], dict:new(), State}, AckTags), IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), @@ -704,6 +711,47 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, %% Minor helpers %%---------------------------------------------------------------------------- +remove_pending_ack(KeepPersistent, + State = #vqstate { pending_ack = PA, + persistent_store = PersistentStore, + index_state = IndexState }) -> + {SeqIds, GuidsByStore} = + dict:fold(fun (SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> + case IsPersistent of + true -> {[SeqId | SeqIdsAcc], + rabbit_misc:dict_cons( + PersistentStore, Guid, Dict)}; + false -> {SeqIdsAcc, + rabbit_misc:dict_cons( + ?TRANSIENT_MSG_STORE, Guid, Dict)} + end; + (_SeqId, #basic_message {}, Acc) -> + Acc + end, {[], dict:new()}, PA), + case KeepPersistent of + true -> + State1 = + State #vqstate { + pending_ack = + dict:filter( + fun (_SeqId, {IsPersistent, _Guid}) -> IsPersistent; + (_SeqId, #basic_message {}) -> false + end, PA) }, + case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + error -> State1; + {ok, Guids} -> ok = rabbit_msg_store:remove( + ?TRANSIENT_MSG_STORE, Guids), + State1 + end; + false -> + IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), + ok = dict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + State #vqstate { pending_ack = dict:new(), + index_state = IndexState1 } + end. 
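The KeepPersistent flag is what separates the two shutdown paths: a clean terminate must leave persistent messages behind for recovery, while delete_and_terminate must scrub both stores and write the acks into the queue index. A stripped-down restatement of the two call sites above (details elided):

    terminate(State) ->
        %% clean shutdown: flush pending commits, then drop only the
        %% transient pending acks; persistent ones survive for recovery
        remove_pending_ack(true, tx_commit_index(State)).

    delete_and_terminate(State) ->
        %% queue deletion: purge, then scrub *both* stores and ack the
        %% index entries so nothing is resurrected on restart
        {_PurgeCount, State1} = purge(State),
        remove_pending_ack(false, State1).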
+ lookup_tx(Txn) -> case get({txn, Txn}) of undefined -> #tx { pending_messages = [], -- cgit v1.2.1 From acd3fa74c63030f2756b60400dd9bf14c9c1bb1a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 14:14:35 +0100 Subject: Whoops, wrong record --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7cebf2b1..9a8cb86e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -725,7 +725,7 @@ remove_pending_ack(KeepPersistent, rabbit_misc:dict_cons( ?TRANSIENT_MSG_STORE, Guid, Dict)} end; - (_SeqId, #basic_message {}, Acc) -> + (_SeqId, #msg_status {}, Acc) -> Acc end, {[], dict:new()}, PA), case KeepPersistent of @@ -735,7 +735,7 @@ remove_pending_ack(KeepPersistent, pending_ack = dict:filter( fun (_SeqId, {IsPersistent, _Guid}) -> IsPersistent; - (_SeqId, #basic_message {}) -> false + (_SeqId, #msg_status {}) -> false end, PA) }, case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of error -> State1; -- cgit v1.2.1 From 72931e099acfd4b3b147dc8ebdd3bcf3e71ad90c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 15:53:46 +0100 Subject: Capture more information for the next time the freaky GC error pops up --- src/rabbit_msg_store.erl | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 74fa0980..0d7254e5 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -440,8 +440,7 @@ client_read2(Server, false, _Right, %% check again to see if we've been locked in the meantime, %% between lookup and update_counter (thus GC started before our %% +1). - [#file_summary { locked = Locked }] = - ets:lookup(FileSummaryEts, File), + [#file_summary { locked = Locked }] = ets:lookup(FileSummaryEts, File), case Locked of true -> %% If we get a badarg here, then the GC has finished and @@ -1443,10 +1442,11 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, gc_active = false, gc_pid = GCPid, file_summary_ets = FileSummaryEts }) - when (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> + when SumValid > ?FILE_SIZE_LIMIT andalso + (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> First = ets:first(FileSummaryEts), N = rabbit_misc:ceil(math:log(1.0 - random:uniform()) / - math:log(1.0 - ?GEOMETRIC_P)), + math:log(1.0 - ?GEOMETRIC_P)), case find_files_to_gc(FileSummaryEts, N, First) of undefined -> State; @@ -1660,7 +1660,7 @@ find_unremoved_messages_in_file(File, copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> - {FinalOffset, BlockStart1, BlockEnd1} = + case lists:foldl( fun (#msg_location { guid = Guid, offset = Offset, total_size = TotalSize }, @@ -1692,17 +1692,23 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {Offset, Offset + TotalSize} end, {CurOffset + TotalSize, BlockStart2, BlockEnd2} - end, {InitOffset, undefined, undefined}, WorkList), - case WorkList of - [] -> - ok; - _ -> - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = - file_handle_cache:position(SourceHdl, BlockStart1), - {ok, BSize1} = - file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), - ok = file_handle_cache:sync(DestinationHdl) + end, {InitOffset, undefined, undefined}, WorkList) of + {FinalOffset, BlockStart1, BlockEnd1} -> + case WorkList 
of + [] -> + ok; + _ -> + %% do the last remaining block + BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = + file_handle_cache:position(SourceHdl, BlockStart1), + {ok, BSize1} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), + ok = file_handle_cache:sync(DestinationHdl) + end; + {FinalOffsetZ, _BlockStart1, _BlockEnd1} -> + throw({gc_error, [{expected, FinalOffset}, + {got, FinalOffsetZ}, + {destination, Destination}]}) end, ok. -- cgit v1.2.1 From f868068bd52447a5d9a24cfcaeea111b47506d0f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 16:23:42 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 0d7254e5..e875ac5f 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1585,8 +1585,7 @@ combine_files(#file_summary { file = Source, {ok, SourceHdl} = open_file(Dir, SourceName, ?READ_AHEAD_MODE), {ok, DestinationHdl} = - open_file(Dir, DestinationName, - ?READ_AHEAD_MODE ++ ?WRITE_MODE), + open_file(Dir, DestinationName, ?READ_AHEAD_MODE ++ ?WRITE_MODE), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't %% need a tmp file -- cgit v1.2.1 From b5526a5e7a20f326c1fcb4900284adfc76194e36 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 17:11:33 +0100 Subject: cosmetics --- src/rabbit_msg_store.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e875ac5f..90486192 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -919,8 +919,8 @@ contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> end. remove_message(Guid, State = #msstate { sum_valid_data = SumValid, - file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts }) -> + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(Guid, State), @@ -1646,8 +1646,7 @@ find_unremoved_messages_in_file(File, {_FileSummaryEts, Dir, Index, IndexState}) -> %% Msgs here will be end-of-file at start-of-list {ok, Messages, _FileSize} = - scan_file_for_valid_messages( - Dir, filenum_to_name(File)), + scan_file_for_valid_messages(Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order lists:foldl( fun ({Guid, _TotalSize, _Offset}, Acc) -> -- cgit v1.2.1 From d59179758e57b0e628774b53e679f918bc2ca35e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 17:11:48 +0100 Subject: set channel txn to none on rollback --- src/rabbit_amqqueue_process.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5fda6935..f6d3d1a5 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -481,11 +481,12 @@ commit_transaction(Txn, From, ChPid, State = #q{backing_queue = BQ, end, State#q{backing_queue_state = BQS1}. 
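The copy_messages rewrite a little way above swaps a bare pattern-match assertion for a case clause whose failure branch throws a term carrying the expected value, the actual value, and the destination, so the next occurrence of the "freaky GC error" arrives with context rather than a bare badmatch. The idiom in miniature (hypothetical function):

    checked_total(Expected, Sizes) ->
        case lists:sum(Sizes) of
            Expected -> ok;
            Got      -> throw({size_error, [{expected, Expected},
                                            {got, Got},
                                            {input, Sizes}]})
        end.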
-rollback_transaction(Txn, _ChPid, State = #q{backing_queue = BQ, +rollback_transaction(Txn, ChPid, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> {_AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), %% Iff we removed acktags from the channel record on ack+txn then %% we would add them back in here (would also require ChPid) + record_current_channel_tx(ChPid, none), State#q{backing_queue_state = BQS1}. collect_messages(AckTags, UAM) -> -- cgit v1.2.1 From 8df34200752e170a45232c1fcd9972fc1e7ecfa2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Apr 2010 17:38:03 +0100 Subject: On commit we must know about the channel --- src/rabbit_amqqueue_process.erl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index f6d3d1a5..720d390a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -470,15 +470,12 @@ maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) -> commit_transaction(Txn, From, ChPid, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> {AckTags, BQS1} = BQ:tx_commit(Txn, From, BQS), - case lookup_ch(ChPid) of - not_found -> - []; - C = #cr{unacked_messages = UAM} -> - Remaining = ordsets:to_list(ordsets:subtract( - ordsets:from_list(UAM), - ordsets:from_list(AckTags))), - store_ch_record(C#cr{unacked_messages = Remaining, txn = none}) - end, + %% ChPid must be known here because of the participant management + %% by the channel. + C = #cr{unacked_messages = UAM} = lookup_ch(ChPid), + Remaining = ordsets:to_list(ordsets:subtract(ordsets:from_list(UAM), + ordsets:from_list(AckTags))), + store_ch_record(C#cr{unacked_messages = Remaining, txn = none}), State#q{backing_queue_state = BQS1}. rollback_transaction(Txn, ChPid, State = #q{backing_queue = BQ, -- cgit v1.2.1 From 0e628eb768adc21f292891526422c7405b06ebc4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 18 Apr 2010 11:58:35 +0100 Subject: Rewrote scanning of message files, to read in blocks of up to 4MB (hence bounded) but can still cope with variable-sized files. The advantage here is vastly reduced number of OS calls to position and read. The result is that, in tests, GC time is reduced from around 35 seconds to about 2. Code is also 7 lines shorter - and arguably simpler - the only tricky bit is reusing leftover data from one block with the next read block, hence different read and scan offsets --- src/rabbit_msg_file.erl | 89 ++++++++++++++++++++++-------------------------- src/rabbit_msg_store.erl | 23 +++++++------ 2 files changed, 54 insertions(+), 58 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 792f0efa..03910902 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_file). --export([append/3, read/2, scan/1]). +-export([append/3, read/2, scan/2]). %%---------------------------------------------------------------------------- @@ -43,6 +43,7 @@ -define(GUID_SIZE_BYTES, 16). -define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). -define(SIZE_AND_GUID_BYTES, (?GUID_SIZE_BYTES + ?INTEGER_SIZE_BYTES)). +-define(FOUR_MEGA_BYTES, 4194304). %%---------------------------------------------------------------------------- @@ -52,12 +53,13 @@ -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). +-type(file_size() :: non_neg_integer()). -spec(append/3 :: (io_device(), guid(), msg()) -> ({'ok', msg_size()} | {'error', any()})).
-spec(read/2 :: (io_device(), msg_size()) -> ({'ok', {guid(), msg()}} | {'error', any()})). --spec(scan/1 :: (io_device()) -> +-spec(scan/2 :: (io_device(), file_size()) -> {'ok', [{guid(), msg_size(), position()}], position()}). -endif. @@ -90,51 +92,42 @@ read(FileHdl, TotalSize) -> KO -> KO end. -scan(FileHdl) -> scan(FileHdl, 0, []). - -scan(FileHdl, Offset, Acc) -> - case read_next(FileHdl, Offset) of - eof -> {ok, Acc, Offset}; - {corrupted, NextOffset} -> - scan(FileHdl, NextOffset, Acc); - {ok, {Guid, TotalSize, NextOffset}} -> - scan(FileHdl, NextOffset, [{Guid, TotalSize, Offset} | Acc]); - _KO -> - %% bad message, but we may still have recovered some valid messages - {ok, Acc, Offset} +scan(FileHdl, FileSize) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, [], 0). + +scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> + {ok, Acc, ScanOffset}; +scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?FOUR_MEGA_BYTES, (FileSize - ReadOffset)]), + case file_handle_cache:read(FileHdl, Read) of + {ok, Data1} -> + {Acc1, ScanOffset1, Data2} = + scan(<>, Acc, ScanOffset), + scan(FileHdl, FileSize, Data2, ReadOffset + iolist_size(Data1), + Acc1, ScanOffset1); + _KO -> {ok, Acc, ScanOffset} end. -read_next(FileHdl, Offset) -> - case file_handle_cache:read(FileHdl, ?SIZE_AND_GUID_BYTES) of - %% Here we take option 5 from - %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in which - %% we read the Guid as a number, and then convert it back to - %% a binary in order to work around bugs in Erlang's GC. - {ok, <>} -> - case Size of - 0 -> eof; %% Nothing we can do other than stop - _ -> - TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, - ExpectedAbsPos = Offset + TotalSize - 1, - case file_handle_cache:position( - FileHdl, {cur, Size - ?GUID_SIZE_BYTES}) of - {ok, ExpectedAbsPos} -> - NextOffset = ExpectedAbsPos + 1, - case file_handle_cache:read(FileHdl, 1) of - {ok, - <>} -> - <> = - <>, - {ok, {Guid, TotalSize, NextOffset}}; - {ok, _SomeOtherData} -> - {corrupted, NextOffset}; - KO -> KO - end; - {ok, _SomeOtherPos} -> - %% seek failed, so give up - eof; - KO -> KO - end - end; - Other -> Other - end. +scan(<<>>, Acc, Offset) -> + {Acc, Offset, <<>>}; +scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> + {Acc, Offset, <<>>}; %% Nothing to do other than stop. +scan(<>, Acc, Offset) -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + case WriteMarker of + ?WRITE_OK_MARKER -> + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in + %% which we read the Guid as a number, and then convert it + %% back to a binary in order to work around bugs in + %% Erlang's GC. + <> = + <>, + <> = <>, + scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + _ -> + scan(Rest, Acc, Offset + TotalSize) + end; +scan(Data, Acc, Offset) -> + {Acc, Offset, Data}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 90486192..eb3a5db0 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1077,7 +1077,8 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. 
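The carry-the-leftover pattern in the new scan/2 above is worth seeing in isolation: read a bounded block, parse the complete records in it, and prepend whatever tail remains to the next block, so the read offset and the scan offset advance independently. A self-contained sketch under an assumed record format of <<Len:32, Payload:Len/binary>> (everything here is illustrative, not the rabbit_msg_file format):

    -module(block_scan_sketch).
    -export([scan_file/1]).

    -define(BLOCK_SIZE, 4194304). %% 4MB blocks, as in the commit above

    scan_file(Path) ->
        {ok, Hdl} = file:open(Path, [read, raw, binary]),
        try scan_blocks(Hdl, <<>>, []) after file:close(Hdl) end.

    scan_blocks(Hdl, Leftover, Acc) ->
        case file:read(Hdl, ?BLOCK_SIZE) of
            {ok, Block} ->
                %% prepend the unconsumed tail of the previous block
                {Acc1, Rest} = scan(<<Leftover/binary, Block/binary>>, Acc),
                scan_blocks(Hdl, Rest, Acc1);
            eof ->
                %% a non-empty Leftover here is a truncated trailing
                %% record; drop it, as a crash-recovery scan would
                lists:reverse(Acc)
        end.

    %% records are <<Len:32, Payload:Len/binary>>; stop at the first
    %% record that straddles the block boundary and return the tail
    scan(<<Len:32, Payload:Len/binary, Rest/binary>>, Acc) ->
        scan(Rest, [Payload | Acc]);
    scan(Rest, Acc) ->
        {Acc, Rest}.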
scan_file_for_valid_messages(Dir, FileName) -> case open_file(Dir, FileName, ?READ_MODE) of {ok, Hdl} -> - Valid = rabbit_msg_file:scan(Hdl), + Size = filelib:file_size(form_filename(Dir, FileName)), + Valid = rabbit_msg_file:scan(Hdl, Size), %% if something really bad's happened, the close could fail, %% but ignore file_handle_cache:close(Hdl), @@ -1442,7 +1443,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, gc_active = false, gc_pid = GCPid, file_summary_ets = FileSummaryEts }) - when SumValid > ?FILE_SIZE_LIMIT andalso + when SumFileSize > 3 * ?FILE_SIZE_LIMIT andalso (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> First = ets:first(FileSummaryEts), N = rabbit_misc:ceil(math:log(1.0 - random:uniform()) / @@ -1543,7 +1544,6 @@ delete_file_if_empty(File, State = %%---------------------------------------------------------------------------- gc(SourceFile, DestFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> - [SourceObj = #file_summary { readers = SourceReaders, valid_total_size = SourceValidData, left = DestFile, @@ -1597,6 +1597,8 @@ combine_files(#file_summary { file = Source, ok = truncate_and_extend_file( DestinationHdl, DestinationValid, ExpectedSize); true -> + {DestinationWorkList, DestinationValid} = + find_unremoved_messages_in_file(Destination, State), Worklist = lists:dropwhile( fun (#msg_location { offset = Offset }) @@ -1610,8 +1612,7 @@ combine_files(#file_summary { file = Source, %% that the list should be naturally sorted %% as we require, however, we need to %% enforce it anyway - end, - find_unremoved_messages_in_file(Destination, State)), + end, DestinationWorkList), Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, {ok, TmpHdl} = open_file( Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), @@ -1633,7 +1634,7 @@ combine_files(#file_summary { file = Source, ok = file_handle_cache:close(TmpHdl), ok = file:delete(form_filename(Dir, Tmp)) end, - SourceWorkList = find_unremoved_messages_in_file(Source, State), + {SourceWorkList, SourceValid} = find_unremoved_messages_in_file(Source, State), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State), %% tidy up @@ -1649,12 +1650,14 @@ find_unremoved_messages_in_file(File, scan_file_for_valid_messages(Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order lists:foldl( - fun ({Guid, _TotalSize, _Offset}, Acc) -> + fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) -> case Index:lookup(Guid, IndexState) of - Entry = #msg_location { file = File } -> [ Entry | Acc ]; - _ -> Acc + Entry = #msg_location { file = File } -> + {[ Entry | List ], TotalSize + Size}; + _ -> + Acc end - end, [], Messages). + end, {[], 0}, Messages). 
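find_unremoved_messages_in_file now returns the valid byte total alongside the worklist, computed in the same fold as the filtering; that one-pass shape is the reusable idea. A generic hedged sketch (data shapes are illustrative):

    live_messages(Scanned, IsLiveFun) ->
        %% single pass: keep the live entries and total their sizes
        lists:foldl(fun ({_Guid, Size, _Offset} = Msg, {Live, Total}) ->
                            case IsLiveFun(Msg) of
                                true  -> {[Msg | Live], Total + Size};
                                false -> {Live, Total}
                            end
                    end, {[], 0}, Scanned).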
copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> -- cgit v1.2.1 From 8526a1a4fd15494161bec29a034184056a05cc28 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 18 Apr 2010 12:18:49 +0100 Subject: A cosmetic and a missing infinity timeout on a call --- src/rabbit_amqqueue.erl | 2 +- src/rabbit_msg_store.erl | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 9406008e..d705909a 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -167,7 +167,7 @@ recover_durable_queues(DurableQueues) -> end, [], DurableQueues), %% Issue inits to *all* the queues so that they all init at the same time [ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue) || Q <- Qs], - [ok = gen_server2:call(Q#amqqueue.pid, sync) || Q <- Qs], + [ok = gen_server2:call(Q#amqqueue.pid, sync, infinity) || Q <- Qs], Qs. declare(QueueName, Durable, AutoDelete, Args) -> diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index eb3a5db0..74b81f61 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1370,8 +1370,7 @@ build_index(Gatherer, Left, [File|Files], State) -> build_index_worker( Gatherer, Ref, State = #msstate { dir = Dir }, Left, File, Files) -> {ok, Messages, FileSize} = - scan_file_for_valid_messages( - Dir, filenum_to_name(File)), + scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> -- cgit v1.2.1 From 39201702560e5e4ebd77d1350ebd406971a9a794 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 18 Apr 2010 12:19:54 +0100 Subject: Another missing infinity on a call --- src/rabbit_msg_store_gc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 5c8e88d6..32ea0014 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -59,7 +59,7 @@ gc(Server, Source, Destination) -> gen_server2:cast(Server, {gc, Source, Destination}). stop(Server) -> - gen_server2:call(Server, stop). + gen_server2:call(Server, stop, infinity). %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 1a47cfb25dc7f154bb39d7ea7c19015a3eb935d4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 18 Apr 2010 15:04:56 +0100 Subject: Make use of fhc:delete now that we have it --- src/rabbit_msg_store.erl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 74b81f61..e46d2664 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1630,16 +1630,14 @@ combine_files(#file_summary { file = Source, file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), %% position in DestinationHdl should now be DestinationValid ok = file_handle_cache:sync(DestinationHdl), - ok = file_handle_cache:close(TmpHdl), - ok = file:delete(form_filename(Dir, Tmp)) + ok = file_handle_cache:delete(TmpHdl) end, {SourceWorkList, SourceValid} = find_unremoved_messages_in_file(Source, State), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State), %% tidy up - ok = file_handle_cache:close(SourceHdl), ok = file_handle_cache:close(DestinationHdl), - ok = file:delete(form_filename(Dir, SourceName)), + ok = file_handle_cache:delete(SourceHdl), ok. 
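The two "missing infinity" fixes above guard against the same trap: gen_server2:call/2 inherits gen_server's default five-second timeout, and a sync issued during recovery or alongside a large GC can legitimately take longer, killing the caller. The hedged rule of thumb, in one line:

    %% recovery and GC work can take minutes; never lean on the default
    %% 5s timeout for calls that wait on disk-bound work
    sync_queue(QPid) -> gen_server2:call(QPid, sync, infinity).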
find_unremoved_messages_in_file(File, -- cgit v1.2.1 From b7238a77512bab079f7de74f4e40fca0600db116 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 18 Apr 2010 16:47:25 +0100 Subject: Correct mistake in indexing into pairs of GC candidates --- src/rabbit_msg_store.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index e46d2664..6f1a5e6b 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1478,8 +1478,9 @@ find_files_to_gc(FileSummaryEts, N, First) -> case Pairs of [] -> undefined; [Pair] -> Pair; - _ -> M = 1 + (N rem length(Pairs)), - lists:nth(M, Pairs) + _ -> Len = length(Pairs), %% The list is the wrong way + M = Len - (N rem Len), %% around, so subtract our N + lists:nth(M, Pairs) %% from its length end. find_files_to_gc(_FileSummaryEts, _N, #file_summary {}, [], Pairs) -> -- cgit v1.2.1 From 6094d346509c25778070c820a4e6752ef183f9e2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 18 Apr 2010 17:21:09 +0100 Subject: Switch four megs to a fraction of the file size limit --- include/rabbit_msg_store.hrl | 2 ++ src/rabbit_msg_file.erl | 8 ++++---- src/rabbit_msg_store.erl | 2 -- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index a392a6f4..73eb4ae0 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -31,6 +31,8 @@ -include("rabbit.hrl"). +-define(FILE_SIZE_LIMIT, (16*1024*1024)). + -ifdef(use_specs). -type(msg() :: any()). diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 03910902..f758c184 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -35,6 +35,8 @@ %%---------------------------------------------------------------------------- +-include("rabbit_msg_store.hrl"). + -define(INTEGER_SIZE_BYTES, 8). -define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). -define(WRITE_OK_SIZE_BITS, 8). @@ -43,12 +45,10 @@ -define(GUID_SIZE_BYTES, 16). -define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). -define(SIZE_AND_GUID_BYTES, (?GUID_SIZE_BYTES + ?INTEGER_SIZE_BYTES)). --define(FOUR_MEGA_BYTES, 4194304). +-define(SCAN_BLOCK_SIZE, ?FILE_SIZE_LIMIT div 4). %%---------------------------------------------------------------------------- --include("rabbit_msg_store.hrl"). - -ifdef(use_specs). -type(position() :: non_neg_integer()). @@ -98,7 +98,7 @@ scan(FileHdl, FileSize) when FileSize >= 0 -> scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> {ok, Acc, ScanOffset}; scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> - Read = lists:min([?FOUR_MEGA_BYTES, (FileSize - ReadOffset)]), + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), case file_handle_cache:read(FileHdl, Read) of {ok, Data1} -> {Acc1, ScanOffset1, Data2} = diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 6f1a5e6b..7e09f7fa 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -60,8 +60,6 @@ -define(FILE_EXTENSION, ".rdq"). -define(FILE_EXTENSION_TMP, ".rdt"). --define(FILE_SIZE_LIMIT, (16*1024*1024)). - -define(HANDLE_CACHE_BUFFER_SIZE, 1048576). 
%% 1MB %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 9b823814e0baa79850b9705ccb04cf4279772f71 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Apr 2010 09:10:46 +0100 Subject: Beef up assertions on combine deltas, but it doesn't appear there was a bug here anyway --- src/rabbit_variable_queue.erl | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9a8cb86e..1a609dcc 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -831,15 +831,25 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) -> %% the first arg is the older delta combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) -> ?BLANK_DELTA; -combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { } = B) -> B; -combine_deltas(#delta { } = A, ?BLANK_DELTA_PATTERN(Y)) -> A; -combine_deltas(#delta { start_seq_id = SeqIdLow, count = CountLow}, - #delta { start_seq_id = SeqIdHigh, count = CountHigh, - end_seq_id = SeqIdEnd }) -> - true = SeqIdLow =< SeqIdHigh, %% ASSERTION +combine_deltas(?BLANK_DELTA_PATTERN(X), + #delta { start_seq_id = Start, count = Count, + end_seq_id = End } = B) -> + true = Start + Count =< End, %% ASSERTION + B; +combine_deltas(#delta { start_seq_id = Start, count = Count, + end_seq_id = End } = A, ?BLANK_DELTA_PATTERN(Y)) -> + true = Start + Count =< End, %% ASSERTION + A; +combine_deltas(#delta { start_seq_id = StartLow, count = CountLow, + end_seq_id = EndLow }, + #delta { start_seq_id = StartHigh, count = CountHigh, + end_seq_id = EndHigh }) -> Count = CountLow + CountHigh, - true = Count =< SeqIdEnd - SeqIdLow, %% ASSERTION - #delta { start_seq_id = SeqIdLow, count = Count, end_seq_id = SeqIdEnd }. + true = (StartLow =< StartHigh) %% ASSERTIONS + andalso ((StartLow + CountLow) =< EndLow) + andalso ((StartHigh + CountHigh) =< EndHigh) + andalso ((StartLow + Count) =< EndHigh), + #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }. beta_fold_no_index_on_disk(Fun, Init, Q) -> bpqueue:foldr(fun (_Prefix, Value, Acc) -> @@ -1520,7 +1530,7 @@ push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> {empty, Qa} -> {Count, Qa, RamIndexCount, IndexState}; {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} - when Limit /= undefined andalso SeqId < Limit -> + when Limit =/= undefined andalso SeqId < Limit -> {Count, Q, RamIndexCount, IndexState}; {{value, IndexOnDisk, MsgStatus}, Qa} -> {RamIndexCount1, IndexState1} = -- cgit v1.2.1 From 28792661e389a75cc3b1aef78e96e4e108f7b89e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Apr 2010 14:31:40 +0100 Subject: Sort out handling of acktags in the queue_process. Use a set throughout - this avoids the unpleasant lists:usort on every ack (which is what ordsets:from_list does). Also, don't bother with the intersection - just assume that we're only given acks we really know about, which the channel should be able to guarantee. This results in a performance improvement from MulticastMain -s 0 -r 12750 to MulticastMain -s 0 -r 13900 : i.e. 
9% --- src/rabbit_amqqueue_process.erl | 61 +++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 720d390a..a0bb3b0b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -69,7 +69,7 @@ ch_pid, limiter_pid, monitor_ref, - unacked_messages, + acktags, is_limit_active, txn, unsent_message_count}). @@ -230,7 +230,7 @@ ch_record(ChPid) -> C = #cr{consumer_count = 0, ch_pid = ChPid, monitor_ref = MonitorRef, - unacked_messages = [], + acktags = sets:new(), is_limit_active = false, txn = none, unsent_message_count = 0}, @@ -271,7 +271,7 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, ActiveConsumersTail} -> C = #cr{limiter_pid = LimiterPid, unsent_message_count = Count, - unacked_messages = UAM} = ch_record(ChPid), + acktags = ChAckTags} = ch_record(ChPid), IsMsgReady = PredFun(FunAcc, State), case (IsMsgReady andalso rabbit_limiter:can_send( LimiterPid, self(), AckRequired )) of @@ -281,12 +281,12 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, rabbit_channel:deliver( ChPid, ConsumerTag, AckRequired, {QName, self(), AckTag, IsDelivered, Message}), - NewUAM = case AckRequired of - true -> [AckTag|UAM]; - false -> UAM - end, + ChAckTags1 = case AckRequired of + true -> sets:add_element(AckTag, ChAckTags); + false -> ChAckTags + end, NewC = C#cr{unsent_message_count = Count + 1, - unacked_messages = NewUAM}, + acktags = ChAckTags1}, store_ch_record(NewC), {NewActiveConsumers, NewBlockedConsumers} = case ch_record_state_transition(C, NewC) of @@ -414,7 +414,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> not_found -> {ok, State}; #cr{monitor_ref = MonitorRef, ch_pid = ChPid, txn = Txn, - unacked_messages = UAM} -> + acktags = ChAckTags} -> erlang:demonitor(MonitorRef), erase({ch, ChPid}), State1 = State#q{ @@ -433,7 +433,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> _ -> rollback_transaction(Txn, ChPid, State1) end, - {ok, requeue_and_run(UAM, State2)} + {ok, requeue_and_run(ChAckTags, State2)} end end. @@ -472,25 +472,21 @@ commit_transaction(Txn, From, ChPid, State = #q{backing_queue = BQ, {AckTags, BQS1} = BQ:tx_commit(Txn, From, BQS), %% ChPid must be known here because of the participant management %% by the channel. - C = #cr{unacked_messages = UAM} = lookup_ch(ChPid), - Remaining = ordsets:to_list(ordsets:subtract(ordsets:from_list(UAM), - ordsets:from_list(AckTags))), - store_ch_record(C#cr{unacked_messages = Remaining, txn = none}), + C = #cr{acktags = ChAckTags} = lookup_ch(ChPid), + ChAckTags1 = subtract_acks(ChAckTags, AckTags), + store_ch_record(C#cr{acktags = ChAckTags1, txn = none}), State#q{backing_queue_state = BQS1}. rollback_transaction(Txn, ChPid, State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> + backing_queue_state = BQS}) -> {_AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), %% Iff we removed acktags from the channel record on ack+txn then %% we would add them back in here (would also require ChPid) record_current_channel_tx(ChPid, none), State#q{backing_queue_state = BQS1}. -collect_messages(AckTags, UAM) -> - AckTagsSet = ordsets:from_list(AckTags), - UAMSet = ordsets:from_list(UAM), - {ordsets:to_list(ordsets:intersection(AckTagsSet, UAMSet)), - ordsets:to_list(ordsets:subtract(UAMSet, AckTagsSet))}. +subtract_acks(A, B) when is_list(B) -> + lists:foldl(fun sets:del_element/2, A, B). 
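The cost argument in the commit message above, concretely: ordsets:from_list/1 is a usort of the whole unacked list on every ack, whereas a sets:set() maintained incrementally costs only the tags actually delivered or acked. A tiny hedged round-trip using the same calls as the patch:

    acktag_roundtrip(AckTag) ->
        Tags0 = sets:new(),
        Tags1 = sets:add_element(AckTag, Tags0),  %% recorded at delivery
        %% subtract_acks above is exactly this fold
        Tags2 = lists:foldl(fun sets:del_element/2, Tags1, [AckTag]),
        0 = sets:size(Tags2),
        ok.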
infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. @@ -515,8 +511,7 @@ i(exclusive_consumer_tag, #q{exclusive_consumer = {_ChPid, ConsumerTag}}) -> i(messages_ready, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:len(BQS); i(messages_unacknowledged, _) -> - lists:sum([ordsets:size(UAM) || - #cr{unacked_messages = UAM} <- all_ch_record()]); + lists:sum([sets:size(C#cr.acktags) || C <- all_ch_record()]); i(messages, State) -> lists:sum([i(Item, State) || Item <- [messages_ready, messages_unacknowledged]]); @@ -600,8 +595,9 @@ handle_call({basic_get, ChPid, NoAck}, _From, {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1}); {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> case AckRequired of - true -> C = #cr{unacked_messages = UAM} = ch_record(ChPid), - store_ch_record(C#cr{unacked_messages = [AckTag|UAM]}); + true -> C = #cr{acktags = ChAckTags} = ch_record(ChPid), + store_ch_record( + C#cr{acktags = sets:add_element(AckTag, ChAckTags)}); false -> ok end, reply({ok, Remaining, {QName, self(), AckTag, IsDelivered, Message}}, @@ -753,13 +749,12 @@ handle_cast({ack, Txn, AckTags, ChPid}, State = #q{backing_queue_state = BQS, case lookup_ch(ChPid) of not_found -> noreply(State); - C = #cr{unacked_messages = UAM} -> - {AckTags1, Remaining} = collect_messages(AckTags, UAM), + C = #cr{acktags = ChAckTags} -> {C1, BQS1} = case Txn of - none -> {C#cr{unacked_messages = Remaining}, - BQ:ack(AckTags1, BQS)}; - _ -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags1, BQS)} + none -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), + {C#cr{acktags = ChAckTags1}, BQ:ack(AckTags, BQS)}; + _ -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags, BQS)} end, store_ch_record(C1), noreply(State #q { backing_queue_state = BQS1 }) @@ -774,10 +769,10 @@ handle_cast({requeue, AckTags, ChPid}, State) -> rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", [ChPid]), noreply(State); - C = #cr{unacked_messages = UAM} -> - {AckTags1, Remaining} = collect_messages(AckTags, UAM), - store_ch_record(C#cr{unacked_messages = Remaining}), - noreply(requeue_and_run(AckTags1, State)) + C = #cr{acktags = ChAckTags} -> + ChAckTags1 = subtract_acks(ChAckTags, AckTags), + store_ch_record(C#cr{acktags = ChAckTags1}), + noreply(requeue_and_run(AckTags, State)) end; handle_cast({unblock, ChPid}, State) -> -- cgit v1.2.1 From a054968e96cebf33087e2f33635ba53ed0156b9d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Apr 2010 15:49:36 +0100 Subject: Whoops --- src/rabbit_amqqueue_process.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a0bb3b0b..697282fe 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -433,7 +433,7 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> _ -> rollback_transaction(Txn, ChPid, State1) end, - {ok, requeue_and_run(ChAckTags, State2)} + {ok, requeue_and_run(sets:to_list(ChAckTags), State2)} end end. 
-- cgit v1.2.1 From 607b1ce5bbfe0a4fbfd50acb0c5663a038dd4336 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Apr 2010 16:19:56 +0100 Subject: Lots of fixes for handling of acktags, which was badly broken in some places --- src/rabbit_variable_queue.erl | 72 +++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 1a609dcc..9328164b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -201,7 +201,7 @@ -type(bpqueue() :: any()). -type(seq_id() :: non_neg_integer()). --type(ack() :: {'ack', seq_id()} | 'blank_ack'). +-type(ack() :: seq_id() | 'blank_ack'). -type(delta() :: #delta { start_seq_id :: non_neg_integer(), count :: non_neg_integer (), @@ -405,7 +405,7 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, next_seq_id = SeqId + 1, out_counter = OutCount + 1, in_counter = InCount + 1 }, - {{ack, SeqId}, + {SeqId, case MsgStatus1 #msg_status.msg_on_disk of true -> {#msg_status { index_on_disk = true }, IndexState1} = @@ -431,7 +431,7 @@ fetch(AckRequired, State = Q4a} -> AckTag = case AckRequired of - true -> {ack, SeqId}; + true -> SeqId; false -> blank_ack end, @@ -501,7 +501,7 @@ ack(AckTags, State = #vqstate { index_state = IndexState, pending_ack = PA }) -> {GuidsByStore, SeqIds, PA1} = lists:foldl( - fun ({ack, SeqId}, {Dict, SeqIds, PAN}) -> + fun (SeqId, {Dict, SeqIds, PAN}) -> PAN1 = dict:erase(SeqId, PAN), case dict:find(SeqId, PAN) of {ok, #msg_status { index_on_disk = false, %% ASSERTIONS @@ -580,10 +580,9 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) -> State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( - fun ({ack, SeqId}, - {SeqIdsAcc, Dict, StateN = - #vqstate { msg_store_clients = MSCStateN, - pending_ack = PAN}}) -> + fun (SeqId, {SeqIdsAcc, Dict, StateN = + #vqstate { msg_store_clients = MSCStateN, + pending_ack = PAN}}) -> PAN1 = dict:erase(SeqId, PAN), StateN1 = StateN #vqstate { pending_ack = PAN1 }, case dict:find(SeqId, PAN) of @@ -715,28 +714,27 @@ remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, persistent_store = PersistentStore, index_state = IndexState }) -> - {SeqIds, GuidsByStore} = - dict:fold(fun (SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> - case IsPersistent of - true -> {[SeqId | SeqIdsAcc], - rabbit_misc:dict_cons( - PersistentStore, Guid, Dict)}; - false -> {SeqIdsAcc, - rabbit_misc:dict_cons( - ?TRANSIENT_MSG_STORE, Guid, Dict)} - end; - (_SeqId, #msg_status {}, Acc) -> - Acc - end, {[], dict:new()}, PA), + {SeqIds, GuidsByStore, PA1} = + dict:fold( + fun (SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict, PAN}) -> + PAN1 = case KeepPersistent andalso IsPersistent of + true -> PAN; + false -> dict:erase(SeqId, PAN) + end, + case IsPersistent of + true -> {[SeqId | SeqIdsAcc], + rabbit_misc:dict_cons( + PersistentStore, Guid, Dict), PAN1}; + false -> {SeqIdsAcc, + rabbit_misc:dict_cons( + ?TRANSIENT_MSG_STORE, Guid, Dict), PAN1} + end; + (SeqId, #msg_status {}, {SeqIdsAcc, Dict, PAN}) -> + {SeqIdsAcc, Dict, dict:erase(SeqId, PAN)} + end, {[], dict:new(), PA}, PA), case KeepPersistent of true -> - State1 = - State #vqstate { - pending_ack = - dict:filter( - fun (_SeqId, {IsPersistent, _Guid}) -> IsPersistent; - (_SeqId, #msg_status {}) -> false - end, PA) }, + State1 = State #vqstate { pending_ack = PA1 }, case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of error -> State1; {ok, Guids} -> 
ok = rabbit_msg_store:remove( @@ -911,9 +909,14 @@ tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State = %% persistent acks) then we can skip the queue_index loop. case PersistentStore == ?TRANSIENT_MSG_STORE orelse (IsTransientPubs andalso - lists:foldl(fun (AckTag, true ) -> dict:is_key(AckTag, PA); - (_AckTag, false) -> false - end, true, AckTags)) of + lists:foldl( + fun (AckTag, true ) -> + case dict:find(AckTag, PA) of + {ok, #msg_status{}} -> true; + {ok, {IsPersistent, _Guid}} -> not IsPersistent + end; + (_AckTag, false) -> false + end, true, AckTags)) of true -> State1 = tx_commit_index(State #vqstate { on_sync = {[], [Pubs], [From]} }), State1 #vqstate { on_sync = OnSync }; @@ -928,11 +931,6 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, persistent_store = PersistentStore }) -> Acks = lists:flatten(SAcks), State1 = ack(Acks, State), - AckSeqIds = lists:foldl(fun ({ack, SeqId, _Guid, true}, SeqIdsAcc) -> - [SeqId | SeqIdsAcc]; - (_, SeqIdsAcc) -> - SeqIdsAcc - end, [], Acks), IsPersistentStore = ?PERSISTENT_MSG_STORE == PersistentStore, Pubs = lists:flatten(lists:reverse(SPubs)), {SeqIds, State2 = #vqstate { index_state = IndexState }} = @@ -945,7 +943,7 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc end, StateN1} - end, {AckSeqIds, State1}, Pubs), + end, {Acks, State1}, Pubs), IndexState1 = rabbit_queue_index:sync_seq_ids(SeqIds, IndexState), [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ], -- cgit v1.2.1 From ef48739ff34a763c6a38fb14bdc5faf7ba167d65 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 20 Apr 2010 14:15:51 +0100 Subject: Switched the From to a Fun and hence CPS --- include/rabbit_backing_queue_spec.hrl | 2 +- src/rabbit_amqqueue_process.erl | 3 ++- src/rabbit_backing_queue.erl | 5 ++++- src/rabbit_variable_queue.erl | 26 +++++++++++++------------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index d86a5382..63f4493b 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -46,7 +46,7 @@ -spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()). -spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()). -spec(tx_rollback/2 :: (txn(), state()) -> {[ack()], state()}). --spec(tx_commit/3 :: (txn(), {pid(), any()}, state()) -> {[ack()], state()}). +-spec(tx_commit/3 :: (txn(), fun (() -> any()), state()) -> {[ack()], state()}). -spec(requeue/2 :: ([ack()], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 697282fe..b10baacb 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -469,7 +469,8 @@ maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) -> commit_transaction(Txn, From, ChPid, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> - {AckTags, BQS1} = BQ:tx_commit(Txn, From, BQS), + {AckTags, BQS1} = + BQ:tx_commit(Txn, fun () -> gen_server2:reply(From, ok) end, BQS), %% ChPid must be known here because of the participant management %% by the channel. 
C = #cr{acktags = ChAckTags} = lookup_ch(ChPid), diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 8e7de95e..7090d9cc 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -83,7 +83,10 @@ behaviour_info(callbacks) -> {tx_rollback, 2}, %% Commit these publishes and acktags. The publishes you will - %% have previously seen in calls to tx_publish. + %% have previously seen in calls to tx_publish, and the acks in + %% calls to tx_ack. The Fun passed in must be called once the + %% messages have really been committed. This CPS permits the + %% possibility of commit coalescing. {tx_commit, 3}, %% Reinsert messages into the queue which have already been diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9328164b..d53e6e3f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -180,7 +180,7 @@ end_seq_id %% note the end_seq_id is always >, not >= }). --record(tx, {pending_messages, pending_acks}). +-record(tx, { pending_messages, pending_acks }). %% When we discover, on publish, that we should write some indices to %% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of @@ -228,7 +228,7 @@ avg_ingress_rate :: float(), rate_timestamp :: {integer(), integer(), integer()}, len :: non_neg_integer(), - on_sync :: {[[ack()]], [[guid()]], [{pid(), any()}]}, + on_sync :: {[[ack()]], [[guid()]], [fun (() -> any())]}, msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), @@ -552,7 +552,7 @@ tx_rollback(Txn, State = #vqstate { persistent_store = PersistentStore }) -> ok = rabbit_msg_store:remove(PersistentStore, persistent_guids(Pubs)), {lists:flatten(AckTags), State}. -tx_commit(Txn, From, State = #vqstate { persistent_store = PersistentStore }) -> +tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore }) -> %% If we are a non-durable queue, or we have no persistent pubs, %% we can skip the msg_store loop. #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), @@ -566,12 +566,12 @@ tx_commit(Txn, From, State = #vqstate { persistent_store = PersistentStore }) -> ?TRANSIENT_MSG_STORE == PersistentStore of true -> tx_commit_post_msg_store( - IsTransientPubs, PubsOrdered, AckTags1, From, State); + IsTransientPubs, PubsOrdered, AckTags1, Fun, State); false -> ok = rabbit_msg_store:sync( ?PERSISTENT_MSG_STORE, PersistentGuids, msg_store_callback(PersistentGuids, IsTransientPubs, - PubsOrdered, AckTags1, From)), + PubsOrdered, AckTags1, Fun)), State end}.
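Passing a continuation instead of a From tag is what buys commit coalescing: the backing queue can sit on several commits, perform one sync, and only then fire each caller's reply. A toy sketch of that shape (entirely hypothetical; the real code accumulates the funs in on_sync and replies via gen_server2:reply/2):

    -module(coalesce_sketch).
    -export([new/0, tx_commit/2, sync/1]).

    new() -> {pending, []}.

    tx_commit(Fun, {pending, Funs}) ->
        %% don't reply yet: just remember the continuation
        {pending, [Fun | Funs]}.

    sync({pending, Funs}) ->
        ok = fake_fsync(),                        %% one sync covers all
        [ Fun() || Fun <- lists:reverse(Funs) ],  %% reply in order
        new().

    fake_fsync() -> ok. %% stand-in for the real disk sync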
-tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State = - #vqstate { on_sync = OnSync = {SAcks, SPubs, SFroms}, +tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, State = + #vqstate { on_sync = OnSync = {SAcks, SPubs, SFuns}, persistent_store = PersistentStore, pending_ack = PA }) -> %% If we are a non-durable queue, or (no persisent pubs, and no @@ -918,16 +918,16 @@ tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, From, State = (_AckTag, false) -> false end, true, AckTags)) of true -> State1 = tx_commit_index(State #vqstate { - on_sync = {[], [Pubs], [From]} }), + on_sync = {[], [Pubs], [Fun]} }), State1 #vqstate { on_sync = OnSync }; false -> State #vqstate { on_sync = { [AckTags | SAcks], [Pubs | SPubs], - [From | SFroms] }} + [Fun | SFuns] }} end. tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> State; -tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, +tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, persistent_store = PersistentStore }) -> Acks = lists:flatten(SAcks), State1 = ack(Acks, State), @@ -946,7 +946,7 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFroms}, end, {Acks, State1}, Pubs), IndexState1 = rabbit_queue_index:sync_seq_ids(SeqIds, IndexState), - [ gen_server2:reply(From, ok) || From <- lists:reverse(SFroms) ], + [ Fun() || Fun <- lists:reverse(SFuns) ], State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. delete1(_PersistentStore, _TransientThreshold, NextSeqId, Count, DeltaSeqId, -- cgit v1.2.1 From 2f6b017082328631d7b9d5c16e970a8f90ae4116 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Apr 2010 12:32:43 +0100 Subject: Adding a forgotten type spec --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d53e6e3f..95c35759 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -232,7 +232,8 @@ msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), - transient_threshold :: non_neg_integer() + transient_threshold :: non_neg_integer(), + pending_ack :: dict() }). -spec(tx_commit_post_msg_store/5 :: -- cgit v1.2.1 From 546a48e0752db3a0d938443b63b450400de5a1be Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Apr 2010 15:53:33 +0100 Subject: Added the old persister (slightly mutilated) and an invariable queue as a backing queue which uses it. Passes all the tests. --- src/rabbit_invariable_queue.erl | 284 +++++++++++++++++++++ src/rabbit_persister.erl | 542 ++++++++++++++++++++++++++++++++++++++++ src/rabbit_tests.erl | 14 +- 3 files changed, 837 insertions(+), 3 deletions(-) create mode 100644 src/rabbit_invariable_queue.erl create mode 100644 src/rabbit_persister.erl diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl new file mode 100644 index 00000000..5620fab3 --- /dev/null +++ b/src/rabbit_invariable_queue.erl @@ -0,0 +1,284 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_invariable_queue). + +-export([init/2, terminate/1, delete_and_terminate/1, purge/1, publish/2, + publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, + tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, + set_ram_duration_target/2, ram_duration/1, sync_callback/1, + handle_pre_hibernate/1, status/1]). + +-export([start/1]). + +-behaviour(rabbit_backing_queue). + +-include("rabbit.hrl"). + +-record(iv_state, { queue, qname, len, pending_ack }). +-record(tx, { pending_messages, pending_acks, is_persistent }). + +-ifdef(use_specs). + +-type(ack() :: guid() | 'blank_ack'). +-type(state() :: #iv_state { queue :: queue(), + qname :: queue_name(), + len :: non_neg_integer(), + pending_ack :: dict() + }). +-include("rabbit_backing_queue_spec.hrl"). + +-endif. + +start(DurableQueues) -> + ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). + +init(QName, IsDurable) -> + List = case IsDurable of + true -> rabbit_persister:fetch_content(QName); + false -> [] + end, + Q = queue:from_list(List), + #iv_state { queue = Q, qname = QName, len = queue:len(Q), + pending_ack = dict:new() }. + +terminate(State) -> + State #iv_state { queue = queue:new(), len = 0, pending_ack = dict:new() }. + +delete_and_terminate(State = #iv_state { qname = QName, pending_ack = PA }) -> + ok = persist_acks(none, QName, dict:fetch_keys(PA), PA), + {_PLen, State1} = purge(State), + terminate(State1). + +purge(State = #iv_state { len = Len, queue = Q, qname = QName }) -> + %% We do not purge messages pending acks. + {AckTags, PA} = + rabbit_misc:queue_fold( + fun ({#basic_message { is_persistent = false }, _IsDelivered}, Acc) -> + Acc; + ({Msg = #basic_message { guid = Guid }, IsDelivered}, + {AckTagsN, PAN}) -> + ok = persist_delivery(QName, Msg, IsDelivered), + {[Guid | AckTagsN], dict:store(Guid, Msg, PAN)} + end, {[], dict:new()}, Q), + ok = persist_acks(none, QName, AckTags, PA), + {Len, State #iv_state { len = 0, queue = queue:new() }}. + +publish(Msg, State = #iv_state { queue = Q, qname = QName, len = Len }) -> + ok = persist_message(none, QName, Msg), + State #iv_state { queue = queue:in({Msg, false}, Q), len = Len + 1 }. + +publish_delivered(false, _Msg, State) -> + {blank_ack, State}; +publish_delivered(true, Msg = #basic_message { guid = Guid }, + State = #iv_state { qname = QName, len = 0, + pending_ack = PA }) -> + ok = persist_message(none, QName, Msg), + ok = persist_delivery(QName, Msg, false), + {Guid, State #iv_state { pending_ack = dict:store(Guid, Msg, PA) }}. 
+ +fetch(_AckRequired, State = #iv_state { len = 0 }) -> + {empty, State}; +fetch(AckRequired, State = #iv_state { queue = Q, qname = QName, len = Len, + pending_ack = PA }) -> + {{value, {Msg = #basic_message { guid = Guid }, IsDelivered}}, Q1} = + queue:out(Q), + Len1 = Len - 1, + ok = persist_delivery(QName, Msg, IsDelivered), + PA1 = dict:store(Guid, Msg, PA), + {AckTag, PA2} = case AckRequired of + true -> {Guid, PA1}; + false -> ok = persist_acks(none, QName, [Guid], PA1), + {blank_ack, PA} + end, + {{Msg, IsDelivered, AckTag, Len1}, + State #iv_state { queue = Q1, len = Len1, pending_ack = PA2 }}. + +ack(AckTags, State = #iv_state { qname = QName, pending_ack = PA }) -> + ok = persist_acks(none, QName, AckTags, PA), + PA1 = remove_acks(AckTags, PA), + State #iv_state { pending_ack = PA1 }. + +tx_publish(Txn, Msg, State = #iv_state { qname = QName }) -> + publish_in_tx(Txn, Msg), + ok = persist_message(Txn, QName, Msg), + State. + +tx_ack(Txn, AckTags, State = #iv_state { qname = QName, pending_ack = PA }) -> + ack_in_tx(Txn, AckTags), + ok = persist_acks(Txn, QName, AckTags, PA), + State. + +tx_rollback(Txn, State = #iv_state { qname = QName }) -> + #tx { pending_acks = AckTags } = lookup_tx(Txn), + ok = rollback_work(Txn, QName), + erase_tx(Txn), + {lists:flatten(AckTags), State}. + +tx_commit(Txn, Fun, State = #iv_state { qname = QName, pending_ack = PA, + queue = Q, len = Len }) -> + #tx { pending_acks = AckTags, pending_messages = PubsRev } = lookup_tx(Txn), + ok = commit_work(Txn, QName), + erase_tx(Txn), + Fun(), + AckTags1 = lists:flatten(AckTags), + PA1 = remove_acks(AckTags1, PA), + {Q1, Len1} = lists:foldr(fun (Msg, {QN, LenN}) -> + {queue:in({Msg, false}, QN), LenN + 1} + end, {Q, Len}, PubsRev), + {AckTags1, State #iv_state { pending_ack = PA1, queue = Q1, len = Len1 }}. + +requeue(AckTags, State = #iv_state { pending_ack = PA, queue = Q, + len = Len }) -> + %% We don't need to touch the persister here - the persister will + %% already have these messages published and delivered as + %% necessary. The complication is that the persister's seq_id will + %% now be wrong, given the position of these messages in our queue + %% here. However, the persister's seq_id is only used for sorting + %% on startup, and requeue is silent as to where the requeued + %% messages should appear, thus the persister is permitted to sort + %% based on seq_id, even though it'll likely give a different + %% order to the last known state of our queue, prior to shutdown. + {Q1, PA1, Len1} = + lists:foldl( + fun (Guid, {QN, PAN, LenN}) -> + {ok, Msg = #basic_message {}} = dict:find(Guid, PAN), + {queue:in({Msg, true}, QN), dict:erase(Guid, PAN), LenN + 1} + end, {Q, PA, Len}, AckTags), + State #iv_state { pending_ack = PA1, queue = Q1, len = Len1 }. + +len(#iv_state { len = Len }) -> + Len. + +is_empty(State) -> + 0 == len(State). + +set_ram_duration_target(_DurationTarget, State) -> + %% HA! + State. + +ram_duration(State) -> + {0, State}. + +sync_callback(_State) -> + undefined. + +handle_pre_hibernate(State) -> + State. + +status(_State) -> + []. + +%%---------------------------------------------------------------------------- + +remove_acks(AckTags, PA) -> lists:foldl(fun dict:erase/2, PA, AckTags). + +%%---------------------------------------------------------------------------- + +lookup_tx(Txn) -> + case get({txn, Txn}) of + undefined -> #tx { pending_messages = [], + pending_acks = [], + is_persistent = false }; + V -> V + end. + +store_tx(Txn, Tx) -> + put({txn, Txn}, Tx). 
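%% Editorial note (not part of the original patch): the process
%% dictionary holds one #tx{} per open transaction, keyed on {txn, Txn}.
%% A minimal sketch of the accumulate-then-commit flow, with MsgA,
%% GuidB and S0 hypothetical:
%%
%%   Txn = rabbit_guid:guid(),
%%   S1  = rabbit_invariable_queue:tx_publish(Txn, MsgA, S0),
%%   S2  = rabbit_invariable_queue:tx_ack(Txn, [GuidB], S1),
%%   {AckTags, S3} =
%%       rabbit_invariable_queue:tx_commit(Txn, fun () -> ok end, S2).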
+ +erase_tx(Txn) -> + erase({txn, Txn}). + +mark_tx_persistent(Txn) -> + store_tx(Txn, (lookup_tx(Txn)) #tx { is_persistent = true }). + +is_tx_persistent(Txn) -> + (lookup_tx(Txn)) #tx.is_persistent. + +do_if_persistent(F, Txn, QName) -> + ok = case is_tx_persistent(Txn) of + false -> ok; + true -> F({Txn, QName}) + end. + +publish_in_tx(Txn, Msg) -> + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }). + +ack_in_tx(Txn, AckTags) -> + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }). + +%%---------------------------------------------------------------------------- + +persist_message(_Txn, _QName, #basic_message { is_persistent = false }) -> + ok; +persist_message(Txn, QName, Msg) -> + Msg1 = Msg #basic_message { + %% don't persist any recoverable decoded properties, + %% rebuild from properties_bin on restore + content = rabbit_binary_parser:clear_decoded_content( + Msg #basic_message.content)}, + persist_work(Txn, QName, + [{publish, Msg1, {QName, Msg1 #basic_message.guid}}]). + +persist_delivery(_QName, #basic_message { is_persistent = false }, + _IsDelivered) -> + ok; +persist_delivery(_QName, _Message, true) -> + ok; +persist_delivery(QName, #basic_message { guid = Guid }, _IsDelivered) -> + persist_work(none, QName, [{deliver, {QName, Guid}}]). + +persist_acks(Txn, QName, AckTags, PA) -> + persist_work(Txn, QName, + [{ack, {QName, Guid}} || + Guid <- AckTags, + case dict:find(Guid, PA) of + {ok, #basic_message { is_persistent = true }} -> true; + _ -> false + end]). + +persist_work(_Txn,_QName, []) -> + ok; +persist_work(none, _QName, WorkList) -> + rabbit_persister:dirty_work(WorkList); +persist_work(Txn, QName, WorkList) -> + mark_tx_persistent(Txn), + rabbit_persister:extend_transaction({Txn, QName}, WorkList). + +commit_work(Txn, QName) -> + do_if_persistent(fun rabbit_persister:commit_transaction/1, + Txn, QName). + +rollback_work(Txn, QName) -> + do_if_persistent(fun rabbit_persister:rollback_transaction/1, + Txn, QName). diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl new file mode 100644 index 00000000..3235a837 --- /dev/null +++ b/src/rabbit_persister.erl @@ -0,0 +1,542 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. 
+%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_persister). + +-behaviour(gen_server). + +-export([start_link/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([transaction/1, extend_transaction/2, dirty_work/1, + commit_transaction/1, rollback_transaction/1, + force_snapshot/0, serial/0, fetch_content/1]). + +-include("rabbit.hrl"). + +-define(SERVER, ?MODULE). + +-define(LOG_BUNDLE_DELAY, 5). +-define(COMPLETE_BUNDLE_DELAY, 2). + +-define(HIBERNATE_AFTER, 10000). + +-define(MAX_WRAP_ENTRIES, 500). + +-define(PERSISTER_LOG_FORMAT_VERSION, {2, 5}). + +-record(pstate, {log_handle, entry_count, deadline, + pending_logs, pending_replies, + snapshot, recovered_content}). + +%% two tables for efficient persistency +%% one maps a key to a message +%% the other maps a key to one or more queues. +%% The aim is to reduce the overload of storing a message multiple times +%% when it appears in several queues. +-record(psnapshot, {serial, transactions, messages, queues, next_seq_id}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(pmsg() :: {queue_name(), pkey()}). +-type(work_item() :: + {publish, message(), pmsg()} | + {deliver, pmsg()} | + {ack, pmsg()}). + +-spec(start_link/1 :: ([queue_name()]) -> + {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(transaction/1 :: ([work_item()]) -> 'ok'). +-spec(extend_transaction/2 :: ({txn(), queue_name()}, [work_item()]) -> 'ok'). +-spec(dirty_work/1 :: ([work_item()]) -> 'ok'). +-spec(commit_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). +-spec(rollback_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). +-spec(force_snapshot/0 :: () -> 'ok'). +-spec(serial/0 :: () -> non_neg_integer()). +-spec(fetch_content/1 :: (queue_name()) -> [{message(), boolean()}]). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(DurableQueues) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [DurableQueues], []). + +transaction(MessageList) -> + ?LOGDEBUG("transaction ~p~n", [MessageList]), + TxnKey = rabbit_guid:guid(), + gen_server:call(?SERVER, {transaction, TxnKey, MessageList}, infinity). + +extend_transaction(TxnKey, MessageList) -> + ?LOGDEBUG("extend_transaction ~p ~p~n", [TxnKey, MessageList]), + gen_server:cast(?SERVER, {extend_transaction, TxnKey, MessageList}). + +dirty_work(MessageList) -> + ?LOGDEBUG("dirty_work ~p~n", [MessageList]), + gen_server:cast(?SERVER, {dirty_work, MessageList}). + +commit_transaction(TxnKey) -> + ?LOGDEBUG("commit_transaction ~p~n", [TxnKey]), + gen_server:call(?SERVER, {commit_transaction, TxnKey}, infinity). + +rollback_transaction(TxnKey) -> + ?LOGDEBUG("rollback_transaction ~p~n", [TxnKey]), + gen_server:cast(?SERVER, {rollback_transaction, TxnKey}). + +force_snapshot() -> + gen_server:call(?SERVER, force_snapshot, infinity). + +serial() -> + gen_server:call(?SERVER, serial, infinity). + +fetch_content(QName) -> + gen_server:call(?SERVER, {fetch_content, QName}, infinity). 
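%% Editorial note (not part of the original patch): clients drive the
%% persister with lists of the work_item() terms from the spec above; a
%% hedged sketch of a non-transactional write, with QName, Msg and Guid
%% hypothetical:
%%
%%   ok = rabbit_persister:dirty_work([{publish, Msg, {QName, Guid}},
%%                                     {deliver, {QName, Guid}},
%%                                     {ack, {QName, Guid}}]).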
+ +%%-------------------------------------------------------------------- + +init([DurableQueues]) -> + process_flag(trap_exit, true), + FileName = base_filename(), + ok = filelib:ensure_dir(FileName), + Snapshot = #psnapshot{serial = 0, + transactions = dict:new(), + messages = ets:new(messages, []), + queues = ets:new(queues, []), + next_seq_id = 0}, + LogHandle = + case disk_log:open([{name, rabbit_persister}, + {head, current_snapshot(Snapshot)}, + {file, FileName}]) of + {ok, LH} -> LH; + {repaired, LH, {recovered, Recovered}, {badbytes, Bad}} -> + WarningFun = if + Bad > 0 -> fun rabbit_log:warning/2; + true -> fun rabbit_log:info/2 + end, + WarningFun("Repaired persister log - ~p recovered, ~p bad~n", + [Recovered, Bad]), + LH + end, + {Res, RecoveredContent, LoadedSnapshot} = + internal_load_snapshot(LogHandle, DurableQueues, Snapshot), + NewSnapshot = LoadedSnapshot#psnapshot{ + serial = LoadedSnapshot#psnapshot.serial + 1}, + case Res of + ok -> + ok = take_snapshot(LogHandle, NewSnapshot); + {error, Reason} -> + rabbit_log:error("Failed to load persister log: ~p~n", [Reason]), + ok = take_snapshot_and_save_old(LogHandle, NewSnapshot) + end, + State = #pstate{log_handle = LogHandle, + entry_count = 0, + deadline = infinity, + pending_logs = [], + pending_replies = [], + snapshot = NewSnapshot, + recovered_content = RecoveredContent}, + {ok, State}. + +handle_call({transaction, Key, MessageList}, From, State) -> + NewState = internal_extend(Key, MessageList, State), + do_noreply(internal_commit(From, Key, NewState)); +handle_call({commit_transaction, TxnKey}, From, State) -> + do_noreply(internal_commit(From, TxnKey, State)); +handle_call(force_snapshot, _From, State) -> + do_reply(ok, flush(true, State)); +handle_call(serial, _From, + State = #pstate{snapshot = #psnapshot{serial = Serial}}) -> + do_reply(Serial, State); +handle_call({fetch_content, QName}, _From, State = + #pstate{recovered_content = RC}) -> + List = case dict:find(QName, RC) of + {ok, Content} -> Content; + error -> [] + end, + do_reply(List, State#pstate{recovered_content = dict:erase(QName, RC)}); +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast({rollback_transaction, TxnKey}, State) -> + do_noreply(internal_rollback(TxnKey, State)); +handle_cast({dirty_work, MessageList}, State) -> + do_noreply(internal_dirty_work(MessageList, State)); +handle_cast({extend_transaction, TxnKey, MessageList}, State) -> + do_noreply(internal_extend(TxnKey, MessageList, State)); +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(timeout, State = #pstate{deadline = infinity}) -> + State1 = flush(true, State), + %% TODO: Once we drop support for R11B-5, we can change this to + %% {noreply, State1, hibernate}; + proc_lib:hibernate(gen_server2, enter_loop, [?MODULE, [], State1]); +handle_info(timeout, State) -> + do_noreply(flush(State)); +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State = #pstate{log_handle = LogHandle}) -> + flush(State), + disk_log:close(LogHandle), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, flush(State)}. + +%%-------------------------------------------------------------------- + +internal_extend(Key, MessageList, State) -> + log_work(fun (ML) -> {extend_transaction, Key, ML} end, + MessageList, State). + +internal_dirty_work(MessageList, State) -> + log_work(fun (ML) -> {dirty_work, ML} end, + MessageList, State). 
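%% Editorial note (not part of the original patch): init/1 above leans
%% on disk_log's "head" term, which is written afresh whenever the log
%% is opened or reopened, so the first term read back is always a
%% snapshot. A minimal standalone sketch (log name and file path
%% hypothetical):
%%
%%   {ok, LH} = disk_log:open([{name, demo_log},
%%                             {file, "/tmp/demo.LOG"},
%%                             {head, {snapshot, 0}}]),
%%   ok = disk_log:alog(LH, [{entry, 1}]),
%%   ok = disk_log:sync(LH),
%%   ok = disk_log:close(LH).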
+ +internal_commit(From, Key, State = #pstate{snapshot = Snapshot}) -> + Unit = {commit_transaction, Key}, + NewSnapshot = internal_integrate1(Unit, Snapshot), + complete(From, Unit, State#pstate{snapshot = NewSnapshot}). + +internal_rollback(Key, State = #pstate{snapshot = Snapshot}) -> + Unit = {rollback_transaction, Key}, + NewSnapshot = internal_integrate1(Unit, Snapshot), + log(State#pstate{snapshot = NewSnapshot}, Unit). + +complete(From, Item, State = #pstate{deadline = ExistingDeadline, + pending_logs = Logs, + pending_replies = Waiting}) -> + State#pstate{deadline = compute_deadline( + ?COMPLETE_BUNDLE_DELAY, ExistingDeadline), + pending_logs = [Item | Logs], + pending_replies = [From | Waiting]}. + +%% This is made to limit disk usage by writing messages only once onto +%% disk. We keep a table associating pkeys to messages, and provided +%% the list of messages to output is left to right, we can guarantee +%% that pkeys will be a backreference to a message in memory when a +%% "tied" is met. +log_work(CreateWorkUnit, MessageList, + State = #pstate{ + snapshot = Snapshot = #psnapshot{messages = Messages}}) -> + Unit = CreateWorkUnit( + rabbit_misc:map_in_order( + fun(M = {publish, Message, QK = {_QName, PKey}}) -> + case ets:lookup(Messages, PKey) of + [_] -> {tied, QK}; + [] -> ets:insert(Messages, {PKey, Message}), + M + end; + (M) -> M + end, + MessageList)), + NewSnapshot = internal_integrate1(Unit, Snapshot), + log(State#pstate{snapshot = NewSnapshot}, Unit). + +log(State = #pstate{deadline = ExistingDeadline, pending_logs = Logs}, + Message) -> + State#pstate{deadline = compute_deadline(?LOG_BUNDLE_DELAY, + ExistingDeadline), + pending_logs = [Message | Logs]}. + +base_filename() -> + rabbit_mnesia:dir() ++ "/rabbit_persister.LOG". + +take_snapshot(LogHandle, OldFileName, Snapshot) -> + ok = disk_log:sync(LogHandle), + %% current_snapshot is the Head (ie. first thing logged) + ok = disk_log:reopen(LogHandle, OldFileName, current_snapshot(Snapshot)). + +take_snapshot(LogHandle, Snapshot) -> + OldFileName = lists:flatten(base_filename() ++ ".previous"), + file:delete(OldFileName), + rabbit_log:info("Rolling persister log to ~p~n", [OldFileName]), + ok = take_snapshot(LogHandle, OldFileName, Snapshot). + +take_snapshot_and_save_old(LogHandle, Snapshot) -> + {MegaSecs, Secs, MicroSecs} = erlang:now(), + Timestamp = MegaSecs * 1000000 + Secs * 1000 + MicroSecs, + OldFileName = lists:flatten(io_lib:format("~s.saved.~p", + [base_filename(), Timestamp])), + rabbit_log:info("Saving persister log in ~p~n", [OldFileName]), + ok = take_snapshot(LogHandle, OldFileName, Snapshot). + +maybe_take_snapshot(Force, State = #pstate{entry_count = EntryCount, + log_handle = LH, + snapshot = Snapshot}) + when Force orelse EntryCount >= ?MAX_WRAP_ENTRIES -> + ok = take_snapshot(LH, Snapshot), + State#pstate{entry_count = 0}; +maybe_take_snapshot(_Force, State) -> + State. + +later_ms(DeltaMilliSec) -> + {MegaSec, Sec, MicroSec} = now(), + %% Note: not normalised. Unimportant for this application. + {MegaSec, Sec, MicroSec + (DeltaMilliSec * 1000)}. + +%% Result = B - A, more or less +time_diff({B1, B2, B3}, {A1, A2, A3}) -> + (B1 - A1) * 1000000 + (B2 - A2) + (B3 - A3) / 1000000.0 . + +compute_deadline(TimerDelay, infinity) -> + later_ms(TimerDelay); +compute_deadline(_TimerDelay, ExistingDeadline) -> + ExistingDeadline. 
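%% Editorial note (not part of the original patch): later_ms/1 and
%% time_diff/2 above do deadline arithmetic on erlang:now() triples,
%% which was the idiom of the day. On modern Erlang/OTP (19+) the same
%% deadline logic is simpler with monotonic time; a hedged sketch:
%%
%%   Deadline = erlang:monotonic_time(millisecond) + DeltaMilliSec,
%%   Timeout  = max(0, Deadline - erlang:monotonic_time(millisecond)).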
+ +compute_timeout(infinity) -> + ?HIBERNATE_AFTER; +compute_timeout(Deadline) -> + DeltaMilliSec = time_diff(Deadline, now()) * 1000.0, + if + DeltaMilliSec =< 1 -> + 0; + true -> + round(DeltaMilliSec) + end. + +do_noreply(State = #pstate{deadline = Deadline}) -> + {noreply, State, compute_timeout(Deadline)}. + +do_reply(Reply, State = #pstate{deadline = Deadline}) -> + {reply, Reply, State, compute_timeout(Deadline)}. + +flush(State) -> flush(false, State). + +flush(ForceSnapshot, State = #pstate{pending_logs = PendingLogs, + pending_replies = Waiting, + log_handle = LogHandle}) -> + State1 = if PendingLogs /= [] -> + disk_log:alog(LogHandle, lists:reverse(PendingLogs)), + State#pstate{entry_count = State#pstate.entry_count + 1}; + true -> + State + end, + State2 = maybe_take_snapshot(ForceSnapshot, State1), + if Waiting /= [] -> + ok = disk_log:sync(LogHandle), + lists:foreach(fun (From) -> gen_server:reply(From, ok) end, + Waiting); + true -> + ok + end, + State2#pstate{deadline = infinity, + pending_logs = [], + pending_replies = []}. + +current_snapshot(_Snapshot = #psnapshot{serial = Serial, + transactions = Ts, + messages = Messages, + queues = Queues, + next_seq_id = NextSeqId}) -> + %% Avoid infinite growth of the table by removing messages not + %% bound to a queue anymore + prune_table(Messages, ets:foldl( + fun ({{_QName, PKey}, _Delivered, _SeqId}, S) -> + sets:add_element(PKey, S) + end, sets:new(), Queues)), + InnerSnapshot = {{serial, Serial}, + {txns, Ts}, + {messages, ets:tab2list(Messages)}, + {queues, ets:tab2list(Queues)}, + {next_seq_id, NextSeqId}}, + ?LOGDEBUG("Inner snapshot: ~p~n", [InnerSnapshot]), + {persist_snapshot, {vsn, ?PERSISTER_LOG_FORMAT_VERSION}, + term_to_binary(InnerSnapshot)}. + +prune_table(Tab, Keys) -> + true = ets:safe_fixtable(Tab, true), + ok = prune_table(Tab, Keys, ets:first(Tab)), + true = ets:safe_fixtable(Tab, false). + +prune_table(_Tab, _Keys, '$end_of_table') -> ok; +prune_table(Tab, Keys, Key) -> + case sets:is_element(Key, Keys) of + true -> ok; + false -> ets:delete(Tab, Key) + end, + prune_table(Tab, Keys, ets:next(Tab, Key)). + +internal_load_snapshot(LogHandle, + DurableQueues, + Snapshot = #psnapshot{messages = Messages, + queues = Queues}) -> + {K, [Loaded_Snapshot | Items]} = disk_log:chunk(LogHandle, start), + case check_version(Loaded_Snapshot) of + {ok, StateBin} -> + {{serial, Serial}, {txns, Ts}, {messages, Ms}, {queues, Qs}, + {next_seq_id, NextSeqId}} = binary_to_term(StateBin), + true = ets:insert(Messages, Ms), + true = ets:insert(Queues, Qs), + Snapshot1 = replay(Items, LogHandle, K, + Snapshot#psnapshot{ + serial = Serial, + transactions = Ts, + next_seq_id = NextSeqId}), + {RecoveredContent, Snapshot2} = + recover_messages(DurableQueues, Snapshot1), + %% uncompleted transactions are discarded - this is TRTTD + %% since we only get into this code on node restart, so + %% any uncompleted transactions will have been aborted. + {ok, RecoveredContent, + Snapshot2#psnapshot{transactions = dict:new()}}; + {error, Reason} -> {{error, Reason}, dict:new(), Snapshot} + end. + +check_version({persist_snapshot, {vsn, ?PERSISTER_LOG_FORMAT_VERSION}, + StateBin}) -> + {ok, StateBin}; +check_version({persist_snapshot, {vsn, Vsn}, _StateBin}) -> + {error, {unsupported_persister_log_format, Vsn}}; +check_version(_Other) -> + {error, unrecognised_persister_log_format}. 
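%% Editorial note (not part of the original patch): prune_table/2 above
%% is the standard delete-while-iterating pattern: ets:safe_fixtable/2
%% pins the table so ets:next/2 remains valid across deletions. A
%% self-contained calling sketch (table contents hypothetical):
%%
%%   T = ets:new(demo, []),
%%   true = ets:insert(T, [{a, 1}, {b, 2}]),
%%   _ = prune_table(T, sets:from_list([a])),
%%   [{a, 1}] = ets:tab2list(T).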
+ +recover_messages(DurableQueues, Snapshot = #psnapshot{messages = Messages, + queues = Queues}) -> + DurableQueuesSet = sets:from_list(DurableQueues), + Work = ets:foldl( + fun ({{QName, PKey}, Delivered, SeqId}, Acc) -> + case sets:is_element(QName, DurableQueuesSet) of + true -> + rabbit_misc:dict_cons( + QName, {SeqId, PKey, Delivered}, Acc); + false -> + Acc + end + end, dict:new(), Queues), + {L, RecoveredContent} = + lists:foldl( + fun ({Recovered, {QName, Msgs}}, {L, Dict}) -> + {Recovered ++ L, dict:store(QName, Msgs, Dict)} + end, {[], dict:new()}, + %% unstable parallel map, because order doesn't matter + rabbit_misc:upmap( + %% we do as much work as possible in spawned worker + %% processes, but we need to make sure the ets:inserts are + %% performed in self() + fun ({QName, Requeues}) -> + recover(QName, Requeues, Messages) + end, dict:to_list(Work))), + NewMessages = [{K, M} || {_S, _Q, K, M, _D} <- L], + NewQueues = [{{Q, K}, D, S} || {S, Q, K, _M, D} <- L], + ets:delete_all_objects(Messages), + ets:delete_all_objects(Queues), + true = ets:insert(Messages, NewMessages), + true = ets:insert(Queues, NewQueues), + %% contains the mutated messages and queues tables + {RecoveredContent, Snapshot}. + +recover(QName, Requeues, Messages) -> + RecoveredMessages = + lists:sort([{SeqId, QName, PKey, Message, Delivered} || + {SeqId, PKey, Delivered} <- Requeues, + {_, Message} <- ets:lookup(Messages, PKey)]), + {RecoveredMessages, {QName, [{Message, Delivered} || + {_, _, _, Message, Delivered} + <- RecoveredMessages]}}. + +replay([], LogHandle, K, Snapshot) -> + case disk_log:chunk(LogHandle, K) of + {K1, Items} -> + replay(Items, LogHandle, K1, Snapshot); + {K1, Items, Badbytes} -> + rabbit_log:warning("~p bad bytes recovering persister log~n", + [Badbytes]), + replay(Items, LogHandle, K1, Snapshot); + eof -> Snapshot + end; +replay([Item | Items], LogHandle, K, Snapshot) -> + NewSnapshot = internal_integrate_messages(Item, Snapshot), + replay(Items, LogHandle, K, NewSnapshot). + +internal_integrate_messages(Items, Snapshot) -> + lists:foldl(fun (Item, Snap) -> internal_integrate1(Item, Snap) end, + Snapshot, Items). + +internal_integrate1({extend_transaction, Key, MessageList}, + Snapshot = #psnapshot {transactions = Transactions}) -> + Snapshot#psnapshot{transactions = rabbit_misc:dict_cons(Key, MessageList, + Transactions)}; +internal_integrate1({rollback_transaction, Key}, + Snapshot = #psnapshot{transactions = Transactions}) -> + Snapshot#psnapshot{transactions = dict:erase(Key, Transactions)}; +internal_integrate1({commit_transaction, Key}, + Snapshot = #psnapshot{transactions = Transactions, + messages = Messages, + queues = Queues, + next_seq_id = SeqId}) -> + case dict:find(Key, Transactions) of + {ok, MessageLists} -> + ?LOGDEBUG("persist committing txn ~p~n", [Key]), + NextSeqId = + lists:foldr( + fun (ML, SeqIdN) -> + perform_work(ML, Messages, Queues, SeqIdN) end, + SeqId, MessageLists), + Snapshot#psnapshot{transactions = dict:erase(Key, Transactions), + next_seq_id = NextSeqId}; + error -> + Snapshot + end; +internal_integrate1({dirty_work, MessageList}, + Snapshot = #psnapshot{messages = Messages, + queues = Queues, + next_seq_id = SeqId}) -> + Snapshot#psnapshot{next_seq_id = perform_work(MessageList, Messages, + Queues, SeqId)}. + +perform_work(MessageList, Messages, Queues, SeqId) -> + lists:foldl(fun (Item, NextSeqId) -> + perform_work_item(Item, Messages, Queues, NextSeqId) + end, SeqId, MessageList). 
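%% Editorial note (not part of the original patch): replay/4 above is a
%% chunked disk_log read loop, and the same shape generalises to any
%% fold over a log. A hedged sketch, ignoring the
%% {Cont, Terms, BadBytes} repair case for brevity:
%%
%%   fold_log(LH, Fun, Acc) ->
%%       fold_log(LH, Fun, Acc, disk_log:chunk(LH, start)).
%%
%%   fold_log(_LH, _Fun, Acc, eof) ->
%%       Acc;
%%   fold_log(LH, Fun, Acc, {Cont, Terms}) ->
%%       fold_log(LH, Fun, lists:foldl(Fun, Acc, Terms),
%%                disk_log:chunk(LH, Cont)).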
+ +perform_work_item({publish, Message, QK = {_QName, PKey}}, + Messages, Queues, NextSeqId) -> + true = ets:insert(Messages, {PKey, Message}), + true = ets:insert(Queues, {QK, false, NextSeqId}), + NextSeqId + 1; + +perform_work_item({tied, QK}, _Messages, Queues, NextSeqId) -> + true = ets:insert(Queues, {QK, false, NextSeqId}), + NextSeqId + 1; + +perform_work_item({deliver, QK}, _Messages, Queues, NextSeqId) -> + true = ets:update_element(Queues, QK, {2, true}), + NextSeqId; + +perform_work_item({ack, QK}, _Messages, Queues, NextSeqId) -> + true = ets:delete(Queues, QK), + NextSeqId. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4bef8435..6b5c73bb 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -54,9 +54,7 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> - passed = test_msg_store(), - passed = test_queue_index(), - passed = test_variable_queue(), + passed = test_backing_queue(), passed = test_priority_queue(), passed = test_bpqueue(), passed = test_pg_local(), @@ -994,6 +992,16 @@ bad_handle_hook(_, _, _) -> extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). +test_backing_queue() -> + case application:get_env(backing_queue_module) of + {ok, rabbit_variable_queue} -> + passed = test_msg_store(), + passed = test_queue_index(), + passed = test_variable_queue(); + _ -> + passed + end. + start_msg_store_empty() -> start_msg_store(fun (ok) -> finished end, ok). -- cgit v1.2.1 From de93480a2572e181363f4aca8fdfa24357ba99ef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 25 Apr 2010 11:59:18 +0100 Subject: A cosmetic in vq, and a fix in tests for a bug which was causing the persister tests not to run --- src/rabbit_tests.erl | 4 ++-- src/rabbit_variable_queue.erl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4e893824..81b633fe 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -997,12 +997,12 @@ extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). test_backing_queue() -> - case application:get_env(backing_queue_module) of + case application:get_env(rabbit, backing_queue_module) of {ok, rabbit_variable_queue} -> passed = test_msg_store(), passed = test_queue_index(), passed = test_variable_queue(); - _ -> + Other -> passed end. 
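%% Editorial note (not part of the original patch): the fix above
%% matters because application:get_env/1 infers the application from
%% the calling process; invoked outside the rabbit application context,
%% as the tests are, it returns undefined and the case never sees the
%% configured module. The two-argument form is unambiguous:
%%
%%   {ok, Mod} = application:get_env(rabbit, backing_queue_module).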
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 95c35759..6ac92dc1 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1377,9 +1377,9 @@ maybe_deltas_to_betas( maybe_deltas_to_betas( State #vqstate { delta = Delta #delta { start_seq_id = Delta1SeqId }}); - _ -> + Q3aLen -> Q3b = bpqueue:join(Q3, Q3a), - case DeltaCount - bpqueue:len(Q3a) of + case DeltaCount - Q3aLen of 0 -> %% delta is now empty, but it wasn't %% before, so can now join q2 onto q3 -- cgit v1.2.1 From f4c6fefb81c9034fa468668e93081849f23c3035 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 25 Apr 2010 11:59:57 +0100 Subject: unused var --- src/rabbit_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 81b633fe..1ab4f224 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1002,7 +1002,7 @@ test_backing_queue() -> passed = test_msg_store(), passed = test_queue_index(), passed = test_variable_queue(); - Other -> + _ -> passed end. -- cgit v1.2.1 From 7e3b72a6a3aae3c71dc1afd0f5149ccf82462182 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 26 Apr 2010 15:02:31 +0100 Subject: Because of the exposure of AckRequired into the interface, the counting of persistent messages has potentially been wrong for a while (previously only decr on ack, but actually need to maybe decr on fetch too). This can have the effect on startup of telling the queue it has more messages in it than it really has, which can cause an infinite loop --- src/rabbit_variable_queue.erl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6ac92dc1..b33df24f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -420,11 +420,14 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, fetch(AckRequired, State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, - index_state = IndexState, len = Len, + index_state = IndexState, len = Len, persistent_count = PCount, persistent_store = PersistentStore, pending_ack = PA }) -> case queue:out(Q4) of {empty, _Q4} -> - fetch_from_q3_or_delta(AckRequired, State); + case fetch_from_q3_or_delta(State) of + {empty, _State1} = Result -> Result; + {loaded, State1} -> fetch(AckRequired, State1) + end; {{value, MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, is_delivered = IsDelivered, @@ -486,12 +489,16 @@ fetch(AckRequired, State = false -> PA end, + PCount1 = case IsPersistent andalso not AckRequired of + true -> PCount - 1; + false -> PCount + end, Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, ram_msg_count = RamMsgCount - 1, index_state = IndexState3, len = Len1, - pending_ack = PA1 }} + pending_ack = PA1, persistent_count = PCount1 }} end. ack([], State) -> @@ -1032,8 +1039,7 @@ remove_queue_entries1( end, {PersistentStore, CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. -fetch_from_q3_or_delta(AckRequired, - State = #vqstate { +fetch_from_q3_or_delta(State = #vqstate { q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, @@ -1079,7 +1085,7 @@ fetch_from_q3_or_delta(AckRequired, %% delta and q3 are maintained State1 end, - fetch(AckRequired, State2) + {loaded, State2} end. 
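%% Editorial note (not part of the original patch): the rule the fix
%% above encodes is that persistent_count tracks persistent messages
%% which could still survive a restart, so it must be decremented the
%% moment a message can never come back - on ack, or on fetch when no
%% ack will follow:
%%
%%   PCount1 = case IsPersistent andalso not AckRequired of
%%                 true  -> PCount - 1;   %% gone for good at fetch time
%%                 false -> PCount        %% still pending an ack
%%             end.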
reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, -- cgit v1.2.1 From 454b44105a9db48703ef40e92267004231913b5b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 15:55:55 +0100 Subject: simplify queue recovery in old persister instead of constructing a temporary data structure for all recovered messages we simply extract the messages from the snapshot on demand. --- src/rabbit_invariable_queue.erl | 9 ++-- src/rabbit_persister.erl | 111 +++++++++++++--------------------------- 2 files changed, 39 insertions(+), 81 deletions(-) diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 5620fab3..7765069f 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -62,11 +62,10 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). init(QName, IsDurable) -> - List = case IsDurable of - true -> rabbit_persister:fetch_content(QName); - false -> [] - end, - Q = queue:from_list(List), + Q = queue:from_list(case IsDurable of + true -> rabbit_persister:queue_content(QName); + false -> [] + end), #iv_state { queue = Q, qname = QName, len = queue:len(Q), pending_ack = dict:new() }. diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl index f16b7a33..ba9430fe 100644 --- a/src/rabbit_persister.erl +++ b/src/rabbit_persister.erl @@ -40,7 +40,7 @@ -export([transaction/1, extend_transaction/2, dirty_work/1, commit_transaction/1, rollback_transaction/1, - force_snapshot/0, fetch_content/1]). + force_snapshot/0, queue_content/1]). -include("rabbit.hrl"). @@ -56,8 +56,7 @@ -define(PERSISTER_LOG_FORMAT_VERSION, {2, 6}). -record(pstate, {log_handle, entry_count, deadline, - pending_logs, pending_replies, - snapshot, recovered_content}). + pending_logs, pending_replies, snapshot}). %% two tables for efficient persistency %% one maps a key to a message @@ -84,7 +83,7 @@ -spec(commit_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). -spec(rollback_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). -spec(force_snapshot/0 :: () -> 'ok'). --spec(fetch_content/1 :: (queue_name()) -> [{message(), boolean()}]). +-spec(queue_content/1 :: (queue_name()) -> [{message(), boolean()}]). -endif. @@ -117,8 +116,8 @@ rollback_transaction(TxnKey) -> force_snapshot() -> gen_server:call(?SERVER, force_snapshot, infinity). -fetch_content(QName) -> - gen_server:call(?SERVER, {fetch_content, QName}, infinity). +queue_content(QName) -> + gen_server:call(?SERVER, {queue_content, QName}, infinity). %%-------------------------------------------------------------------- @@ -128,7 +127,7 @@ init([DurableQueues]) -> ok = filelib:ensure_dir(FileName), Snapshot = #psnapshot{transactions = dict:new(), messages = ets:new(messages, []), - queues = ets:new(queues, []), + queues = ets:new(queues, [ordered_set]), next_seq_id = 0}, LogHandle = case disk_log:open([{name, rabbit_persister}, @@ -144,7 +143,7 @@ init([DurableQueues]) -> [Recovered, Bad]), LH end, - {Res, RecoveredContent, NewSnapshot} = + {Res, NewSnapshot} = internal_load_snapshot(LogHandle, DurableQueues, Snapshot), case Res of ok -> @@ -158,8 +157,7 @@ init([DurableQueues]) -> deadline = infinity, pending_logs = [], pending_replies = [], - snapshot = NewSnapshot, - recovered_content = RecoveredContent}, + snapshot = NewSnapshot}, {ok, State}. 
handle_call({transaction, Key, MessageList}, From, State) -> @@ -169,13 +167,13 @@ handle_call({commit_transaction, TxnKey}, From, State) -> do_noreply(internal_commit(From, TxnKey, State)); handle_call(force_snapshot, _From, State) -> do_reply(ok, flush(true, State)); -handle_call({fetch_content, QName}, _From, State = - #pstate{recovered_content = RC}) -> - List = case dict:find(QName, RC) of - {ok, Content} -> Content; - error -> [] - end, - do_reply(List, State#pstate{recovered_content = dict:erase(QName, RC)}); +handle_call({queue_content, QName}, _From, + State = #pstate{snapshot = #psnapshot{messages = Messages, + queues = Queues}}) -> + MatchSpec= [{{{QName,'$1'}, '$2', '$3'}, [], [{{'$3', '$1', '$2'}}]}], + do_reply([{ets:lookup_element(Messages, K, 2), D} || + {_, K, D} <- lists:sort(ets:select(Queues, MatchSpec))], + State); handle_call(_Request, _From, State) -> {noreply, State}. @@ -353,10 +351,10 @@ current_snapshot(_Snapshot = #psnapshot{transactions = Ts, next_seq_id = NextSeqId}) -> %% Avoid infinite growth of the table by removing messages not %% bound to a queue anymore - prune_table(Messages, ets:foldl( - fun ({{_QName, PKey}, _Delivered, _SeqId}, S) -> - sets:add_element(PKey, S) - end, sets:new(), Queues)), + PKeys = ets:foldl(fun ({{_QName, PKey}, _Delivered, _SeqId}, S) -> + sets:add_element(PKey, S) + end, sets:new(), Queues), + prune_table(Messages, fun (Key) -> sets:is_element(Key, PKeys) end), InnerSnapshot = {{txns, Ts}, {messages, ets:tab2list(Messages)}, {queues, ets:tab2list(Queues)}, @@ -365,18 +363,18 @@ current_snapshot(_Snapshot = #psnapshot{transactions = Ts, {persist_snapshot, {vsn, ?PERSISTER_LOG_FORMAT_VERSION}, term_to_binary(InnerSnapshot)}. -prune_table(Tab, Keys) -> +prune_table(Tab, Pred) -> true = ets:safe_fixtable(Tab, true), - ok = prune_table(Tab, Keys, ets:first(Tab)), + ok = prune_table(Tab, Pred, ets:first(Tab)), true = ets:safe_fixtable(Tab, false). -prune_table(_Tab, _Keys, '$end_of_table') -> ok; -prune_table(Tab, Keys, Key) -> - case sets:is_element(Key, Keys) of +prune_table(_Tab, _Pred, '$end_of_table') -> ok; +prune_table(Tab, Pred, Key) -> + case Pred(Key) of true -> ok; false -> ets:delete(Tab, Key) end, - prune_table(Tab, Keys, ets:next(Tab, Key)). + prune_table(Tab, Pred, ets:next(Tab, Key)). internal_load_snapshot(LogHandle, DurableQueues, @@ -393,14 +391,19 @@ internal_load_snapshot(LogHandle, Snapshot#psnapshot{ transactions = Ts, next_seq_id = NextSeqId}), - {RecoveredContent, Snapshot2} = - recover_messages(DurableQueues, Snapshot1), + %% Remove all entries for queues that no longer exist. + %% Note that the 'messages' table is pruned when the next + %% snapshot is taken. + DurableQueuesSet = sets:from_list(DurableQueues), + prune_table(Snapshot1#psnapshot.queues, + fun ({QName, _PKey}) -> + sets:is_element(QName, DurableQueuesSet) + end), %% uncompleted transactions are discarded - this is TRTTD %% since we only get into this code on node restart, so %% any uncompleted transactions will have been aborted. - {ok, RecoveredContent, - Snapshot2#psnapshot{transactions = dict:new()}}; - {error, Reason} -> {{error, Reason}, dict:new(), Snapshot} + {ok, Snapshot1#psnapshot{transactions = dict:new()}}; + {error, Reason} -> {{error, Reason}, Snapshot} end. check_version({persist_snapshot, {vsn, ?PERSISTER_LOG_FORMAT_VERSION}, @@ -411,50 +414,6 @@ check_version({persist_snapshot, {vsn, Vsn}, _StateBin}) -> check_version(_Other) -> {error, unrecognised_persister_log_format}. 
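%% Editorial note (not part of the original patch): the match spec in
%% queue_content above reads "for each {{QName, PKey}, Delivered,
%% SeqId} row, emit {SeqId, PKey, Delivered}", so sorting the result
%% recovers insertion order via SeqId. The same idiom in isolation
%% (queue name hypothetical):
%%
%%   MS = [{{{my_q, '$1'}, '$2', '$3'}, [], [{{'$3', '$1', '$2'}}]}],
%%   Rows = lists:sort(ets:select(Queues, MS)).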
-recover_messages(DurableQueues, Snapshot = #psnapshot{messages = Messages, - queues = Queues}) -> - DurableQueuesSet = sets:from_list(DurableQueues), - Work = ets:foldl( - fun ({{QName, PKey}, Delivered, SeqId}, Acc) -> - case sets:is_element(QName, DurableQueuesSet) of - true -> - rabbit_misc:dict_cons( - QName, {SeqId, PKey, Delivered}, Acc); - false -> - Acc - end - end, dict:new(), Queues), - {L, RecoveredContent} = - lists:foldl( - fun ({Recovered, {QName, Msgs}}, {L, Dict}) -> - {Recovered ++ L, dict:store(QName, Msgs, Dict)} - end, {[], dict:new()}, - %% unstable parallel map, because order doesn't matter - rabbit_misc:upmap( - %% we do as much work as possible in spawned worker - %% processes, but we need to make sure the ets:inserts are - %% performed in self() - fun ({QName, Requeues}) -> - recover(QName, Requeues, Messages) - end, dict:to_list(Work))), - NewMessages = [{K, M} || {_S, _Q, K, M, _D} <- L], - NewQueues = [{{Q, K}, D, S} || {S, Q, K, _M, D} <- L], - ets:delete_all_objects(Messages), - ets:delete_all_objects(Queues), - true = ets:insert(Messages, NewMessages), - true = ets:insert(Queues, NewQueues), - %% contains the mutated messages and queues tables - {RecoveredContent, Snapshot}. - -recover(QName, Requeues, Messages) -> - RecoveredMessages = - lists:sort([{SeqId, QName, PKey, Message, Delivered} || - {SeqId, PKey, Delivered} <- Requeues, - {_, Message} <- ets:lookup(Messages, PKey)]), - {RecoveredMessages, {QName, [{Message, Delivered} || - {_, _, _, Message, Delivered} - <- RecoveredMessages]}}. - replay([], LogHandle, K, Snapshot) -> case disk_log:chunk(LogHandle, K) of {K1, Items} -> -- cgit v1.2.1 From 00eea075c30eca85161ca49263323858d236b51a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 17:52:22 +0100 Subject: ensure queues are fully initialsed before we advertise their presence Previously in a clustered setup a channel on another node could have found a recently recovered queue before that queue had been fully initialised and recovered its content. Fixing that turns out to be simple since the main obstacle - the re-declaration check for recovered durable queues and the associated atomic update of the record with the new queue pid - can be removed since it became superfluous with the changes in bug 20916. --- src/rabbit_amqqueue.erl | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index d705909a..4e12bb7d 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -144,30 +144,12 @@ find_durable_queues() -> end). recover_durable_queues(DurableQueues) -> - Qs = lists:foldl( - fun (RecoveredQ, Acc) -> - Q = start_queue_process(RecoveredQ, false), - %% We need to catch the case where a client - %% connected to another node has deleted the queue - %% (and possibly re-created it). 
- case rabbit_misc:execute_mnesia_transaction( - fun () -> - case mnesia:match_object( - rabbit_durable_queue, RecoveredQ, - read) of - [_] -> ok = store_queue(Q), - true; - [] -> false - end - end) of - true -> [Q | Acc]; - false -> exit(Q#amqqueue.pid, shutdown), - Acc - end - end, [], DurableQueues), + Qs = [start_queue_process(Q, false) || Q <- DurableQueues], %% Issue inits to *all* the queues so that they all init at the same time [ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue) || Q <- Qs], [ok = gen_server2:call(Q#amqqueue.pid, sync, infinity) || Q <- Qs], + rabbit_misc:execute_mnesia_transaction( + fun () -> [ok = store_queue(Q) || Q <- Qs] end), Qs. declare(QueueName, Durable, AutoDelete, Args) -> -- cgit v1.2.1 From 0e52c03aaec32e23d7efb790845591948c3f74a6 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 18:19:16 +0100 Subject: simplify queue startup by going through the same three-step sequence (start process, init_backing_queue, sync) in both recovery and creation --- src/rabbit_amqqueue.erl | 10 ++++++---- src/rabbit_amqqueue_process.erl | 25 +++++++++---------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 4e12bb7d..668f4ae2 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -144,7 +144,7 @@ find_durable_queues() -> end). recover_durable_queues(DurableQueues) -> - Qs = [start_queue_process(Q, false) || Q <- DurableQueues], + Qs = [start_queue_process(Q) || Q <- DurableQueues], %% Issue inits to *all* the queues so that they all init at the same time [ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue) || Q <- Qs], [ok = gen_server2:call(Q#amqqueue.pid, sync, infinity) || Q <- Qs], @@ -157,7 +157,9 @@ declare(QueueName, Durable, AutoDelete, Args) -> durable = Durable, auto_delete = AutoDelete, arguments = Args, - pid = none}, true), + pid = none}), + ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue), + ok = gen_server2:call(Q#amqqueue.pid, sync, infinity), internal_declare(Q, true). internal_declare(Q = #amqqueue{name = QueueName}, WantDefaultBinding) -> @@ -194,8 +196,8 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -start_queue_process(Q, InitBackingQueue) -> - {ok, Pid} = rabbit_amqqueue_sup:start_child([Q, InitBackingQueue]), +start_queue_process(Q) -> + {ok, Pid} = rabbit_amqqueue_sup:start_child([Q]), Q#amqqueue{pid = Pid}. add_default_binding(#amqqueue{name = QueueName}) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b10baacb..10e1193f 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -39,7 +39,7 @@ -define(SYNC_INTERVAL, 5). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). --export([start_link/2, info_keys/0]). +-export([start_link/1, info_keys/0]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]). @@ -94,14 +94,14 @@ %%---------------------------------------------------------------------------- -start_link(Q, InitBackingQueue) -> - gen_server2:start_link(?MODULE, [Q, InitBackingQueue], []). +start_link(Q) -> + gen_server2:start_link(?MODULE, [Q], []). info_keys() -> ?INFO_KEYS. 
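%% Editorial note (not part of the original patch): after this change
%% recovery and declaration share one three-step shape - start the
%% process, cast the (at this point idempotent) backing queue init,
%% then issue a synchronous call as a barrier. A hedged sketch:
%%
%%   Q1 = start_queue_process(Q),
%%   ok = gen_server2:cast(Q1#amqqueue.pid, init_backing_queue),
%%   ok = gen_server2:call(Q1#amqqueue.pid, sync, infinity).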
%%---------------------------------------------------------------------------- -init([Q, InitBQ]) -> +init([Q]) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), ok = file_handle_cache:register_callback( @@ -115,7 +115,7 @@ init([Q, InitBQ]) -> exclusive_consumer = none, has_had_consumers = false, backing_queue = BQ, - backing_queue_state = maybe_init_backing_queue(InitBQ, BQ, Q), + backing_queue_state = undefined, backing_queue_timeout_fun = undefined, active_consumers = queue:new(), blocked_consumers = queue:new(), @@ -123,12 +123,6 @@ init([Q, InitBQ]) -> rate_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -maybe_init_backing_queue( - true, BQ, #amqqueue{name = QName, durable = IsDurable}) -> - BQ:init(QName, IsDurable); -maybe_init_backing_queue(false, _BQ, _Q) -> - undefined. - terminate(shutdown, State) -> terminate_shutdown(terminate, State); terminate({shutdown, _}, State) -> @@ -731,11 +725,10 @@ handle_call({claim_queue, ReaderPid}, _From, handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). - -handle_cast(init_backing_queue, State = #q{backing_queue_state = undefined, - backing_queue = BQ, q = Q}) -> - noreply(State#q{backing_queue_state = - maybe_init_backing_queue(true, BQ, Q)}); +handle_cast(init_backing_queue, + State = #q{q = #amqqueue{name = QName, durable = IsDurable}, + backing_queue_state = undefined, backing_queue = BQ}) -> + noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable)}); handle_cast(init_backing_queue, State) -> noreply(State); -- cgit v1.2.1 From eed544797cbdb7f5790c4287d36100d85e748ccd Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 18:44:10 +0100 Subject: consistency, consistency --- src/rabbit_amqqueue_process.erl | 44 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 10e1193f..4579c3b5 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -138,8 +138,8 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -terminate_shutdown(Fun, State = - #q{backing_queue = BQ, backing_queue_state = BQS}) -> +terminate_shutdown(Fun, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> ok = rabbit_memory_monitor:deregister(self()), case BQS of undefined -> State; @@ -164,8 +164,7 @@ noreply(NewState) -> {NewState1, Timeout} = next_state(NewState), {noreply, NewState1, Timeout}. -next_state(State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> next_state1(ensure_rate_timer(State), BQ:sync_callback(BQS)). next_state1(State = #q{sync_timer_ref = undefined}, Fun) @@ -206,8 +205,8 @@ stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{sync_timer_ref = undefined, backing_queue_timeout_fun = undefined}. -assert_invariant(#q{active_consumers = AC, backing_queue_state = BQS, - backing_queue = BQ}) -> +assert_invariant(#q{active_consumers = AC, + backing_queue = BQ, backing_queue_state = BQS}) -> true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). lookup_ch(ChPid) -> @@ -321,15 +320,14 @@ deliver_from_queue_pred(IsEmpty, _State) -> not IsEmpty. 
deliver_from_queue_deliver(AckRequired, false, - State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> + State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> {{Message, IsDelivered, AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), {{Message, IsDelivered, AckTag}, 0 == Remaining, State #q { backing_queue_state = BQS1 }}. -run_message_queue(State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +run_message_queue(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> Funs = {fun deliver_from_queue_pred/2, fun deliver_from_queue_deliver/3}, IsEmpty = BQ:is_empty(BQS), @@ -676,8 +674,8 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, end; handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - backing_queue_state = BQS, backing_queue = BQ, + backing_queue_state = BQS, active_consumers = ActiveConsumers}) -> reply({ok, Name, BQ:len(BQS), queue:len(ActiveConsumers)}, State); @@ -695,9 +693,10 @@ handle_call({delete, IfUnused, IfEmpty}, _From, {stop, normal, {ok, Length}, State} end; -handle_call(purge, _From, State = #q{backing_queue = BQ}) -> - {Count, BQS} = BQ:purge(State#q.backing_queue_state), - reply({ok, Count}, State#q{backing_queue_state = BQS}); +handle_call(purge, _From, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {Count, BQS1} = BQ:purge(BQS), + reply({ok, Count}, State#q{backing_queue_state = BQS1}); handle_call({claim_queue, ReaderPid}, _From, State = #q{owner = Owner, exclusive_consumer = Holder}) -> @@ -727,7 +726,7 @@ handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> handle_cast(init_backing_queue, State = #q{q = #amqqueue{name = QName, durable = IsDurable}, - backing_queue_state = undefined, backing_queue = BQ}) -> + backing_queue = BQ, backing_queue_state = undefined}) -> noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable)}); handle_cast(init_backing_queue, State) -> @@ -738,8 +737,8 @@ handle_cast({deliver, Txn, Message, ChPid}, State) -> {_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), noreply(NewState); -handle_cast({ack, Txn, AckTags, ChPid}, State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +handle_cast({ack, Txn, AckTags, ChPid}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> case lookup_ch(ChPid) of not_found -> noreply(State); @@ -801,8 +800,8 @@ handle_cast({flush, ChPid}, State) -> ok = rabbit_channel:flushed(ChPid, self()), noreply(State); -handle_cast(update_ram_duration, State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +handle_cast(update_ram_duration, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> {RamDuration, BQS1} = BQ:ram_duration(BQS), DesiredDuration = rabbit_memory_monitor:report_ram_duration(self(), RamDuration), @@ -811,8 +810,7 @@ handle_cast(update_ram_duration, State = #q{backing_queue_state = BQS, backing_queue_state = BQS2}); handle_cast({set_ram_duration_target, Duration}, - State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> BQS1 = BQ:set_ram_duration_target(Duration, BQS), noreply(State#q{backing_queue_state = BQS1}); @@ -853,8 +851,8 @@ handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. 
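%% Editorial note (not part of the original patch): the
%% update_ram_duration clause above is one turn of a feedback loop with
%% the memory monitor; in isolation:
%%
%%   {Duration, BQS1} = BQ:ram_duration(BQS),
%%   Desired = rabbit_memory_monitor:report_ram_duration(self(), Duration),
%%   BQS2 = BQ:set_ram_duration_target(Desired, BQS1).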
-handle_pre_hibernate(State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +handle_pre_hibernate(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> BQS1 = BQ:handle_pre_hibernate(BQS), %% no activity for a while == 0 egress and ingress rates DesiredDuration = -- cgit v1.2.1 From 6c2f637c5f8974352da2618c7281e6ff293e23bb Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 18:45:02 +0100 Subject: remove redundant call to memory_monitor:deregister --- src/rabbit_amqqueue_process.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 4579c3b5..12429d25 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -128,7 +128,6 @@ terminate(shutdown, State) -> terminate({shutdown, _}, State) -> terminate_shutdown(terminate, State); terminate(_Reason, State) -> - ok = rabbit_memory_monitor:deregister(self()), %% FIXME: How do we cancel active subscriptions? State1 = terminate_shutdown(delete_and_terminate, State), ok = rabbit_amqqueue:internal_delete(qname(State1)). -- cgit v1.2.1 From 6c2c8cebcbe24108c29aa0da9a64b84d9affb3e7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 19:00:00 +0100 Subject: guard against operating on uninitialised backing_queue_state --- src/rabbit_amqqueue_process.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 12429d25..b85bcdcb 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -106,8 +106,6 @@ init([Q]) -> process_flag(trap_exit, true), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [self()]), - ok = rabbit_memory_monitor:register - (self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), {ok, BQ} = application:get_env(backing_queue_module), {ok, #q{q = Q, @@ -139,10 +137,10 @@ code_change(_OldVsn, State, _Extra) -> terminate_shutdown(Fun, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> - ok = rabbit_memory_monitor:deregister(self()), case BQS of undefined -> State; - _ -> BQS1 = lists:foldl( + _ -> ok = rabbit_memory_monitor:deregister(self()), + BQS1 = lists:foldl( fun (#cr{txn = none}, BQSN) -> BQSN; (#cr{txn = Txn}, BQSN) -> @@ -726,6 +724,8 @@ handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> handle_cast(init_backing_queue, State = #q{q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined}) -> + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable)}); handle_cast(init_backing_queue, State) -> @@ -850,6 +850,8 @@ handle_info(Info, State) -> ?LOGDEBUG("Info in queue: ~p~n", [Info]), {stop, {unhandled_info, Info}, State}. +handle_pre_hibernate(State = #q{backing_queue_state = undefined}) -> + {hibernate, State}; handle_pre_hibernate(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> BQS1 = BQ:handle_pre_hibernate(BQS), -- cgit v1.2.1 From 326fb61df0b38bdb69f39dda7186611bf7cbadf6 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 26 Apr 2010 20:54:41 +0100 Subject: cosmetic - less obscure function calls Unfortunately fun M:F/N only works if M and F are atoms. 
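(Editorial note, not part of the original message: this restriction was
later lifted - since Erlang/OTP R15B, fun M:F/A also accepts variables,
so the wrappers below could today be written directly as

    Terminate = fun BQ:terminate/1,

but at the time the explicit fun (BQS) -> BQ:terminate(BQS) end form
was the only option.)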
--- src/rabbit_amqqueue_process.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b85bcdcb..8557cb94 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -121,13 +121,14 @@ init([Q]) -> rate_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -terminate(shutdown, State) -> - terminate_shutdown(terminate, State); -terminate({shutdown, _}, State) -> - terminate_shutdown(terminate, State); -terminate(_Reason, State) -> +terminate(shutdown, State = #q{backing_queue = BQ}) -> + terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); +terminate({shutdown, _}, State = #q{backing_queue = BQ}) -> + terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); +terminate(_Reason, State = #q{backing_queue = BQ}) -> %% FIXME: How do we cancel active subscriptions? - State1 = terminate_shutdown(delete_and_terminate, State), + State1 = terminate_shutdown(fun (BQS) -> BQ:delete_and_terminate(BQS) end, + State), ok = rabbit_amqqueue:internal_delete(qname(State1)). code_change(_OldVsn, State, _Extra) -> @@ -148,7 +149,7 @@ terminate_shutdown(Fun, State = #q{backing_queue = BQ, BQ:tx_rollback(Txn, BQSN), BQSN1 end, BQS, all_ch_record()), - State#q{backing_queue_state = BQ:Fun(BQS1)} + State#q{backing_queue_state = Fun(BQS1)} end. reply(Reply, NewState) -> -- cgit v1.2.1 From b4629c2f879ffa3431e34f713dd7f3129f7a8cbe Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Apr 2010 05:40:03 +0100 Subject: more selective backing queue initialisation Tell BQ:init whether queue contents should be recovered. In the case of invariable_queue this allows us to suppress any interaction with the persister on queue declaration, which is beneficial since the persister can be a bottleneck. There may also be scope to utilise this knowledge in variable_queue at some point. --- include/rabbit_backing_queue_spec.hrl | 2 +- src/rabbit_amqqueue.erl | 4 ++-- src/rabbit_amqqueue_process.erl | 6 +++--- src/rabbit_backing_queue.erl | 7 ++++--- src/rabbit_invariable_queue.erl | 6 +++--- src/rabbit_tests.erl | 2 +- src/rabbit_variable_queue.erl | 4 ++-- 7 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 63f4493b..7c83bb52 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -34,7 +34,7 @@ ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})). -spec(start/1 :: ([queue_name()]) -> 'ok'). --spec(init/2 :: (queue_name(), boolean()) -> state()). +-spec(init/3 :: (queue_name(), boolean(), boolean()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). 
diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 668f4ae2..ee769d55 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -146,7 +146,7 @@ find_durable_queues() -> recover_durable_queues(DurableQueues) -> Qs = [start_queue_process(Q) || Q <- DurableQueues], %% Issue inits to *all* the queues so that they all init at the same time - [ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue) || Q <- Qs], + [ok = gen_server2:cast(Q#amqqueue.pid, {init, true}) || Q <- Qs], [ok = gen_server2:call(Q#amqqueue.pid, sync, infinity) || Q <- Qs], rabbit_misc:execute_mnesia_transaction( fun () -> [ok = store_queue(Q) || Q <- Qs] end), @@ -158,7 +158,7 @@ declare(QueueName, Durable, AutoDelete, Args) -> auto_delete = AutoDelete, arguments = Args, pid = none}), - ok = gen_server2:cast(Q#amqqueue.pid, init_backing_queue), + ok = gen_server2:cast(Q#amqqueue.pid, {init, false}), ok = gen_server2:call(Q#amqqueue.pid, sync, infinity), internal_declare(Q, true). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 8557cb94..3b5bd823 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -722,14 +722,14 @@ handle_call({claim_queue, ReaderPid}, _From, handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). -handle_cast(init_backing_queue, +handle_cast({init, Recover}, State = #q{q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined}) -> ok = rabbit_memory_monitor:register( self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), - noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable)}); + noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable, Recover)}); -handle_cast(init_backing_queue, State) -> +handle_cast({init, _Recover}, State) -> noreply(State); handle_cast({deliver, Txn, Message, ChPid}, State) -> diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 7090d9cc..f21c290f 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -42,9 +42,10 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, - %% Called with queue name and a boolean to indicate whether or - %% not the queue is durable. - {init, 2}, + %% Called with queue name, a boolean to indicate whether or + %% not the queue is durable, and a boolean to indicate whether + %% the queue contents should be attempted to be recovered. + {init, 3}, %% Called on queue shutdown when queue isn't being deleted {terminate, 1}, diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 7765069f..e5811c34 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_invariable_queue). --export([init/2, terminate/1, delete_and_terminate/1, purge/1, publish/2, +-export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, set_ram_duration_target/2, ram_duration/1, sync_callback/1, @@ -61,8 +61,8 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). 
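%% Editorial note (not part of the original patch): with {init, 3} in
%% the behaviour, every backing queue now sees the Recover flag, which
%% callers thread through the init cast; hedged sketch:
%%
%%   ok = gen_server2:cast(QPid, {init, true}),   %% recovery path
%%   ok = gen_server2:cast(QPid, {init, false}).  %% fresh declaration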
-init(QName, IsDurable) -> - Q = queue:from_list(case IsDurable of +init(QName, IsDurable, Recover) -> + Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); false -> [] end), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1ab4f224..b9963400 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1384,7 +1384,7 @@ assert_prop(List, Prop, Value) -> fresh_variable_queue() -> stop_msg_store(), ok = empty_test_queue(), - VQ = rabbit_variable_queue:init(test_queue(), true), + VQ = rabbit_variable_queue:init(test_queue(), true, false), S0 = rabbit_variable_queue:status(VQ), assert_prop(S0, len, 0), assert_prop(S0, q1, 0), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b33df24f..35d2b191 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_variable_queue). --export([init/2, terminate/1, publish/2, publish_delivered/3, +-export([init/3, terminate/1, publish/2, publish_delivered/3, set_ram_duration_target/2, ram_duration/1, fetch/2, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, sync_callback/1, @@ -266,7 +266,7 @@ start(DurableQueues) -> [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), PersistRefs, PersistStartFunState]). -init(QueueName, IsDurable) -> +init(QueueName, IsDurable, _Recover) -> PersistentStore = case IsDurable of true -> ?PERSISTENT_MSG_STORE; false -> ?TRANSIENT_MSG_STORE -- cgit v1.2.1 From 617dd04dbadb9e13b19f67a128ad0f501ce9f13c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Apr 2010 05:42:07 +0100 Subject: explode on attempted double initialisation of queue which indicates a serious bug and thus we shouldn't ignore it --- src/rabbit_amqqueue_process.erl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 3b5bd823..2eaf8b7e 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -729,9 +729,6 @@ handle_cast({init, Recover}, self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable, Recover)}); -handle_cast({init, _Recover}, State) -> - noreply(State); - handle_cast({deliver, Txn, Message, ChPid}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. {_Delivered, NewState} = deliver_or_enqueue(Txn, ChPid, Message, State), -- cgit v1.2.1 From bea0bf9aa270cde2b8cfb537d27e47cafdbba116 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Apr 2010 10:50:12 +0100 Subject: some inlining --- src/rabbit_invariable_queue.erl | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index e5811c34..86d80e66 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -127,25 +127,29 @@ ack(AckTags, State = #iv_state { qname = QName, pending_ack = PA }) -> State #iv_state { pending_ack = PA1 }. tx_publish(Txn, Msg, State = #iv_state { qname = QName }) -> - publish_in_tx(Txn, Msg), + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), ok = persist_message(Txn, QName, Msg), State.
tx_ack(Txn, AckTags, State = #iv_state { qname = QName, pending_ack = PA }) -> - ack_in_tx(Txn, AckTags), + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), ok = persist_acks(Txn, QName, AckTags, PA), State. tx_rollback(Txn, State = #iv_state { qname = QName }) -> #tx { pending_acks = AckTags } = lookup_tx(Txn), - ok = rollback_work(Txn, QName), + ok = do_if_persistent(fun rabbit_persister:rollback_transaction/1, + Txn, QName), erase_tx(Txn), {lists:flatten(AckTags), State}. tx_commit(Txn, Fun, State = #iv_state { qname = QName, pending_ack = PA, queue = Q, len = Len }) -> #tx { pending_acks = AckTags, pending_messages = PubsRev } = lookup_tx(Txn), - ok = commit_work(Txn, QName), + ok = do_if_persistent(fun rabbit_persister:commit_transaction/1, + Txn, QName), erase_tx(Txn), Fun(), AckTags1 = lists:flatten(AckTags), @@ -228,14 +232,6 @@ do_if_persistent(F, Txn, QName) -> true -> F({Txn, QName}) end. -publish_in_tx(Txn, Msg) -> - Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), - store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }). - -ack_in_tx(Txn, AckTags) -> - Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), - store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }). - %%---------------------------------------------------------------------------- persist_message(_Txn, _QName, #basic_message { is_persistent = false }) -> @@ -273,11 +269,3 @@ persist_work(none, _QName, WorkList) -> persist_work(Txn, QName, WorkList) -> mark_tx_persistent(Txn), rabbit_persister:extend_transaction({Txn, QName}, WorkList). - -commit_work(Txn, QName) -> - do_if_persistent(fun rabbit_persister:commit_transaction/1, - Txn, QName). - -rollback_work(Txn, QName) -> - do_if_persistent(fun rabbit_persister:rollback_transaction/1, - Txn, QName). -- cgit v1.2.1 From 571d5edf33f5ecdd7cb40cbb306dce1c6ff1e926 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Apr 2010 11:02:30 +0100 Subject: simplification --- src/rabbit_invariable_queue.erl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 86d80e66..30c5744d 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -255,12 +255,11 @@ persist_delivery(QName, #basic_message { guid = Guid }, _IsDelivered) -> persist_acks(Txn, QName, AckTags, PA) -> persist_work(Txn, QName, - [{ack, {QName, Guid}} || - Guid <- AckTags, - case dict:find(Guid, PA) of - {ok, #basic_message { is_persistent = true }} -> true; - _ -> false - end]). + [{ack, {QName, Guid}} || Guid <- AckTags, + begin + {ok, Msg} = dict:find(Guid, PA), + Msg #basic_message.is_persistent + end]). 
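%% Editor's note: the rewritten comprehension above exploits the fact that
%% a filter may be any boolean expression. The begin/end block doubles as
%% an assertion -- the {ok, Msg} match crashes on a missing ack tag rather
%% than silently skipping it -- and only persistent messages pass the
%% filter. A tiny self-contained illustration of the same shape:
%%   [X || X <- [1, 2, 3], begin X rem 2 =:= 1 end]   %% yields [1, 3]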
persist_work(_Txn,_QName, []) -> ok; -- cgit v1.2.1 From 91c57adbc6b99473526830175e23f75e4174c4f9 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Apr 2010 11:13:35 +0100 Subject: minor refactoring --- src/rabbit_invariable_queue.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 30c5744d..fa11b976 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -154,8 +154,8 @@ tx_commit(Txn, Fun, State = #iv_state { qname = QName, pending_ack = PA, Fun(), AckTags1 = lists:flatten(AckTags), PA1 = remove_acks(AckTags1, PA), - {Q1, Len1} = lists:foldr(fun (Msg, {QN, LenN}) -> - {queue:in({Msg, false}, QN), LenN + 1} + {Q1, Len1} = lists:foldl(fun (Msg, {QN, LenN}) -> + {queue:in_r({Msg, false}, QN), LenN + 1} end, {Q, Len}, PubsRev), {AckTags1, State #iv_state { pending_ack = PA1, queue = Q1, len = Len1 }}. @@ -170,12 +170,12 @@ requeue(AckTags, State = #iv_state { pending_ack = PA, queue = Q, %% messages should appear, thus the persister is permitted to sort %% based on seq_id, even though it'll likely give a different %% order to the last known state of our queue, prior to shutdown. - {Q1, PA1, Len1} = - lists:foldl( - fun (Guid, {QN, PAN, LenN}) -> - {ok, Msg = #basic_message {}} = dict:find(Guid, PAN), - {queue:in({Msg, true}, QN), dict:erase(Guid, PAN), LenN + 1} - end, {Q, PA, Len}, AckTags), + {Q1, Len1} = lists:foldl( + fun (Guid, {QN, LenN}) -> + {ok, Msg = #basic_message {}} = dict:find(Guid, PA), + {queue:in({Msg, true}, QN), LenN + 1} + end, {Q, Len}, AckTags), + PA1 = remove_acks(AckTags, PA), State #iv_state { pending_ack = PA1, queue = Q1, len = Len1 }. len(#iv_state { len = Len }) -> -- cgit v1.2.1 From 3b9b8848b6f30a7b93288747043047dcaf9e9ba2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 27 Apr 2010 12:06:37 +0100 Subject: Turns out on commit, the msgs really really should go at the end of the queue, not the head of the queue --- src/rabbit_invariable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index fa11b976..bee97651 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -154,8 +154,8 @@ tx_commit(Txn, Fun, State = #iv_state { qname = QName, pending_ack = PA, Fun(), AckTags1 = lists:flatten(AckTags), PA1 = remove_acks(AckTags1, PA), - {Q1, Len1} = lists:foldl(fun (Msg, {QN, LenN}) -> - {queue:in_r({Msg, false}, QN), LenN + 1} + {Q1, Len1} = lists:foldr(fun (Msg, {QN, LenN}) -> + {queue:in({Msg, false}, QN), LenN + 1} end, {Q, Len}, PubsRev), {AckTags1, State #iv_state { pending_ack = PA1, queue = Q1, len = Len1 }}. -- cgit v1.2.1 From 158b9e1cafc42498c0c7ad204e595a9ceb0a1a71 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Apr 2010 13:07:55 +0100 Subject: refactor: move type spec since it's only used in one place --- include/rabbit.hrl | 1 - src/rabbit_msg_file.erl | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 51c8b35e..a4abc1ff 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -85,7 +85,6 @@ -type(info() :: {info_key(), any()}). -type(regexp() :: binary()). -type(file_path() :: string()). --type(io_device() :: any()). %% this is really an abstract type, but dialyzer does not support them -type(guid() :: binary()). 
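%% Editor's note on the tx_commit fix in the previous patch: both folds
%% keep the published messages in publish order among themselves; the
%% difference is where the batch lands. With PubsRev = [m2, m1] (newest
%% first) and an existing queue [a]:
%%   foldl + queue:in_r  -> [m1, m2, a]   %% batch at the head (wrong)
%%   foldr + queue:in    -> [a, m1, m2]   %% batch at the tail (right)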
diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index f758c184..46288ccd 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -51,6 +51,7 @@ -ifdef(use_specs). +-type(io_device() :: any()). -type(position() :: non_neg_integer()). -type(msg_size() :: non_neg_integer()). -type(file_size() :: non_neg_integer()). -- cgit v1.2.1 From cf0d5d82b895f605416d3727d5b2751bfd3066b5 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 28 Apr 2010 22:28:37 +0100 Subject: tiny refactor to align with 'default' --- src/rabbit_amqqueue_process.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index c18caae4..b59cd074 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -93,14 +93,13 @@ %%---------------------------------------------------------------------------- -start_link(Q) -> - gen_server2:start_link(?MODULE, [Q], []). +start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). info_keys() -> ?INFO_KEYS. %%---------------------------------------------------------------------------- -init([Q]) -> +init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), ok = file_handle_cache:register_callback( -- cgit v1.2.1 From 19a9893e20a0bb95e450bab9dc4178f41d449204 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 15:28:04 +0100 Subject: cosmetic --- src/file_handle_cache.erl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 929671cd..9dec339f 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -37,7 +37,8 @@ %% module. %% %% Some constraints -%% 1) This supports 1 writer, multiple readers per file. Nothing else. +%% 1) This supports one writer, multiple readers per file. Nothing +%% else. %% 2) Do not open the same file from different processes. Bad things %% may happen. %% 3) Writes are all appends. You cannot write to the middle of a @@ -47,7 +48,7 @@ %% between that buffer and the write buffer. %% %% Some benefits -%% 1) You don't have to remember to call sync before close +%% 1) You do not have to remember to call sync before close %% 2) Buffering is much more flexible than with plain file module, and %% you can control when the buffer gets flushed out. This means that %% you can rely on reads-after-writes working, without having to call @@ -59,12 +60,11 @@ %% 5) You can find out what the offset was when you last sync'd. %% %% There is also a server component which serves to limit the number -%% of open file handles in a "soft" way. By "soft", I mean that the -%% server will never prevent a client from opening a handle, but may -%% immediately tell it to close the handle. Thus you can set the limit -%% to zero and it will still all work correctly, it's just that -%% effectively no caching will take place. The operation of limiting -%% is as follows: +%% of open file handles in a "soft" way - the server will never +%% prevent a client from opening a handle, but may immediately tell it +%% to close the handle. Thus you can set the limit to zero and it will +%% still all work correctly, it is just that effectively no caching +%% will take place. The operation of limiting is as follows: %% %% On open and close, the client sends messages to the server %% informing it of opens and closes. 
This allows the server to keep @@ -85,15 +85,15 @@ %% the last reported least recently used file handle of all the %% clients. It then tells all the clients to close any handles not %% used for longer than this average. The client should receive this -%% message and pass it into set_maximum_since_use/1. However, it's +%% message and pass it into set_maximum_since_use/1. However, it is %% highly possible this age will be greater than the ages of all the %% handles the client knows of because the client has used its file %% handles in the mean time. Thus at this point the client reports to %% the server the current timestamp at which its least recently used %% file handle was last used. The server will check two seconds later -%% that either it's back under the limit, in which case all is well +%% that either it is back under the limit, in which case all is well %% again, or if not, it will calculate a new average age. Its data -%% will be much more recent now, and so it's very likely that when +%% will be much more recent now, and so it is very likely that when %% this is communicated to the clients, the clients will close file %% handles. %% @@ -101,13 +101,13 @@ %% from the client to the server on open, close, and when in the %% process of trying to reduce file handle usage. There is no %% communication from the client to the server on normal file handle -%% operations. This scheme forms a feed-back loop - the server doesn't -%% care which file handles are closed, just that some are, and it +%% operations. This scheme forms a feed-back loop - the server does +%% not care which file handles are closed, just that some are, and it %% checks this repeatedly when over the limit. Given the guarantees of %% now(), even if there is just one file handle open, a limit of 1, %% and one client, it is certain that when the client calculates the -%% age of the handle, it'll be greater than when the server calculated -%% it, hence it should be closed. +%% age of the handle, it will be greater than when the server +%% calculated it, hence it should be closed. %% %% Handles which are closed as a result of the server are put into a %% "soft-closed" state in which the handle is closed (data flushed out -- cgit v1.2.1 From eb1f0d465b3efa4424cc7d7371bea77c71f118da Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 15:30:29 +0100 Subject: clean up fhc sigs --- src/file_handle_cache.erl | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 9dec339f..b92f547a 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -183,28 +183,29 @@ -type(ref() :: any()). -type(error() :: {'error', any()}). -type(ok_or_error() :: ('ok' | error())). --type(position() :: ('bof' | 'eof' | {'bof',integer()} | {'eof',integer()} - | {'cur',integer()} | integer())). +-type(val_or_error(T) :: ({'ok', T} | error())). +-type(position() :: ('bof' | 'eof' | {('bof' |'eof' | 'cur'), integer()} | + integer())). +-type(offset() :: non_neg_integer()). -spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). -spec(open/3 :: (string(), [any()], - [{'write_buffer', (non_neg_integer()|'infinity'|'unbuffered')}]) -> - ({'ok', ref()} | error())). --spec(close/1 :: (ref()) -> ('ok' | error())). --spec(read/2 :: (ref(), integer()) -> - ({'ok', ([char()]|binary())} | eof | error())). + [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) -> + val_or_error(ref())). 
+-spec(close/1 :: (ref()) -> ok_or_error()). +-spec(read/2 :: (ref(), non_neg_integer()) -> + val_or_error([char()] | binary()) | 'eof'). -spec(append/2 :: (ref(), iodata()) -> ok_or_error()). -spec(sync/1 :: (ref()) -> ok_or_error()). --spec(position/2 :: (ref(), position()) -> - ({'ok', non_neg_integer()} | error())). +-spec(position/2 :: (ref(), position()) -> val_or_error(offset())). -spec(truncate/1 :: (ref()) -> ok_or_error()). --spec(last_sync_offset/1 :: (ref()) -> ({'ok', integer()} | error())). --spec(current_virtual_offset/1 :: (ref()) -> ({'ok', integer()} | error())). --spec(current_raw_offset/1 :: (ref()) -> ({'ok', integer()} | error())). +-spec(last_sync_offset/1 :: (ref()) -> val_or_error(offset())). +-spec(current_virtual_offset/1 :: (ref()) -> val_or_error(offset())). +-spec(current_raw_offset/1 :: (ref()) -> val_or_error(offset())). -spec(flush/1 :: (ref()) -> ok_or_error()). -spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> - ({'ok', integer()} | error())). + val_or_error(non_neg_integer())). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(delete/1 :: (ref()) -> ok_or_error()). -spec(clear/1 :: (ref()) -> ok_or_error()). -- cgit v1.2.1 From b11da62f6e71cc50eb61319a8b932c7ee64fc822 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 29 Apr 2010 15:44:59 +0100 Subject: Minor corrections to specs --- src/file_handle_cache.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index b92f547a..63302a1e 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -184,8 +184,8 @@ -type(error() :: {'error', any()}). -type(ok_or_error() :: ('ok' | error())). -type(val_or_error(T) :: ({'ok', T} | error())). --type(position() :: ('bof' | 'eof' | {('bof' |'eof' | 'cur'), integer()} | - integer())). +-type(position() :: ('bof' | 'eof' | non_neg_integer() | + {('bof' |'eof'), non_neg_integer()} | {'cur', integer()})). -type(offset() :: non_neg_integer()). -spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). -- cgit v1.2.1 From c308ec2deb0fb47130e23963be3148430cff97bd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 29 Apr 2010 16:30:03 +0100 Subject: More meaningful specs for the backing queue, and removal of duplicated and potentially divergent documentation with the specs --- include/rabbit_backing_queue_spec.hrl | 12 ++++++++---- src/rabbit_backing_queue.erl | 20 +++++++++----------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 7c83bb52..0a0931ea 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -32,16 +32,20 @@ -type(fetch_result() :: %% Message, IsDelivered, AckTag, Remaining_Len ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})). +-type(is_durable() :: boolean()). +-type(attempt_recovery() :: boolean()). +-type(purged_msg_count() :: non_neg_integer()). +-type(ack_required() :: boolean()). -spec(start/1 :: ([queue_name()]) -> 'ok'). --spec(init/3 :: (queue_name(), boolean(), boolean()) -> state()). +-spec(init/3 :: (queue_name(), is_durable(), attempt_recovery()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). --spec(purge/1 :: (state()) -> {non_neg_integer(), state()}). +-spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). -spec(publish/2 :: (basic_message(), state()) -> state()). 
-spec(publish_delivered/3 :: - (boolean(), basic_message(), state()) -> {ack(), state()}). --spec(fetch/2 :: (boolean(), state()) -> {fetch_result(), state()}). + (ack_required(), basic_message(), state()) -> {ack(), state()}). +-spec(fetch/2 :: (ack_required(), state()) -> {fetch_result(), state()}). -spec(ack/2 :: ([ack()], state()) -> state()). -spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()). -spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()). diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index f21c290f..38bee466 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -42,12 +42,10 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, - %% Called with queue name, a boolean to indicate whether or - %% not the queue is durable, and a boolean to indicate whether - %% the queue contents should be attempted to be recovered. + %% Initialise the backing queue and its state. {init, 3}, - %% Called on queue shutdown when queue isn't being deleted + %% Called on queue shutdown when queue isn't being deleted. {terminate, 1}, %% Called when the queue is terminating and needs to delete all @@ -58,7 +56,7 @@ behaviour_info(callbacks) -> %% been fetched and are pending acks. {purge, 1}, - %% Publish a message + %% Publish a message. {publish, 2}, %% Called for messages which have already been passed straight @@ -66,11 +64,11 @@ behaviour_info(callbacks) -> %% (i.e. saves the round trip through the backing queue). {publish_delivered, 3}, - %% Produce the next message + %% Produce the next message. {fetch, 2}, %% Acktags supplied are for messages which can now be forgotten - %% about + %% about. {ack, 2}, %% A publish, but in the context of a transaction. @@ -118,12 +116,12 @@ behaviour_info(callbacks) -> {ram_duration, 1}, %% Can return 'undefined' or a thunk which will receive the - %% state, and must return the state, as soon as the queue process - %% can manage (either on an empty mailbox, or when a timer - %% fires). + %% state, and must return the state, which will be invoked as + %% soon as the queue process can manage (either on an empty + %% mailbox, or when a timer fires). {sync_callback, 1}, - %% Called immediately before the queue hibernates + %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, %% Exists for debugging purposes, to be able to expose state via -- cgit v1.2.1 From 0da055c15686ea280fd1c63f54e5e6ceae77b5e1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 29 Apr 2010 16:45:23 +0100 Subject: Allow people to specify append, but silently map it to write --- src/file_handle_cache.erl | 62 +++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 63302a1e..d5e1fe8b 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -224,36 +224,32 @@ register_callback(M, F, A) gen_server:call(?SERVER, {register_callback, self(), {M, F, A}}, infinity). 
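%% Editor's sketch (not part of the patch): the shape of a typical
%% register_callback/3 client. The server applies the stored {M, F, A}
%% with the average age appended, and the client forwards that into
%% set_maximum_since_use/1 from its own process. All names below are
%% illustrative -- compare the rabbit_msg_store_gc patch at the end of
%% this series.
init([]) ->
    ok = file_handle_cache:register_callback(
           ?MODULE, set_maximum_since_use, [self()]),
    {ok, no_state}.

set_maximum_since_use(Pid, Age) ->
    gen_server2:cast(Pid, {set_maximum_since_use, Age}).

handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    {noreply, State}.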
open(Path, Mode, Options) -> - case is_appender(Mode) of - true -> - {error, append_not_supported}; - false -> - Path1 = filename:absname(Path), - File1 = #file { reader_count = RCount, has_writer = HasWriter } = - case get({Path1, fhc_file}) of - File = #file {} -> File; - undefined -> #file { reader_count = 0, - has_writer = false } - end, - IsWriter = is_writer(Mode), - case IsWriter andalso HasWriter of - true -> {error, writer_exists}; - false -> Ref = make_ref(), - case open1(Path1, Mode, Options, Ref, bof, new) of - {ok, _Handle} -> - RCount1 = case is_reader(Mode) of - true -> RCount + 1; - false -> RCount - end, - HasWriter1 = HasWriter orelse IsWriter, - put({Path1, fhc_file}, - File1 #file { reader_count = RCount1, - has_writer = HasWriter1}), - {ok, Ref}; - Error -> - Error - end - end + Path1 = filename:absname(Path), + File1 = #file { reader_count = RCount, has_writer = HasWriter } = + case get({Path1, fhc_file}) of + File = #file {} -> File; + undefined -> #file { reader_count = 0, + has_writer = false } + end, + Mode1 = append_to_write(Mode), + IsWriter = is_writer(Mode1), + case IsWriter andalso HasWriter of + true -> {error, writer_exists}; + false -> Ref = make_ref(), + case open1(Path1, Mode1, Options, Ref, bof, new) of + {ok, _Handle} -> + RCount1 = case is_reader(Mode1) of + true -> RCount + 1; + false -> RCount + end, + HasWriter1 = HasWriter orelse IsWriter, + put({Path1, fhc_file}, + File1 #file { reader_count = RCount1, + has_writer = HasWriter1}), + {ok, Ref}; + Error -> + Error + end end. close(Ref) -> @@ -462,7 +458,11 @@ is_reader(Mode) -> lists:member(read, Mode). is_writer(Mode) -> lists:member(write, Mode). -is_appender(Mode) -> lists:member(append, Mode). +append_to_write(Mode) -> + case lists:member(append, Mode) of + true -> [write | lists:subtract(Mode, [append, write])]; + false -> Mode + end. with_handles(Refs, Fun) -> ResHandles = lists:foldl( -- cgit v1.2.1 From 3b86381db2f4cdaee69ccace061d773a726d1350 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 29 Apr 2010 16:49:27 +0100 Subject: cosmetic --- src/file_handle_cache.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index d5e1fe8b..32bd9cef 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -245,7 +245,7 @@ open(Path, Mode, Options) -> HasWriter1 = HasWriter orelse IsWriter, put({Path1, fhc_file}, File1 #file { reader_count = RCount1, - has_writer = HasWriter1}), + has_writer = HasWriter1 }), {ok, Ref}; Error -> Error -- cgit v1.2.1 From ede6c82620a9c9dc84eac339dc4a9ad2ec93956b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 17:54:20 +0100 Subject: cosmetic --- src/file_handle_cache.erl | 56 ++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 32bd9cef..93a75ac0 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -157,11 +157,11 @@ write_buffer_size_limit, write_buffer, at_eof, - is_write, - is_read, + path, mode, options, - path, + is_write, + is_read, last_used_at }). @@ -332,8 +332,8 @@ truncate(Ref) -> trusted_offset = TOffset }]) -> case file:truncate(Hdl) of ok -> TOffset1 = lists:min([Offset, TOffset]), - {ok, [Handle1 #handle {at_eof = true, - trusted_offset = TOffset1 }]}; + {ok, [Handle1 #handle { trusted_offset = TOffset1, + at_eof = true }]}; Error -> {Error, [Handle1]} end end). 
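%% Editor's note: worked examples for append_to_write/1 above, checked by
%% hand against the code (not in the original patch):
%%   append_to_write([read, append])       %% -> [write, read]
%%   append_to_write([append, write, raw]) %% -> [write, raw]
%%   append_to_write([read])               %% -> [read], unchanged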
@@ -361,7 +361,7 @@ flush(Ref) -> copy(Src, Dest, Count) -> with_flushed_handles( [Src, Dest], - fun ([SHandle = #handle { is_read = true, hdl = SHdl, offset = SOffset }, + fun ([SHandle = #handle { is_read = true, hdl = SHdl, offset = SOffset }, DHandle = #handle { is_write = true, hdl = DHdl, offset = DOffset }] ) -> case file:copy(SHdl, DHdl, Count) of @@ -401,8 +401,8 @@ clear(Ref) -> {{ok, 0}, Handle2 = #handle { hdl = Hdl }} -> case file:truncate(Hdl) of ok -> {ok, [Handle2 #handle { - at_eof = true, - trusted_offset = 0 }]}; + trusted_offset = 0, + at_eof = true }]}; Error -> {Error, [Handle2]} end; Error -> @@ -509,8 +509,8 @@ get_or_reopen(Ref) -> case get({Ref, fhc_handle}) of undefined -> {error, not_open, Ref}; - #handle { hdl = closed, mode = Mode, options = Options, - offset = Offset, path = Path } -> + #handle { hdl = closed, offset = Offset, + path = Path, mode = Mode, options = Options } -> open1(Path, Mode, Options, Ref, Offset, reopen); Handle -> {ok, Handle} @@ -545,14 +545,20 @@ open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> N when is_integer(N) -> N end, Now = now(), - Handle = #handle { hdl = Hdl, offset = 0, trusted_offset = 0, - write_buffer_size = 0, options = Options, + Handle = #handle { hdl = Hdl, + offset = 0, + trusted_offset = 0, + is_dirty = false, + write_buffer_size = 0, write_buffer_size_limit = WriteBufferSize, - write_buffer = [], at_eof = false, mode = Mode, - is_write = is_writer(Mode), - is_read = is_reader(Mode), - path = Path, last_used_at = Now, - is_dirty = false }, + write_buffer = [], + at_eof = false, + path = Path, + mode = Mode, + options = Options, + is_write = is_writer(Mode), + is_read = is_reader(Mode), + last_used_at = Now }, {{ok, Offset1}, Handle1} = maybe_seek(Offset, Handle), Handle2 = Handle1 #handle { trusted_offset = Offset1 }, put({Ref, fhc_handle}, Handle2), @@ -570,9 +576,9 @@ open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> close1(Ref, Handle, SoftOrHard) -> case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, path = Path, is_dirty = IsDirty, - is_read = IsReader, is_write = IsWriter, - last_used_at = Then, offset = Offset } = Handle1 } -> + {ok, #handle { hdl = Hdl, offset = Offset, is_dirty = IsDirty, + path = Path, is_read = IsReader, is_write = IsWriter, + last_used_at = Then } = Handle1 } -> Handle2 = case Hdl of closed -> @@ -625,15 +631,15 @@ close1(Ref, Handle, SoftOrHard) -> Error end. -maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, at_eof = AtEoF, - offset = Offset }) -> +maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, offset = Offset, + at_eof = AtEoF }) -> {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), case (case NeedsSeek of true -> file:position(Hdl, NewOffset); false -> {ok, Offset} end) of {ok, Offset1} = Result -> - {Result, Handle #handle { at_eof = AtEoF1, offset = Offset1 }}; + {Result, Handle #handle { offset = Offset1, at_eof = AtEoF1 }}; {error, _} = Error -> {Error, Handle} end. @@ -669,8 +675,8 @@ write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, case file:write(Hdl, lists:reverse(WriteBuffer)) of ok -> Offset1 = Offset + DataSize, - {ok, Handle #handle { offset = Offset1, write_buffer = [], - write_buffer_size = 0, is_dirty = true }}; + {ok, Handle #handle { offset = Offset1, is_dirty = true, + write_buffer = [], write_buffer_size = 0 }}; {error, _} = Error -> {Error, Handle} end. 
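%% Editor's note: write_buffer/1 above can flush with a single
%% lists:reverse/1 because appends cons new iodata onto the front of the
%% buffer (O(1) per append; the append path is not shown in these hunks),
%% so the reverse restores write order and the whole buffer goes out in
%% one file:write/2 call:
%%   Buf = [<<"second">>, <<"first">>],
%%   %% file:write(Hdl, lists:reverse(Buf)) writes <<"firstsecond">>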
-- cgit v1.2.1 From bb7e5f09df379297801536b6b33751ab81e889fd Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 18:37:44 +0100 Subject: made logic in fhc:close1 less obscure the previous code was working ok, but only because handles were soft-closed at most once --- src/file_handle_cache.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 93a75ac0..b65a05a7 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -582,7 +582,7 @@ close1(Ref, Handle, SoftOrHard) -> Handle2 = case Hdl of closed -> - ok; + Handle1; _ -> ok = case IsDirty of true -> file:sync(Hdl); @@ -605,7 +605,8 @@ close1(Ref, Handle, SoftOrHard) -> ?SERVER, {close, self(), Oldest}), Tree1 end), - Handle1 #handle { trusted_offset = Offset, + Handle1 #handle { hdl = closed, + trusted_offset = Offset, is_dirty = false } end, case SoftOrHard of @@ -624,7 +625,7 @@ close1(Ref, Handle, SoftOrHard) -> has_writer = HasWriter1 }) end, ok; - soft -> {ok, Handle2 #handle { hdl = closed }} + soft -> {ok, Handle2} end; {Error, Handle1} -> put_handle(Ref, Handle1), -- cgit v1.2.1 From 6c82d1fe5b87fa9064215aa6ca30c3540cf0dae4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 19:25:43 +0100 Subject: refactor: extract age tree manipulation These functions only have one call site each, but they are non-trivial and perform a distinct function and hence were cluttering the call sites. --- src/file_handle_cache.erl | 93 +++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index b65a05a7..4d1c78a3 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -429,18 +429,7 @@ set_maximum_since_use(MaximumAge) -> (_KeyValuePair, Rep) -> Rep end, true, get()) of - true -> with_age_tree( - fun (Tree) -> - case gb_trees:is_empty(Tree) of - true -> Tree; - false -> {Oldest, _Ref} = - gb_trees:smallest(Tree), - gen_server:cast( - ?SERVER, {update, self(), Oldest}) - end, - Tree - end), - ok; + true -> age_tree_change(), ok; false -> ok end. @@ -516,19 +505,56 @@ get_or_reopen(Ref) -> {ok, Handle} end. -get_or_create_age_tree() -> - case get(fhc_age_tree) of - undefined -> gb_trees:empty(); - AgeTree -> AgeTree - end. - with_age_tree(Fun) -> - put(fhc_age_tree, Fun(get_or_create_age_tree())). + put(fhc_age_tree, Fun(case get(fhc_age_tree) of + undefined -> gb_trees:empty(); + AgeTree -> AgeTree + end)). + +age_tree_insert(Now, Ref) -> + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:insert(Now, Ref, Tree), + {Oldest, _Ref} = gb_trees:smallest(Tree1), + gen_server:cast(?SERVER, {open, self(), Oldest}), + Tree1 + end). + +age_tree_update(Then, Now, Ref) -> + with_age_tree( + fun (Tree) -> + gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) + end). + +age_tree_delete(Then) -> + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete(Then, Tree), + Oldest = case gb_trees:is_empty(Tree1) of + true -> + undefined; + false -> + {Oldest1, _Ref} = gb_trees:smallest(Tree1), + Oldest1 + end, + gen_server:cast(?SERVER, {close, self(), Oldest}), + Tree1 + end). + +age_tree_change() -> + with_age_tree( + fun (Tree) -> + case gb_trees:is_empty(Tree) of + true -> Tree; + false -> {Oldest, _Ref} = gb_trees:smallest(Tree), + gen_server:cast(?SERVER, {update, self(), Oldest}) + end, + Tree + end). 
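%% Editor's note: the age tree factored out above is a gb_trees structure
%% keyed on last-used timestamp -- now/0 is guaranteed to return strictly
%% increasing values on a node, so keys never collide -- mapping each
%% timestamp to a handle ref. gb_trees:smallest/1 therefore always yields
%% the least recently used handle:
%%   T1 = gb_trees:insert(Now1, RefA, gb_trees:empty()),
%%   T2 = gb_trees:insert(Now2, RefB, T1),   %% Now2 > Now1
%%   {Now1, RefA} = gb_trees:smallest(T2).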
put_handle(Ref, Handle = #handle { last_used_at = Then }) -> Now = now(), - with_age_tree( - fun (Tree) -> gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) end), + age_tree_update(Then, Now, Ref), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> @@ -562,13 +588,7 @@ open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> {{ok, Offset1}, Handle1} = maybe_seek(Offset, Handle), Handle2 = Handle1 #handle { trusted_offset = Offset1 }, put({Ref, fhc_handle}, Handle2), - with_age_tree(fun (Tree) -> - Tree1 = gb_trees:insert(Now, Ref, Tree), - {Oldest, _Ref} = gb_trees:smallest(Tree1), - gen_server:cast(?SERVER, - {open, self(), Oldest}), - Tree1 - end), + age_tree_insert(Now, Ref), {ok, Handle2}; {error, Reason} -> {error, Reason} @@ -589,22 +609,7 @@ close1(Ref, Handle, SoftOrHard) -> false -> ok end, ok = file:close(Hdl), - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:delete(Then, Tree), - Oldest = - case gb_trees:is_empty(Tree1) of - true -> - undefined; - false -> - {Oldest1, _Ref} = - gb_trees:smallest(Tree1), - Oldest1 - end, - gen_server:cast( - ?SERVER, {close, self(), Oldest}), - Tree1 - end), + age_tree_delete(Then), Handle1 #handle { hdl = closed, trusted_offset = Offset, is_dirty = false } -- cgit v1.2.1 From 1486528fa0d29e4d53f3b5eff41fe6b627270d38 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 22:44:11 +0100 Subject: cosmetic changes and a little bit of refactoring on the file handle cache --- src/file_handle_cache.erl | 48 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 4d1c78a3..b9b94e11 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -449,7 +449,7 @@ is_writer(Mode) -> lists:member(write, Mode). append_to_write(Mode) -> case lists:member(append, Mode) of - true -> [write | lists:subtract(Mode, [append, write])]; + true -> [write | Mode -- [append, write]]; false -> Mode end. @@ -505,6 +505,11 @@ get_or_reopen(Ref) -> {ok, Handle} end. +put_handle(Ref, Handle = #handle { last_used_at = Then }) -> + Now = now(), + age_tree_update(Then, Now, Ref), + put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). + with_age_tree(Fun) -> put(fhc_age_tree, Fun(case get(fhc_age_tree) of undefined -> gb_trees:empty(); @@ -552,11 +557,6 @@ age_tree_change() -> Tree end). -put_handle(Ref, Handle = #handle { last_used_at = Then }) -> - Now = now(), - age_tree_update(Then, Now, Ref), - put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). 
- open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> Mode1 = case NewOrReopen of new -> Mode; @@ -599,21 +599,18 @@ close1(Ref, Handle, SoftOrHard) -> {ok, #handle { hdl = Hdl, offset = Offset, is_dirty = IsDirty, path = Path, is_read = IsReader, is_write = IsWriter, last_used_at = Then } = Handle1 } -> - Handle2 = - case Hdl of - closed -> - Handle1; - _ -> - ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - ok = file:close(Hdl), - age_tree_delete(Then), - Handle1 #handle { hdl = closed, - trusted_offset = Offset, - is_dirty = false } - end, + Handle2 = case Hdl of + closed -> Handle1; + _ -> ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + age_tree_delete(Then), + Handle1 #handle { hdl = closed, + trusted_offset = Offset, + is_dirty = false } + end, case SoftOrHard of hard -> #file { reader_count = RCount, has_writer = HasWriter } = File = @@ -852,10 +849,9 @@ ulimit() -> end. ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> - State #fhc_state { client_mrefs = ensure_mref(Pid, ClientMRefs) }; -ensure_mref(Pid, ClientMRefs) -> case dict:find(Pid, ClientMRefs) of - {ok, _MRef} -> ClientMRefs; - error -> dict:store(Pid, erlang:monitor(process, Pid), - ClientMRefs) + {ok, _MRef} -> State; + error -> MRef = erlang:monitor(process, Pid), + State #fhc_state { + client_mrefs = dict:store(Pid, MRef, ClientMRefs) } end. -- cgit v1.2.1 From 3714b37cb9f40f7cdc77fff9f694e9ec11acc98a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 29 Apr 2010 23:29:54 +0100 Subject: refactor: made 'close' control flow more obvious --- src/file_handle_cache.erl | 94 ++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index b9b94e11..cd8d2fef 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -255,7 +255,11 @@ open(Path, Mode, Options) -> close(Ref) -> case erase({Ref, fhc_handle}) of undefined -> ok; - Handle -> close1(Ref, Handle, hard) + Handle -> case hard_close(Handle) of + ok -> ok; + {Error, Handle1} -> put_handle(Ref, Handle1), + Error + end end. read(Ref, Count) -> @@ -381,10 +385,11 @@ delete(Ref) -> undefined -> ok; Handle = #handle { path = Path } -> - case close1(Ref, Handle #handle { is_dirty = false, - write_buffer = [] }, hard) of - ok -> file:delete(Path); - Error -> Error + case hard_close(Handle #handle { is_dirty = false, + write_buffer = [] }) of + ok -> file:delete(Path); + {Error, Handle1} -> put_handle(Ref, Handle1), + Error end end. @@ -417,12 +422,11 @@ set_maximum_since_use(MaximumAge) -> Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> Age = timer:now_diff(Now, Then), case Hdl /= closed andalso Age >= MaximumAge of - true -> case close1(Ref, Handle, soft) of - {ok, Handle1} -> - put({Ref, fhc_handle}, Handle1), - false; - _ -> - Rep + true -> {Res, Handle1} = soft_close(Handle), + put_handle(Ref, Handle1), + case Res of + ok -> false; + _ -> Rep end; false -> Rep end; @@ -594,44 +598,44 @@ open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> {error, Reason} end. 
-close1(Ref, Handle, SoftOrHard) -> +soft_close(Handle = #handle { hdl = closed }) -> + {ok, Handle}; +soft_close(Handle) -> case write_buffer(Handle) of {ok, #handle { hdl = Hdl, offset = Offset, is_dirty = IsDirty, - path = Path, is_read = IsReader, is_write = IsWriter, last_used_at = Then } = Handle1 } -> - Handle2 = case Hdl of - closed -> Handle1; - _ -> ok = case IsDirty of - true -> file:sync(Hdl); - false -> ok - end, - ok = file:close(Hdl), - age_tree_delete(Then), - Handle1 #handle { hdl = closed, - trusted_offset = Offset, - is_dirty = false } + ok = case IsDirty of + true -> file:sync(Hdl); + false -> ok + end, + ok = file:close(Hdl), + age_tree_delete(Then), + {ok, Handle1 #handle { hdl = closed, trusted_offset = Offset, + is_dirty = false }}; + {_Error, _Handle} = Result -> + Result + end. + +hard_close(Handle) -> + case soft_close(Handle) of + {ok, #handle { path = Path, + is_read = IsReader, is_write = IsWriter }} -> + #file { reader_count = RCount, has_writer = HasWriter } = File = + get({Path, fhc_file}), + RCount1 = case IsReader of + true -> RCount - 1; + false -> RCount end, - case SoftOrHard of - hard -> #file { reader_count = RCount, - has_writer = HasWriter } = File = - get({Path, fhc_file}), - RCount1 = case IsReader of - true -> RCount - 1; - false -> RCount - end, - HasWriter1 = HasWriter andalso not IsWriter, - case RCount1 =:= 0 andalso not HasWriter1 of - true -> erase({Path, fhc_file}); - false -> put({Path, fhc_file}, - File #file { reader_count = RCount1, - has_writer = HasWriter1 }) - end, - ok; - soft -> {ok, Handle2} - end; - {Error, Handle1} -> - put_handle(Ref, Handle1), - Error + HasWriter1 = HasWriter andalso not IsWriter, + case RCount1 =:= 0 andalso not HasWriter1 of + true -> erase({Path, fhc_file}); + false -> put({Path, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter1 }) + end, + ok; + {_Error, _Handle} = Result -> + Result end. maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, offset = Offset, -- cgit v1.2.1 From 1b4d5673e71f23a055e814a913515aa9a790b586 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 30 Apr 2010 10:35:52 +0100 Subject: cosmetic --- src/file_handle_cache.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index cd8d2fef..7c156ac7 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -126,15 +126,15 @@ -behaviour(gen_server). +-export([register_callback/3]). -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). +-export([release_on_death/1, obtain/0]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --export([release_on_death/1, obtain/0, register_callback/3]). - -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). -define(FILE_HANDLES_LIMIT_WINDOWS, 10000000). 
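%% Editor's note on the soft_close/hard_close split two patches above:
%% soft_close/1 flushes, syncs and closes the OS handle but keeps the
%% per-ref and per-path bookkeeping, so the handle reopens transparently
%% on next use (this is what set_maximum_since_use/1 relies on under fd
%% pressure); hard_close/1 soft-closes and additionally releases the
%% #file reader count / writer flag, as done by close/1 and delete/1.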
-- cgit v1.2.1 From 6a35dc60ad9c27d1337ca4cce4b0e7ef13da7e07 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 30 Apr 2010 12:20:14 +0100 Subject: fix error handling in fhc:clear and remove some wrong (though benign) code --- src/file_handle_cache.erl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 7c156ac7..2d326583 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -398,19 +398,16 @@ clear(Ref) -> [Ref], fun ([#handle { at_eof = true, write_buffer_size = 0, offset = 0 }]) -> ok; - ([Handle = #handle { write_buffer_size = Size, offset = Offset }]) -> - Handle1 = Handle #handle { write_buffer = [], - write_buffer_size = 0, - offset = Offset - Size }, - case maybe_seek(bof, Handle1) of - {{ok, 0}, Handle2 = #handle { hdl = Hdl }} -> + ([Handle]) -> + case maybe_seek(bof, Handle #handle { write_buffer = [], + write_buffer_size = 0 }) of + {{ok, 0}, Handle1 = #handle { hdl = Hdl }} -> case file:truncate(Hdl) of - ok -> {ok, [Handle2 #handle { - trusted_offset = 0, - at_eof = true }]}; - Error -> {Error, [Handle2]} + ok -> {ok, [Handle1 #handle {trusted_offset = 0, + at_eof = true }]}; + Error -> {Error, [Handle1]} end; - Error -> + {{error, _} = Error, Handle1} -> {Error, [Handle1]} end end). -- cgit v1.2.1 From 656c43f790d80d3e58bb0dba1c32b2fb62d7e112 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 30 Apr 2010 13:00:21 +0100 Subject: cosmetic --- src/file_handle_cache.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 2d326583..7b4ff1a4 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -810,8 +810,8 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, end end, Pids) end, - {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, gen_server, - cast, [?SERVER, check_counts]), + {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, + gen_server, cast, [?SERVER, check_counts]), State; maybe_reduce(State) -> State. -- cgit v1.2.1 From 5233e32ec69cce4f609d9254381193dbc650ee41 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 13:12:41 +0100 Subject: Change register_callback to a cast, don't send messages in absence of a callback, and combine two dicts --- src/file_handle_cache.erl | 98 +++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 2d326583..92b408c5 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -84,18 +84,18 @@ %% When the limit is reached, the server calculates the average age of %% the last reported least recently used file handle of all the %% clients. It then tells all the clients to close any handles not -%% used for longer than this average. The client should receive this -%% message and pass it into set_maximum_since_use/1. However, it is -%% highly possible this age will be greater than the ages of all the -%% handles the client knows of because the client has used its file -%% handles in the mean time. Thus at this point the client reports to -%% the server the current timestamp at which its least recently used -%% file handle was last used. The server will check two seconds later -%% that either it is back under the limit, in which case all is well -%% again, or if not, it will calculate a new average age. 
Its data -%% will be much more recent now, and so it is very likely that when -%% this is communicated to the clients, the clients will close file -%% handles. +%% used for longer than this average, by invoking the callback the +%% client registered. The client should receive this message and pass +%% it into set_maximum_since_use/1. However, it is highly possible +%% this age will be greater than the ages of all the handles the +%% client knows of because the client has used its file handles in the +%% mean time. Thus at this point the client reports to the server the +%% current timestamp at which its least recently used file handle was +%% last used. The server will check two seconds later that either it +%% is back under the limit, in which case all is well again, or if +%% not, it will calculate a new average age. Its data will be much +%% more recent now, and so it is very likely that when this is +%% communicated to the clients, the clients will close file handles. %% %% The advantage of this scheme is that there is only communication %% from the client to the server on open, close, and when in the @@ -170,8 +170,7 @@ limit, count, obtains, - callbacks, - client_mrefs + callbacks_mrefs }). %%---------------------------------------------------------------------------- @@ -221,7 +220,7 @@ start_link() -> register_callback(M, F, A) when is_atom(M) andalso is_atom(F) andalso is_list(A) -> - gen_server:call(?SERVER, {register_callback, self(), {M, F, A}}, infinity). + gen_server:cast(?SERVER, {register_callback, self(), {M, F, A}}). open(Path, Mode, Options) -> Path1 = filename:absname(Path), @@ -699,8 +698,7 @@ init([]) -> end, error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [], callbacks = dict:new(), - client_mrefs = dict:new() }}. + obtains = [], callbacks_mrefs = dict:new() }}. handle_call(obtain, From, State = #fhc_state { count = Count }) -> State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = @@ -709,13 +707,16 @@ handle_call(obtain, From, State = #fhc_state { count = Count }) -> true -> {noreply, State1 #fhc_state { obtains = [From | Obtains], count = Count1 - 1 }}; false -> {reply, ok, State1} - end; + end. -handle_call({register_callback, Pid, MFA}, _From, - State = #fhc_state { callbacks = Callbacks }) -> - {reply, ok, ensure_mref( - Pid, State #fhc_state { - callbacks = dict:store(Pid, MFA, Callbacks) })}. +handle_cast({register_callback, Pid, MFA}, State) -> + State1 = #fhc_state { callbacks_mrefs = CallsMRefs } = + ensure_mref(Pid, State), + {noreply, + State1 #fhc_state { callbacks_mrefs = + dict:update( + Pid, fun ({undefined, MRef}) -> {MFA, MRef} end, + CallsMRefs) }}; handle_cast({open, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> @@ -749,17 +750,17 @@ handle_cast({release_on_death, Pid}, State) -> {noreply, State}. handle_info({'DOWN', MRef, process, Pid, _Reason}, - State = #fhc_state { count = Count, callbacks = Callbacks, - client_mrefs = ClientMRefs, + State = #fhc_state { count = Count, callbacks_mrefs = CallsMRefs, elders = Elders }) -> - State1 = case dict:find(Pid, ClientMRefs) of - {ok, MRef} -> State #fhc_state { - elders = dict:erase(Pid, Elders), - client_mrefs = dict:erase(Pid, ClientMRefs), - callbacks = dict:erase(Pid, Callbacks) }; - _ -> State #fhc_state { count = Count - 1 } - end, - {noreply, process_obtains(State1)}. 
+ {noreply, process_obtains( + case dict:find(Pid, CallsMRefs) of + {ok, {_Callback, MRef}} -> + State #fhc_state { + elders = dict:erase(Pid, Elders), + callbacks_mrefs = dict:erase(Pid, CallsMRefs) }; + _ -> + State #fhc_state { count = Count - 1 } + end)}. terminate(_Reason, State) -> State. @@ -785,8 +786,9 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, [gen_server:reply(From, ok) || From <- ObtainableRev], State #fhc_state { count = Count + ObtainableLen, obtains = ObtainsNew }. -maybe_reduce(State = #fhc_state { limit = Limit, count = Count, - elders = Elders, callbacks = Callbacks }) +maybe_reduce(State = #fhc_state { + limit = Limit, count = Count, elders = Elders, + callbacks_mrefs = CallsMRefs }) when Limit /= infinity andalso Count >= Limit -> Now = now(), {Pids, Sum, ClientCount} = @@ -801,11 +803,10 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, _ -> AverageAge = Sum / ClientCount, lists:foreach( fun (Pid) -> - case dict:find(Pid, Callbacks) of - error -> - Pid ! {?MODULE, maximum_eldest_since_use, - AverageAge}; - {ok, {M, F, A}} -> + case dict:fetch(Pid, CallsMRefs) of + {undefined, _MRef} -> + ok; + {{M, F, A}, _MRef} -> apply(M, F, A ++ [AverageAge]) end end, Pids) @@ -849,10 +850,15 @@ ulimit() -> ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS end. -ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> - case dict:find(Pid, ClientMRefs) of - {ok, _MRef} -> State; - error -> MRef = erlang:monitor(process, Pid), - State #fhc_state { - client_mrefs = dict:store(Pid, MRef, ClientMRefs) } +ensure_mref(Pid, State = #fhc_state { callbacks_mrefs = CallsMRefs }) -> + case dict:find(Pid, CallsMRefs) of + {ok, {_Callback, MRef}} when MRef =/= undefined -> + State; + _ -> + MRef = erlang:monitor(process, Pid), + State #fhc_state { + callbacks_mrefs = dict:update( + Pid, fun ({Callback, undefined}) -> + {Callback, MRef} + end, {undefined, MRef}, CallsMRefs) } end. -- cgit v1.2.1 From 6c2b3e7d16a48fbc7d7f9fc5c54fdfaa1aff3d06 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 13:22:16 +0100 Subject: Don't allow lots of timers to be started --- src/file_handle_cache.erl | 20 +++++++++++++------- src/rabbit_tests.erl | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index adcff653..7d96b66f 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -170,7 +170,8 @@ limit, count, obtains, - callbacks_mrefs + callbacks_mrefs, + reduce_timer_set }). %%---------------------------------------------------------------------------- @@ -698,7 +699,8 @@ init([]) -> end, error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [], callbacks_mrefs = dict:new() }}. + obtains = [], callbacks_mrefs = dict:new(), + reduce_timer_set = false }}. 
handle_call(obtain, From, State = #fhc_state { count = Count }) -> State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = @@ -743,7 +745,7 @@ handle_cast({close, Pid, EldestUnusedSince}, State = count = Count - 1 }))}; handle_cast(check_counts, State) -> - {noreply, maybe_reduce(State)}; + {noreply, maybe_reduce(State #fhc_state { reduce_timer_set = false })}; handle_cast({release_on_death, Pid}, State) -> _MRef = erlang:monitor(process, Pid), @@ -788,7 +790,7 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, - callbacks_mrefs = CallsMRefs }) + callbacks_mrefs = CallsMRefs, reduce_timer_set = TimerSet }) when Limit /= infinity andalso Count >= Limit -> Now = now(), {Pids, Sum, ClientCount} = @@ -811,9 +813,13 @@ maybe_reduce(State = #fhc_state { end end, Pids) end, - {ok, _TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL, - gen_server, cast, [?SERVER, check_counts]), - State; + case TimerSet of + true -> State; + false -> {ok, _TRef} = timer:apply_after( + ?FILE_HANDLES_CHECK_INTERVAL, + gen_server, cast, [?SERVER, check_counts]), + State #fhc_state { reduce_timer_set = true } + end; maybe_reduce(State) -> State. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b9963400..3cac429e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -54,6 +54,7 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> + application:set_env(rabbit, file_handles_high_watermark, 10, infinity), passed = test_backing_queue(), passed = test_priority_queue(), passed = test_bpqueue(), -- cgit v1.2.1 From 58c05ccf22172b603f917e78f1fffa90f93523f3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 13:28:53 +0100 Subject: reduce_timer_set => timer_ref --- src/file_handle_cache.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 7d96b66f..3d944636 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -171,7 +171,7 @@ count, obtains, callbacks_mrefs, - reduce_timer_set + timer_ref }). %%---------------------------------------------------------------------------- @@ -700,7 +700,7 @@ init([]) -> error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, obtains = [], callbacks_mrefs = dict:new(), - reduce_timer_set = false }}. + timer_ref = undefined }}. 
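%% Editor's note: the two timer patches above converge on a standard
%% debounce idiom -- track the pending timer in the state and arm a new
%% one only when none is outstanding. Extracted sketch (function name
%% illustrative):
ensure_check_timer(State = #fhc_state { timer_ref = undefined }) ->
    {ok, TRef} = timer:apply_after(?FILE_HANDLES_CHECK_INTERVAL,
                                   gen_server, cast, [?SERVER, check_counts]),
    State #fhc_state { timer_ref = TRef };
ensure_check_timer(State) ->
    State.   %% a check is already scheduled; do not stack another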
handle_call(obtain, From, State = #fhc_state { count = Count }) -> State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = @@ -745,7 +745,7 @@ handle_cast({close, Pid, EldestUnusedSince}, State = count = Count - 1 }))}; handle_cast(check_counts, State) -> - {noreply, maybe_reduce(State #fhc_state { reduce_timer_set = false })}; + {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; handle_cast({release_on_death, Pid}, State) -> @@ -790,7 +790,7 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, - callbacks_mrefs = CallsMRefs, reduce_timer_set = TimerSet }) + callbacks_mrefs = CallsMRefs, timer_ref = TRef }) when Limit /= infinity andalso Count >= Limit -> Now = now(), {Pids, Sum, ClientCount} = @@ -813,12 +813,12 @@ maybe_reduce(State = #fhc_state { end end, Pids) end, - case TimerSet of - true -> State; - false -> {ok, _TRef} = timer:apply_after( - ?FILE_HANDLES_CHECK_INTERVAL, - gen_server, cast, [?SERVER, check_counts]), - State #fhc_state { reduce_timer_set = true } + case TRef of + undefined -> {ok, TRef1} = timer:apply_after( + ?FILE_HANDLES_CHECK_INTERVAL, + gen_server, cast, [?SERVER, check_counts]), + State #fhc_state { timer_ref = TRef1 }; + _ -> State end; maybe_reduce(State) -> State. -- cgit v1.2.1 From 91c45e3e6bc5e8b5f32df149532740b0e2b7e1e8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 14:37:54 +0100 Subject: gb_trees:delete crashes if the key is not in the tree. I kid you not. --- src/file_handle_cache.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 3d944636..71142e53 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -529,13 +529,13 @@ age_tree_insert(Now, Ref) -> age_tree_update(Then, Now, Ref) -> with_age_tree( fun (Tree) -> - gb_trees:insert(Now, Ref, gb_trees:delete(Then, Tree)) + gb_trees:insert(Now, Ref, gb_trees:delete_any(Then, Tree)) end). age_tree_delete(Then) -> with_age_tree( fun (Tree) -> - Tree1 = gb_trees:delete(Then, Tree), + Tree1 = gb_trees:delete_any(Then, Tree), Oldest = case gb_trees:is_empty(Tree1) of true -> undefined; -- cgit v1.2.1 From e885404765b9dbeae72cc54dafec3511722477be Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 14:55:34 +0100 Subject: Make the logic on result of soft_close match that on hard_close (i.e. full put if there's an error; don't touch the gb_tree if there's no error). Also, revert back to two diffs for client callbacks and mrefs --- src/file_handle_cache.erl | 76 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 71142e53..2ac674b4 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -170,7 +170,8 @@ limit, count, obtains, - callbacks_mrefs, + callbacks, + client_mrefs, timer_ref }).
@@ -420,10 +421,11 @@ set_maximum_since_use(MaximumAge) -> Age = timer:now_diff(Now, Then), case Hdl /= closed andalso Age >= MaximumAge of true -> {Res, Handle1} = soft_close(Handle), - put_handle(Ref, Handle1), case Res of - ok -> false; - _ -> Rep + ok -> put({Ref, fhc_handle}, Handle1), + false; + _ -> put_handle(Ref, Handle1), + Rep end; false -> Rep end; @@ -699,8 +701,8 @@ init([]) -> end, error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [], callbacks_mrefs = dict:new(), - timer_ref = undefined }}. + obtains = [], callbacks = dict:new(), + client_mrefs = dict:new(), timer_ref = undefined }}. handle_call(obtain, From, State = #fhc_state { count = Count }) -> State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = @@ -711,14 +713,11 @@ handle_call(obtain, From, State = #fhc_state { count = Count }) -> false -> {reply, ok, State1} end. -handle_cast({register_callback, Pid, MFA}, State) -> - State1 = #fhc_state { callbacks_mrefs = CallsMRefs } = - ensure_mref(Pid, State), - {noreply, - State1 #fhc_state { callbacks_mrefs = - dict:update( - Pid, fun ({undefined, MRef}) -> {MFA, MRef} end, - CallsMRefs) }}; +handle_cast({register_callback, Pid, MFA}, + State = #fhc_state { callbacks = Callbacks }) -> + {noreply, ensure_mref( + Pid, State #fhc_state { + callbacks = dict:store(Pid, MFA, Callbacks) })}; handle_cast({open, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, count = Count }) -> @@ -751,17 +750,16 @@ handle_cast({release_on_death, Pid}, State) -> _MRef = erlang:monitor(process, Pid), {noreply, State}. -handle_info({'DOWN', MRef, process, Pid, _Reason}, - State = #fhc_state { count = Count, callbacks_mrefs = CallsMRefs, - elders = Elders }) -> +handle_info({'DOWN', MRef, process, Pid, _Reason}, State = + #fhc_state { count = Count, callbacks = Callbacks, + client_mrefs = ClientMRefs, elders = Elders }) -> {noreply, process_obtains( - case dict:find(Pid, CallsMRefs) of - {ok, {_Callback, MRef}} -> - State #fhc_state { - elders = dict:erase(Pid, Elders), - callbacks_mrefs = dict:erase(Pid, CallsMRefs) }; - _ -> - State #fhc_state { count = Count - 1 } + case dict:find(Pid, ClientMRefs) of + {ok, MRef} -> State #fhc_state { + elders = dict:erase(Pid, Elders), + client_mrefs = dict:erase(Pid, ClientMRefs), + callbacks = dict:erase(Pid, Callbacks) }; + _ -> State #fhc_state { count = Count - 1 } end)}. terminate(_Reason, State) -> @@ -788,9 +786,8 @@ process_obtains(State = #fhc_state { limit = Limit, count = Count, [gen_server:reply(From, ok) || From <- ObtainableRev], State #fhc_state { count = Count + ObtainableLen, obtains = ObtainsNew }. -maybe_reduce(State = #fhc_state { - limit = Limit, count = Count, elders = Elders, - callbacks_mrefs = CallsMRefs, timer_ref = TRef }) +maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, + callbacks = Callbacks, timer_ref = TRef }) when Limit /= infinity andalso Count >= Limit -> Now = now(), {Pids, Sum, ClientCount} = @@ -805,11 +802,9 @@ maybe_reduce(State = #fhc_state { _ -> AverageAge = Sum / ClientCount, lists:foreach( fun (Pid) -> - case dict:fetch(Pid, CallsMRefs) of - {undefined, _MRef} -> - ok; - {{M, F, A}, _MRef} -> - apply(M, F, A ++ [AverageAge]) + case dict:find(Pid, Callbacks) of + error -> ok; + {ok, {M, F, A}} -> apply(M, F, A ++ [AverageAge]) end end, Pids) end, @@ -856,15 +851,10 @@ ulimit() -> ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS end. 
-ensure_mref(Pid, State = #fhc_state { callbacks_mrefs = CallsMRefs }) -> - case dict:find(Pid, CallsMRefs) of - {ok, {_Callback, MRef}} when MRef =/= undefined -> - State; - _ -> - MRef = erlang:monitor(process, Pid), - State #fhc_state { - callbacks_mrefs = dict:update( - Pid, fun ({Callback, undefined}) -> - {Callback, MRef} - end, {undefined, MRef}, CallsMRefs) } +ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> + case dict:find(Pid, ClientMRefs) of + {ok, _MRef} -> State; + error -> MRef = erlang:monitor(process, Pid), + State #fhc_state { + client_mrefs = dict:store(Pid, MRef, ClientMRefs) } end. -- cgit v1.2.1 From 142412ba7bd5a5390afef6d786a63314049b9fef Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 15:07:58 +0100 Subject: Add missing specs --- src/file_handle_cache.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 2ac674b4..0f648dcd 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -210,6 +210,8 @@ -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(delete/1 :: (ref()) -> ok_or_error()). -spec(clear/1 :: (ref()) -> ok_or_error()). +-spec(release_on_death/1 :: (pid()) -> 'ok'). +-spec(obtain/0 :: () -> 'ok'). -endif. -- cgit v1.2.1 From d5bbe15ace639acf8de45ab5e823c54afbc1ad25 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 30 Apr 2010 15:41:29 +0100 Subject: Make the msg_store_gc register an fhc callback --- src/rabbit_msg_store_gc.erl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 32ea0014..ca5e2c6f 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -35,6 +35,8 @@ -export([start_link/4, gc/3, stop/1]). +-export([set_maximum_since_use/2]). + -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -61,9 +63,14 @@ gc(Server, Source, Destination) -> stop(Server) -> gen_server2:call(Server, stop, infinity). +set_maximum_since_use(Pid, Age) -> + gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}). + %%---------------------------------------------------------------------------- init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), {ok, #gcstate { dir = Dir, index_state = IndexState, index_module = IndexModule, parent = Parent, file_summary_ets = FileSummaryEts}, @@ -80,11 +87,11 @@ handle_cast({gc, Source, Destination}, State = Reclaimed = rabbit_msg_store:gc(Source, Destination, {FileSummaryEts, Dir, Index, IndexState}), ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, Destination), - {noreply, State, hibernate}. + {noreply, State, hibernate}; -handle_info({file_handle_cache, maximum_eldest_since_use, Age}, State) -> +handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), - {noreply, State, hibernate}; + {noreply, State, hibernate}. handle_info(Info, State) -> {stop, {unhandled_info, Info}, State}. 
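The file_handle_cache patches above settle on a small callback contract: a
client process registers a single {M, F, A} triple, and when handles must
be reclaimed the server invokes apply(M, F, A ++ [AverageAge]). A minimal
sketch of a conforming client, modelled on the msg_store_gc change just
above (the module name and the no_state placeholder are illustrative, not
part of the patches):

    -module(fhc_client_sketch).
    -behaviour(gen_server2).

    -export([start_link/0, set_maximum_since_use/2]).
    -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
             terminate/2, code_change/3]).

    start_link() ->
        gen_server2:start_link(?MODULE, [], []).

    %% invoked by the file_handle_cache server as
    %% apply(?MODULE, set_maximum_since_use, [Pid] ++ [Age])
    set_maximum_since_use(Pid, Age) ->
        gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}).

    init([]) ->
        %% register once, up front; the server monitors this process
        %% and drops the callback again when it sees the 'DOWN'
        ok = file_handle_cache:register_callback(
               ?MODULE, set_maximum_since_use, [self()]),
        {ok, no_state}.

    handle_cast({set_maximum_since_use, Age}, State) ->
        %% close cached handles unused for at least Age
        ok = file_handle_cache:set_maximum_since_use(Age),
        {noreply, State}.

    handle_call(Msg, _From, State) -> {stop, {unexpected_call, Msg}, State}.
    handle_info(Info, State)       -> {stop, {unhandled_info, Info}, State}.
    terminate(_Reason, State)      -> State.
    code_change(_OldVsn, State, _Extra) -> {ok, State}.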
-- cgit v1.2.1 From e286a8be716eb82ffa8c86ae8e17ff0f973d613e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 30 Apr 2010 16:40:35 +0100 Subject: cosmetic --- src/rabbit_amqqueue.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index ee769d55..48d0edfc 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -32,8 +32,9 @@ -module(rabbit_amqqueue). -export([start/0, declare/4, delete/3, purge/1]). --export([internal_declare/2, internal_delete/1, update_ram_duration/1, - set_ram_duration_target/2, set_maximum_since_use/2]). +-export([internal_declare/2, internal_delete/1, + update_ram_duration/1, set_ram_duration_target/2, + set_maximum_since_use/2]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, stat/1, stat_all/0, deliver/2, requeue/3, ack/4]). @@ -41,8 +42,8 @@ -export([consumers/1, consumers_all/1]). -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). --export([notify_sent/2, unblock/2, maybe_run_queue_via_backing_queue/2, - flush_all/2]). +-export([notify_sent/2, unblock/2, flush_all/2]). +-export([maybe_run_queue_via_backing_queue/2]). -export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -107,8 +108,8 @@ -spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). --spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). +-spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). @@ -328,16 +329,16 @@ notify_sent(QPid, ChPid) -> unblock(QPid, ChPid) -> gen_server2:pcast(QPid, 7, {unblock, ChPid}). -maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, - infinity). - flush_all(QPids, ChPid) -> safe_pmap_ok( fun (_) -> ok end, fun (QPid) -> gen_server2:cast(QPid, {flush, ChPid}) end, QPids). +maybe_run_queue_via_backing_queue(QPid, Fun) -> + gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, + infinity). + internal_delete(QueueName) -> case rabbit_misc:execute_mnesia_transaction( -- cgit v1.2.1 From e102c099adccc151847ce954458dab91b187f128 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 30 Apr 2010 17:29:12 +0100 Subject: move queue's fhc registration from startup into init handler so we don't do any unnecessary work in case the queue process gets terminated before initialisation is completed --- src/rabbit_amqqueue_process.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b59cd074..be4aac32 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -102,8 +102,6 @@ info_keys() -> ?INFO_KEYS. 
init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), - ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, [self()]), {ok, BQ} = application:get_env(backing_queue_module), {ok, #q{q = Q, @@ -721,6 +719,8 @@ handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> handle_cast({init, Recover}, State = #q{q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined}) -> + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), ok = rabbit_memory_monitor:register( self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), noreply(State#q{backing_queue_state = BQ:init(QName, IsDurable, Recover)}); -- cgit v1.2.1 From cde48ab2a83232b2e2bb1c7a408c85def6f928e7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 30 Apr 2010 18:31:25 +0100 Subject: cosmetic --- src/rabbit_amqqueue.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 48d0edfc..2d75b15b 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -33,6 +33,7 @@ -export([start/0, declare/4, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, + maybe_run_queue_via_backing_queue/2, update_ram_duration/1, set_ram_duration_target/2, set_maximum_since_use/2]). -export([pseudo_queue/2]). @@ -43,7 +44,6 @@ -export([claim_queue/2]). -export([basic_get/3, basic_consume/8, basic_cancel/4]). -export([notify_sent/2, unblock/2, flush_all/2]). --export([maybe_run_queue_via_backing_queue/2]). -export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -109,9 +109,9 @@ -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). --spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). -spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue()). -spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). +-spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number()) -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). @@ -335,10 +335,6 @@ flush_all(QPids, ChPid) -> fun (QPid) -> gen_server2:cast(QPid, {flush, ChPid}) end, QPids). -maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, - infinity). - internal_delete(QueueName) -> case rabbit_misc:execute_mnesia_transaction( @@ -360,6 +356,10 @@ internal_delete(QueueName) -> ok end. +maybe_run_queue_via_backing_queue(QPid, Fun) -> + gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, + infinity). + update_ram_duration(QPid) -> gen_server2:pcast(QPid, 8, update_ram_duration). -- cgit v1.2.1 From bf78e6f93f67d8a4bd80e5478d8a96dcc2ec63d4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 2 May 2010 12:24:48 +0100 Subject: refactoring of sync timer setting --- src/rabbit_amqqueue_process.erl | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index be4aac32..809f0cc6 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -159,17 +159,21 @@ noreply(NewState) -> {noreply, NewState1, Timeout}. 
 next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
-    next_state1(ensure_rate_timer(State), BQ:sync_callback(BQS)).
-
-next_state1(State = #q{sync_timer_ref = undefined}, Fun)
-  when Fun =/= undefined ->
-    {start_sync_timer(State, Fun), 0};
-next_state1(State, Fun) when Fun =/= undefined ->
-    {State, 0};
-next_state1(State = #q{sync_timer_ref = undefined}, undefined) ->
+    set_sync_timer(ensure_rate_timer(State), BQ:sync_callback(BQS)).
+
+set_sync_timer(State = #q{sync_timer_ref = undefined}, undefined) ->
     {State, hibernate};
-next_state1(State, undefined) ->
-    {stop_sync_timer(State#q{backing_queue_timeout_fun = undefined}), hibernate}.
+set_sync_timer(State = #q{sync_timer_ref = undefined}, Fun) ->
+    {ok, TRef} = timer:apply_after(
+                   ?SYNC_INTERVAL, rabbit_amqqueue,
+                   maybe_run_queue_via_backing_queue, [self(), Fun]),
+    {State#q{sync_timer_ref = TRef, backing_queue_timeout_fun = Fun}, 0};
+set_sync_timer(State = #q{sync_timer_ref = TRef}, undefined) ->
+    {ok, cancel} = timer:cancel(TRef),
+    {State#q{sync_timer_ref = undefined, backing_queue_timeout_fun = undefined},
+     hibernate};
+set_sync_timer(State, _Fun) ->
+    {State, 0}.

 ensure_rate_timer(State = #q{rate_timer_ref = undefined}) ->
     {ok, TRef} = timer:apply_after(?RAM_DURATION_UPDATE_INTERVAL, rabbit_amqqueue,
                                    update_ram_duration, [self()]),
@@ -188,17 +192,6 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) ->
     {ok, cancel} = timer:cancel(TRef),
     State#q{rate_timer_ref = undefined}.

-start_sync_timer(State = #q{sync_timer_ref = undefined}, Fun)
-  when Fun =/= undefined ->
-    {ok, TRef} = timer:apply_after(
-                   ?SYNC_INTERVAL, rabbit_amqqueue,
-                   maybe_run_queue_via_backing_queue, [self(), Fun]),
-    State#q{sync_timer_ref = TRef, backing_queue_timeout_fun = Fun}.
-
-stop_sync_timer(State = #q{sync_timer_ref = TRef}) ->
-    {ok, cancel} = timer:cancel(TRef),
-    State#q{sync_timer_ref = undefined, backing_queue_timeout_fun = undefined}.
-
 assert_invariant(#q{active_consumers = AC,
                     backing_queue = BQ, backing_queue_state = BQS}) ->
     true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)).
-- cgit v1.2.1


From 50b4d162497a793d93670fc07afce8e84b7e6cfd Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Sun, 2 May 2010 13:24:24 +0100
Subject: correct backing_queue:tx_{commit,rollback} descriptions

---
 src/rabbit_backing_queue.erl | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl
index 38bee466..583cd4da 100644
--- a/src/rabbit_backing_queue.erl
+++ b/src/rabbit_backing_queue.erl
@@ -77,14 +77,12 @@ behaviour_info(callbacks) ->
         %% Acks, but in the context of a transaction.
         {tx_ack, 3},

-        %% Undo anything which has been done by the tx_publish of the
-        %% indicated messages.
+        %% Undo anything which has been done in the context of the
+        %% specified transaction.
         {tx_rollback, 2},

-        %% Commit these publishes and acktags. The publishes you will
-        %% have previously seen in calls to tx_publish, and the acks in
-        %% calls to tx_ack. The Fun passed in must be called once the
-        %% messages have really been commited. This CPS permits the
+        %% Commit a transaction. The Fun passed in must be called once
+        %% the messages have really been committed. This CPS permits the
         %% possibility of commit coalescing.
{tx_commit, 3}, -- cgit v1.2.1 From 743f86fac480a09291461acdd510021ca8302459 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 2 May 2010 15:05:50 +0100 Subject: restructure backing queue sync'ing The code very much relied on the fact that sync_callback would always return the same fun when a sync was required. So it makes sense to capture that in the API by splitting sync_callback into a 'needs_sync' predicate and a separate callback handler. As a result we do not need the backing_queue_timeout_fun state member. --- include/rabbit_backing_queue_spec.hrl | 4 ++-- src/rabbit_amqqueue_process.erl | 28 ++++++++++++---------------- src/rabbit_backing_queue.erl | 11 ++++++----- src/rabbit_invariable_queue.erl | 9 ++++++--- src/rabbit_variable_queue.erl | 8 +++++--- 5 files changed, 31 insertions(+), 29 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 0a0931ea..1b536dfa 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -57,7 +57,7 @@ -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(sync_callback/1 :: (state()) -> - ('undefined' | (fun ((A) -> {boolean(), A})))). +-spec(needs_sync/1 :: (state()) -> boolean()). +-spec(sync/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 809f0cc6..ab88a3c2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -55,7 +55,6 @@ has_had_consumers, backing_queue, backing_queue_state, - backing_queue_timeout_fun, active_consumers, blocked_consumers, sync_timer_ref, @@ -110,7 +109,6 @@ init(Q) -> has_had_consumers = false, backing_queue = BQ, backing_queue_state = undefined, - backing_queue_timeout_fun = undefined, active_consumers = queue:new(), blocked_consumers = queue:new(), sync_timer_ref = undefined, @@ -159,19 +157,20 @@ noreply(NewState) -> {noreply, NewState1, Timeout}. next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> - set_sync_timer(ensure_rate_timer(State), BQ:sync_callback(BQS)). + set_sync_timer(ensure_rate_timer(State), BQ:needs_sync(BQS)). -set_sync_timer(State = #q{sync_timer_ref = undefined}, undefined) -> +set_sync_timer(State = #q{sync_timer_ref = undefined}, false) -> {State, hibernate}; -set_sync_timer(State = #q{sync_timer_ref = undefined}, Fun) -> +set_sync_timer(State = #q{sync_timer_ref = undefined, + backing_queue = BQ}, true) -> {ok, TRef} = timer:apply_after( - ?SYNC_INTERVAL, rabbit_amqqueue, - maybe_run_queue_via_backing_queue, [self(), Fun]), - {State#q{sync_timer_ref = TRef, backing_queue_timeout_fun = Fun}, 0}; -set_sync_timer(State = #q{sync_timer_ref = TRef}, undefined) -> + ?SYNC_INTERVAL, + rabbit_amqqueue, maybe_run_queue_via_backing_queue, + [self(), fun (BQS) -> BQ:sync(BQS) end]), + {State#q{sync_timer_ref = TRef}, 0}; +set_sync_timer(State = #q{sync_timer_ref = TRef}, false) -> {ok, cancel} = timer:cancel(TRef), - {State#q{sync_timer_ref = undefined, backing_queue_timeout_fun = undefined}, - hibernate}; + {State#q{sync_timer_ref = undefined}, hibernate}; set_sync_timer(State, _Fun) -> {State, 0}. 
@@ -823,12 +822,9 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {stop, NewState} -> {stop, normal, NewState} end; -handle_info(timeout, State = #q{backing_queue_timeout_fun = undefined}) -> - noreply(State); - -handle_info(timeout, State = #q{backing_queue_timeout_fun = Fun}) -> +handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - Fun, State#q{backing_queue_timeout_fun = undefined})); + fun (BQS) -> BQ:sync(BQS) end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 583cd4da..d9c89820 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -113,11 +113,12 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Can return 'undefined' or a thunk which will receive the - %% state, and must return the state, which will be invoked as - %% soon as the queue process can manage (either on an empty - %% mailbox, or when a timer fires). - {sync_callback, 1}, + %% Should 'sync' be called as soon as the queue process can + %% manage (either on an empty mailbox, or when a timer fires)? + {needs_sync, 1}, + + %% Called (eventually) after needs_sync returns 'true'. + {sync, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index bee97651..722ea321 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -34,7 +34,7 @@ -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, sync_callback/1, + set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1, handle_pre_hibernate/1, status/1]). -export([start/1]). @@ -191,8 +191,11 @@ set_ram_duration_target(_DurationTarget, State) -> ram_duration(State) -> {0, State}. -sync_callback(_State) -> - undefined. +needs_sync(_State) -> + false. + +sync(State) -> + State. handle_pre_hibernate(State) -> State. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 35d2b191..b5cf9845 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -34,7 +34,7 @@ -export([init/3, terminate/1, publish/2, publish_delivered/3, set_ram_duration_target/2, ram_duration/1, fetch/2, ack/2, len/1, is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/3, - tx_ack/3, tx_rollback/2, tx_commit/3, sync_callback/1, + tx_ack/3, tx_rollback/2, tx_commit/3, needs_sync/1, sync/1, handle_pre_hibernate/1, status/1]). -export([start/1]). @@ -685,8 +685,10 @@ ram_duration(State = #vqstate { egress_rate = Egress, ram_msg_count_prev = RamMsgCount, out_counter = 0, in_counter = 0 })}. -sync_callback(#vqstate { on_sync = {_, _, []} }) -> undefined; -sync_callback(_) -> fun tx_commit_index/1. +needs_sync(#vqstate { on_sync = {_, _, []} }) -> false; +needs_sync(_) -> true. + +sync(State) -> tx_commit_index(State). 
handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = -- cgit v1.2.1 From 5fcf7fdd011544fc097649929ddbfcbc91f03d8d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 2 May 2010 15:07:02 +0100 Subject: cosmetic --- src/rabbit_amqqueue_process.erl | 6 ++++-- src/rabbit_invariable_queue.erl | 25 ++++++++----------------- src/rabbit_variable_queue.erl | 4 ---- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index ab88a3c2..02254b4e 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -175,8 +175,10 @@ set_sync_timer(State, _Fun) -> {State, 0}. ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after(?RAM_DURATION_UPDATE_INTERVAL, rabbit_amqqueue, - update_ram_duration, [self()]), + {ok, TRef} = timer:apply_after( + ?RAM_DURATION_UPDATE_INTERVAL, + rabbit_amqqueue, update_ram_duration, + [self()]), State#q{rate_timer_ref = TRef}; ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> State#q{rate_timer_ref = undefined}; diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 722ea321..b4fd9156 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -178,30 +178,21 @@ requeue(AckTags, State = #iv_state { pending_ack = PA, queue = Q, PA1 = remove_acks(AckTags, PA), State #iv_state { pending_ack = PA1, queue = Q1, len = Len1 }. -len(#iv_state { len = Len }) -> - Len. +len(#iv_state { len = Len }) -> Len. -is_empty(State) -> - 0 == len(State). +is_empty(State) -> 0 == len(State). -set_ram_duration_target(_DurationTarget, State) -> - %% HA! - State. +set_ram_duration_target(_DurationTarget, State) -> State. -ram_duration(State) -> - {0, State}. +ram_duration(State) -> {0, State}. -needs_sync(_State) -> - false. +needs_sync(_State) -> false. -sync(State) -> - State. +sync(State) -> State. -handle_pre_hibernate(State) -> - State. +handle_pre_hibernate(State) -> State. -status(_State) -> - []. +status(_State) -> []. %%---------------------------------------------------------------------------- diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b5cf9845..96f5401a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -236,10 +236,6 @@ pending_ack :: dict() }). --spec(tx_commit_post_msg_store/5 :: - (boolean(), [guid()], [ack()], {pid(), any()}, state()) -> state()). --spec(tx_commit_index/1 :: (state()) -> state()). - -include("rabbit_backing_queue_spec.hrl"). -endif. -- cgit v1.2.1 From 62b47a2ae46918934849dec87f232b9d7d6a9697 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 2 May 2010 19:16:41 +0100 Subject: cancel queue's timers on shutdown This is cleaner, though not strictly necessary. 
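The diff below converts both queue timers to a matched ensure/stop pair.
Distilled out of the queue process, the idiom is (record field, macro and
message names here are placeholders):

    ensure_timer(State = #q{timer_ref = undefined}) ->
        %% timer:apply_after/4 hands back {ok, TRef}; keeping the TRef
        %% (rather than a boolean) is what makes cancellation possible
        {ok, TRef} = timer:apply_after(?INTERVAL, gen_server2, cast,
                                       [self(), tick]),
        State#q{timer_ref = TRef};
    ensure_timer(State) ->
        State.                           %% already armed

    stop_timer(State = #q{timer_ref = undefined}) ->
        State;                           %% not armed
    stop_timer(State = #q{timer_ref = TRef}) ->
        {ok, cancel} = timer:cancel(TRef),
        State#q{timer_ref = undefined}.

Because both halves are no-ops when there is nothing to do,
terminate_shutdown below can simply call
stop_sync_timer(stop_rate_timer(State)) unconditionally.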
---
 src/rabbit_amqqueue_process.erl | 34 ++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 02254b4e..eb103ec9 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -130,8 +130,9 @@ code_change(_OldVsn, State, _Extra) ->

 %%----------------------------------------------------------------------------

-terminate_shutdown(Fun, State = #q{backing_queue = BQ,
-                                   backing_queue_state = BQS}) ->
+terminate_shutdown(Fun, State) ->
+    State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
+        stop_sync_timer(stop_rate_timer(State)),
     case BQS of
         undefined -> State;
         _         -> ok = rabbit_memory_monitor:deregister(self()),
@@ -143,7 +144,7 @@ terminate_shutdown(Fun, State = #q{backing_queue = BQ,
                                        BQ:tx_rollback(Txn, BQSN),
                                    BQSN1
                            end, BQS, all_ch_record()),
-                     State#q{backing_queue_state = Fun(BQS1)}
+                     State1#q{backing_queue_state = Fun(BQS1)}
     end.

 reply(Reply, NewState) ->
@@ -156,23 +157,28 @@ noreply(NewState) ->
     {NewState1, Timeout} = next_state(NewState),
     {noreply, NewState1, Timeout}.

-next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
-    set_sync_timer(ensure_rate_timer(State), BQ:needs_sync(BQS)).
+next_state(State) ->
+    State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
+        ensure_rate_timer(State),
+    case BQ:needs_sync(BQS) of
+        true  -> {ensure_sync_timer(State1), 0};
+        false -> {stop_sync_timer(State1), hibernate}
+    end.

-set_sync_timer(State = #q{sync_timer_ref = undefined}, false) ->
-    {State, hibernate};
-set_sync_timer(State = #q{sync_timer_ref = undefined,
-                          backing_queue = BQ}, true) ->
+ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) ->
     {ok, TRef} = timer:apply_after(
                    ?SYNC_INTERVAL,
                    rabbit_amqqueue, maybe_run_queue_via_backing_queue,
                    [self(), fun (BQS) -> BQ:sync(BQS) end]),
-    {State#q{sync_timer_ref = TRef}, 0};
-set_sync_timer(State = #q{sync_timer_ref = TRef}, false) ->
+    State#q{sync_timer_ref = TRef};
+ensure_sync_timer(State) ->
+    State.
+
+stop_sync_timer(State = #q{sync_timer_ref = undefined}) ->
+    State;
+stop_sync_timer(State = #q{sync_timer_ref = TRef}) ->
     {ok, cancel} = timer:cancel(TRef),
-    {State#q{sync_timer_ref = undefined}, hibernate};
-set_sync_timer(State, _Fun) ->
-    {State, 0}.
+    State#q{sync_timer_ref = undefined}.
ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> {ok, TRef} = timer:apply_after( -- cgit v1.2.1 From 25e0e141c365ebdc92ca205e7f1686854d9ceee8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 2 May 2010 19:46:30 +0100 Subject: cosmetic --- src/rabbit_amqqueue_process.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index eb103ec9..06712e9c 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -269,7 +269,8 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, ChPid, ConsumerTag, AckRequired, {QName, self(), AckTag, IsDelivered, Message}), ChAckTags1 = case AckRequired of - true -> sets:add_element(AckTag, ChAckTags); + true -> sets:add_element( + AckTag, ChAckTags); false -> ChAckTags end, NewC = C#cr{unsent_message_count = Count + 1, @@ -585,8 +586,8 @@ handle_call({basic_get, ChPid, NoAck}, _From, C#cr{acktags = sets:add_element(AckTag, ChAckTags)}); false -> ok end, - reply({ok, Remaining, {QName, self(), AckTag, IsDelivered, Message}}, - State#q{backing_queue_state = BQS1}) + Msg = {QName, self(), AckTag, IsDelivered, Message}, + reply({ok, Remaining, Msg}, State#q{backing_queue_state = BQS1}) end; handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, @@ -673,8 +674,7 @@ handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, handle_call({delete, IfUnused, IfEmpty}, _From, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> - Length = BQ:len(BQS), - IsEmpty = Length == 0, + IsEmpty = BQ:is_empty(BQS), IsUnused = is_unused(State), if IfEmpty and not(IsEmpty) -> @@ -682,7 +682,7 @@ handle_call({delete, IfUnused, IfEmpty}, _From, IfUnused and not(IsUnused) -> reply({error, in_use}, State); true -> - {stop, normal, {ok, Length}, State} + {stop, normal, {ok, BQ:len(BQS)}, State} end; handle_call(purge, _From, State = #q{backing_queue = BQ, -- cgit v1.2.1 From 9a5680df0164cf97a7bf8320a89b1f6123674845 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 3 May 2010 15:56:06 +0100 Subject: Improvement to docs on BQ:sync --- src/rabbit_backing_queue.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index d9c89820..2dba00ad 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -117,7 +117,9 @@ behaviour_info(callbacks) -> %% manage (either on an empty mailbox, or when a timer fires)? {needs_sync, 1}, - %% Called (eventually) after needs_sync returns 'true'. + %% Called (eventually) after needs_sync returns 'true'. Note this + %% may be called more than once for each 'true' returned from + %% needs_sync. {sync, 1}, %% Called immediately before the queue hibernates. 
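Since sync/1 can run more than once per 'true' returned from needs_sync/1
(both the timer and the timeout path may trigger it), an implementation
must be harmless to call when nothing is left to flush. One way to
guarantee that, sketched against the on_sync triple that
rabbit_variable_queue uses to track pending work (the clause shapes are
illustrative, not the shipped code):

    %% nothing pending: a second (spurious) sync is a no-op
    sync(State = #vqstate { on_sync = {[], [], []} }) ->
        State;
    sync(State) ->
        tx_commit_index(State).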
-- cgit v1.2.1 From 38835704061cc61a1d462884a526104f16119d80 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 4 May 2010 17:55:52 +0100 Subject: cosmetic --- src/bpqueue.erl | 6 +- src/gatherer.erl | 14 ++-- src/rabbit_msg_store.erl | 44 ++++++----- src/rabbit_queue_index.erl | 24 +++--- src/rabbit_variable_queue.erl | 165 ++++++++++++++++++++++-------------------- 5 files changed, 136 insertions(+), 117 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 7acc9697..9cd0f230 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -111,7 +111,8 @@ in_q(Prefix, Queue, BPQ = {0, Q}) -> N -> {N, queue:in({Prefix, Queue}, Q)} end; in_q(Prefix, Queue, BPQ) -> - in_q1({fun queue:in/2, fun queue:out_r/1, fun queue:join/2}, + in_q1({fun queue:in/2, fun queue:out_r/1, + fun queue:join/2}, Prefix, Queue, BPQ). in_q_r(Prefix, Queue, BPQ = {0, _Q}) -> @@ -232,7 +233,8 @@ to_list1({Prefix, InnerQ}) -> map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> - map_fold_filter1({fun queue:out/1, fun queue:in/2, fun in_q/3, fun join/2}, + map_fold_filter1({fun queue:out/1, fun queue:in/2, + fun in_q/3, fun join/2}, N, PFilter, Fun, Init, Q, new()). map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> diff --git a/src/gatherer.erl b/src/gatherer.erl index 8c44388c..d5b35e96 100644 --- a/src/gatherer.erl +++ b/src/gatherer.erl @@ -98,9 +98,9 @@ handle_call({finished, Token}, _From, false -> {reply, ok, State1, hibernate} end; -handle_call(fetch, From, State = - #gstate { blocking = Blocking, results = Results, - waiting_on = Tokens }) -> +handle_call(fetch, From, + State = #gstate { waiting_on = Tokens, results = Results, + blocking = Blocking }) -> case queue:out(Results) of {empty, _Results} -> case sets:size(Tokens) of @@ -117,8 +117,8 @@ handle_call(fetch, From, State = handle_call(Msg, _From, State) -> {stop, {unexpected_call, Msg}, State}. -handle_cast({produce, Result}, State = #gstate { blocking = Blocking, - results = Results }) -> +handle_cast({produce, Result}, + State = #gstate { blocking = Blocking, results = Results }) -> {noreply, case queue:out(Blocking) of {empty, _Blocking} -> State #gstate { results = queue:in(Result, Results) }; @@ -137,6 +137,6 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. terminate(_Reason, State = #gstate { blocking = Blocking } ) -> - [gen_server2:reply(Blocked, finished) - || Blocked <- queue:to_list(Blocking) ], + [gen_server2:reply(Blocked, finished) || + Blocked <- queue:to_list(Blocking)], State. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 7e09f7fa..6bff9ae6 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -128,7 +128,8 @@ -spec(remove/2 :: (server(), [guid()]) -> 'ok'). -spec(release/2 :: (server(), [guid()]) -> 'ok'). -spec(sync/3 :: (server(), [guid()], fun (() -> any())) -> 'ok'). --spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> 'ok'). +-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> + 'ok'). -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). -spec(client_init/2 :: (server(), binary()) -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). 
@@ -866,7 +867,8 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, true -> add_to_pending_gc_completion({read, Guid, From}, State); false -> - {Msg, State1} = read_from_disk(MsgLoc, State, DedupCacheEts), + {Msg, State1} = read_from_disk(MsgLoc, State, + DedupCacheEts), gen_server2:reply(From, {ok, Msg}), State1 end @@ -1136,7 +1138,8 @@ insert_into_cache(DedupCacheEts, Guid, Msg) -> %% index %%---------------------------------------------------------------------------- -index_lookup(Key, #client_msstate { index_module = Index, index_state = State }) -> +index_lookup(Key, #client_msstate { index_module = Index, + index_state = State }) -> Index:lookup(Key, State); index_lookup(Key, #msstate { index_module = Index, index_state = State }) -> @@ -1148,8 +1151,8 @@ index_insert(Obj, #msstate { index_module = Index, index_state = State }) -> index_update(Obj, #msstate { index_module = Index, index_state = State }) -> Index:update(Obj, State). -index_update_fields(Key, Updates, - #msstate { index_module = Index, index_state = State }) -> +index_update_fields(Key, Updates, #msstate { index_module = Index, + index_state = State }) -> Index:update_fields(Key, Updates, State). index_delete(Key, #msstate { index_module = Index, index_state = State }) -> @@ -1324,9 +1327,10 @@ build_index(true, _Files, State = file_size = FileSize, file = File }, {_Offset, State1 = #msstate { sum_valid_data = SumValid, sum_file_size = SumFileSize }}) -> - {FileSize, State1 #msstate { sum_valid_data = SumValid + ValidTotalSize, - sum_file_size = SumFileSize + FileSize, - current_file = File }} + {FileSize, State1 #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize, + current_file = File }} end, {0, State}, FileSummaryEts); build_index(false, Files, State) -> {ok, Pid} = gatherer:start_link(), @@ -1361,8 +1365,9 @@ build_index(Gatherer, Left, [], build_index(Gatherer, Left, [File|Files], State) -> Child = make_ref(), ok = gatherer:wait_on(Gatherer, Child), - ok = worker_pool:submit_async({?MODULE, build_index_worker, - [Gatherer, Child, State, Left, File, Files]}), + ok = worker_pool:submit_async( + {?MODULE, build_index_worker, + [Gatherer, Child, State, Left, File, Files]}), build_index(Gatherer, File, Files, State). 
build_index_worker( @@ -1409,12 +1414,13 @@ build_index_worker( %% garbage collection / compaction / aggregation -- internal %%---------------------------------------------------------------------------- -maybe_roll_to_new_file(Offset, - State = #msstate { dir = Dir, - current_file_handle = CurHdl, - current_file = CurFile, - file_summary_ets = FileSummaryEts, - cur_file_cache_ets = CurFileCacheEts }) +maybe_roll_to_new_file( + Offset, + State = #msstate { dir = Dir, + current_file_handle = CurHdl, + current_file = CurFile, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts }) when Offset >= ?FILE_SIZE_LIMIT -> State1 = internal_sync(State), ok = file_handle_cache:close(CurHdl), @@ -1631,7 +1637,8 @@ combine_files(#file_summary { file = Source, ok = file_handle_cache:sync(DestinationHdl), ok = file_handle_cache:delete(TmpHdl) end, - {SourceWorkList, SourceValid} = find_unremoved_messages_in_file(Source, State), + {SourceWorkList, SourceValid} = + find_unremoved_messages_in_file(Source, State), ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, SourceHdl, DestinationHdl, Destination, State), %% tidy up @@ -1700,7 +1707,8 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, {ok, BlockStart1} = file_handle_cache:position(SourceHdl, BlockStart1), {ok, BSize1} = - file_handle_cache:copy(SourceHdl, DestinationHdl, BSize1), + file_handle_cache:copy(SourceHdl, DestinationHdl, + BSize1), ok = file_handle_cache:sync(DestinationHdl) end; {FinalOffsetZ, _BlockStart1, _BlockEnd1} -> diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d6ef0cb8..8d22d36a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -195,7 +195,8 @@ }). -spec(init/3 :: (queue_name(), boolean(), fun ((guid()) -> boolean())) -> - {'undefined' | non_neg_integer(), binary(), binary(), [any()], qistate()}). + {'undefined' | + non_neg_integer(), binary(), binary(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). 
-spec(write_published/4 :: (guid(), seq_id(), boolean(), qistate()) @@ -265,15 +266,14 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment), Segment2 = - #segment { pubs = PubCount1, acks = AckCount1 } = + #segment { pubs = PubCount1, acks = AckCount1 } = array:sparse_foldl( - fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, + fun (RelSeq, {{Guid, _IsPersistent}, Del, + no_ack}, Segment3) -> - Segment4 = - maybe_add_to_journal( - ContainsCheckFun(Guid), - CleanShutdown, Del, RelSeq, Segment3), - Segment4 + maybe_add_to_journal( + ContainsCheckFun(Guid), + CleanShutdown, Del, RelSeq, Segment3) end, Segment1 #segment { pubs = PubCount, acks = AckCount }, SegEntries), @@ -485,9 +485,11 @@ queue_index_walker(DurableQueues) when is_list(DurableQueues) -> queue_index_walker({[], Gatherer}) -> case gatherer:fetch(Gatherer) of - finished -> rabbit_misc:unlink_and_capture_exit(Gatherer), - finished; - {value, {Guid, Count}} -> {Guid, Count, {[], Gatherer}} + finished -> + rabbit_misc:unlink_and_capture_exit(Gatherer), + finished; + {value, {Guid, Count}} -> + {Guid, Count, {[], Gatherer}} end; queue_index_walker({[QueueName | QueueNames], Gatherer}) -> Child = make_ref(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 96f5401a..ba493e02 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -208,33 +208,37 @@ end_seq_id :: non_neg_integer() }). -type(state() :: #vqstate { - q1 :: queue(), - q2 :: bpqueue(), - delta :: delta(), - q3 :: bpqueue(), - q4 :: queue(), - duration_target :: non_neg_integer(), - target_ram_msg_count :: non_neg_integer(), - ram_msg_count :: non_neg_integer(), - ram_msg_count_prev :: non_neg_integer(), - ram_index_count :: non_neg_integer(), - index_state :: any(), - next_seq_id :: seq_id(), - out_counter :: non_neg_integer(), - in_counter :: non_neg_integer(), - egress_rate :: {{integer(), integer(), integer()}, non_neg_integer()}, - avg_egress_rate :: float(), - ingress_rate :: {{integer(), integer(), integer()}, non_neg_integer()}, - avg_ingress_rate :: float(), - rate_timestamp :: {integer(), integer(), integer()}, - len :: non_neg_integer(), - on_sync :: {[[ack()]], [[guid()]], [fun (() -> any())]}, - msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, - persistent_store :: pid() | atom(), - persistent_count :: non_neg_integer(), - transient_threshold :: non_neg_integer(), - pending_ack :: dict() - }). + q1 :: queue(), + q2 :: bpqueue(), + delta :: delta(), + q3 :: bpqueue(), + q4 :: queue(), + duration_target :: non_neg_integer(), + target_ram_msg_count :: non_neg_integer(), + ram_msg_count :: non_neg_integer(), + ram_msg_count_prev :: non_neg_integer(), + ram_index_count :: non_neg_integer(), + index_state :: any(), + next_seq_id :: seq_id(), + out_counter :: non_neg_integer(), + in_counter :: non_neg_integer(), + egress_rate :: {{integer(), integer(), integer()}, + non_neg_integer()}, + avg_egress_rate :: float(), + ingress_rate :: {{integer(), integer(), integer()}, + non_neg_integer()}, + avg_ingress_rate :: float(), + rate_timestamp :: {integer(), integer(), integer()}, + len :: non_neg_integer(), + on_sync :: {[[ack()]], [[guid()]], + [fun (() -> any())]}, + msg_store_clients :: 'undefined' | {{any(), binary()}, + {any(), binary()}}, + persistent_store :: pid() | atom(), + persistent_count :: non_neg_integer(), + transient_threshold :: non_neg_integer(), + pending_ack :: dict() + }). 
-include("rabbit_backing_queue_spec.hrl"). @@ -286,34 +290,37 @@ init(QueueName, IsDurable, _Recover) -> end_seq_id = NextSeqId } end, Now = now(), - State = - #vqstate { q1 = queue:new(), q2 = bpqueue:new(), - delta = Delta, - q3 = bpqueue:new(), q4 = queue:new(), - duration_target = undefined, - target_ram_msg_count = undefined, - ram_msg_count = 0, - ram_msg_count_prev = 0, - ram_index_count = 0, - index_state = IndexState1, - next_seq_id = NextSeqId, - out_counter = 0, - in_counter = 0, - egress_rate = {Now, 0}, - avg_egress_rate = 0, - ingress_rate = {Now, DeltaCount1}, - avg_ingress_rate = 0, - rate_timestamp = Now, - len = DeltaCount1, - on_sync = {[], [], []}, - msg_store_clients = { - {rabbit_msg_store:client_init(PersistentStore, PRef), PRef}, - {rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), TRef}}, - persistent_store = PersistentStore, - persistent_count = DeltaCount1, - transient_threshold = NextSeqId, - pending_ack = dict:new() - }, + PersistentClient = rabbit_msg_store:client_init(PersistentStore, PRef), + TransientClient = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), + State = #vqstate { + q1 = queue:new(), + q2 = bpqueue:new(), + delta = Delta, + q3 = bpqueue:new(), + q4 = queue:new(), + duration_target = undefined, + target_ram_msg_count = undefined, + ram_msg_count = 0, + ram_msg_count_prev = 0, + ram_index_count = 0, + index_state = IndexState1, + next_seq_id = NextSeqId, + out_counter = 0, + in_counter = 0, + egress_rate = {Now, 0}, + avg_egress_rate = 0, + ingress_rate = {Now, DeltaCount1}, + avg_ingress_rate = 0, + rate_timestamp = Now, + len = DeltaCount1, + on_sync = {[], [], []}, + msg_store_clients = {{PersistentClient, PRef}, + {TransientClient, TRef}}, + persistent_store = PersistentStore, + persistent_count = DeltaCount1, + transient_threshold = NextSeqId, + pending_ack = dict:new() + }, maybe_deltas_to_betas(State). terminate(State) -> @@ -594,7 +601,8 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) -> msg_on_disk = false, is_persistent = false, msg = Msg }} -> - {_SeqId, StateN2} = publish(Msg, true, false, StateN1), + {_SeqId, StateN2} = + publish(Msg, true, false, StateN1), {SeqIdsAcc, Dict, StateN2}; {ok, {IsPersistent, Guid}} -> {{ok, Msg = #basic_message{}}, MSCStateN1} = @@ -889,28 +897,25 @@ should_force_index_to_disk(State = msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) -> Self = self(), - fun() -> - spawn( - fun() -> - ok = rabbit_misc:with_exit_handler( - fun() -> rabbit_msg_store:remove( - ?PERSISTENT_MSG_STORE, - PersistentGuids) - end, - fun() -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, fun (StateN) -> - tx_commit_post_msg_store( - IsTransientPubs, Pubs, - AckTags, Fun, StateN) - end) - end) - end) + Fun = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, fun (StateN) -> tx_commit_post_msg_store( + IsTransientPubs, Pubs, + AckTags, Fun, StateN) + end) + end, + fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( + fun () -> rabbit_msg_store:remove( + ?PERSISTENT_MSG_STORE, + PersistentGuids) + end, + Fun) + end) end. 
tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, State = - #vqstate { on_sync = OnSync = {SAcks, SPubs, SFuns}, - persistent_store = PersistentStore, - pending_ack = PA }) -> + #vqstate { on_sync = OnSync = {SAcks, SPubs, SFuns}, + persistent_store = PersistentStore, + pending_ack = PA }) -> %% If we are a non-durable queue, or (no persisent pubs, and no %% persistent acks) then we can skip the queue_index loop. case PersistentStore == ?TRANSIENT_MSG_STORE orelse @@ -1038,7 +1043,8 @@ remove_queue_entries1( {PersistentStore, CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. fetch_from_q3_or_delta(State = #vqstate { - q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, + q1 = Q1, q2 = Q2, + delta = #delta { count = DeltaCount }, q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, msg_store_clients = MSCState, @@ -1419,9 +1425,10 @@ maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> q4 = Q4a } end, Q4, State). -maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = - #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) +maybe_push_alphas_to_betas( + _Generator, _Consumer, _Q, + State = #vqstate { ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; maybe_push_alphas_to_betas( -- cgit v1.2.1 From 1471064097654dea30a7a868cb4cd8b9bd2fba9a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 4 May 2010 18:16:49 +0100 Subject: cosmetic --- src/bpqueue.erl | 24 ++++++++++++---------- src/rabbit_msg_file.erl | 6 +++--- src/rabbit_msg_store.erl | 45 +++++++++++++++++++++--------------------- src/rabbit_msg_store_gc.erl | 10 ++++++---- src/rabbit_queue_index.erl | 46 +++++++++++++++++++++---------------------- src/rabbit_variable_queue.erl | 46 +++++++++++++++++++++---------------------- 6 files changed, 92 insertions(+), 85 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 9cd0f230..3010cb11 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -48,8 +48,8 @@ -type(bpqueue() :: {non_neg_integer(), queue()}). -type(prefix() :: any()). -type(value() :: any()). --type(result() :: {'empty', bpqueue()} | - {{'value', prefix(), value()}, bpqueue()}). +-type(result() :: ({'empty', bpqueue()} | + {{'value', prefix(), value()}, bpqueue()})). -spec(new/0 :: () -> bpqueue()). -spec(is_empty/1 :: (bpqueue()) -> boolean()). @@ -63,14 +63,18 @@ -spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). -spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()). -spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]). --spec(map_fold_filter_l/4 :: - (fun ((prefix()) -> boolean()), - fun ((value(), B) -> ({prefix(), value(), B} | 'stop')), B, - bpqueue()) -> {bpqueue(), B}). --spec(map_fold_filter_r/4 :: - (fun ((prefix()) -> boolean()), - fun ((value(), B) -> ({prefix(), value(), B} | 'stop')), B, - bpqueue()) -> {bpqueue(), B}). +-spec(map_fold_filter_l/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). +-spec(map_fold_filter_r/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). -endif. 
diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl
index 46288ccd..301f4a9f 100644
--- a/src/rabbit_msg_file.erl
+++ b/src/rabbit_msg_file.erl
@@ -86,9 +86,9 @@ read(FileHdl, TotalSize) ->
     BodyBinSize = Size - ?GUID_SIZE_BYTES,
     case file_handle_cache:read(FileHdl, TotalSize) of
         {ok, <<Size:?INTEGER_SIZE_BITS,
-              Guid:?GUID_SIZE_BYTES/binary,
-              MsgBodyBin:BodyBinSize/binary,
-              ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
+               Guid:?GUID_SIZE_BYTES/binary,
+               MsgBodyBin:BodyBinSize/binary,
+               ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
             {ok, {Guid, binary_to_term(MsgBodyBin)}};
         KO -> KO
     end.
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 6bff9ae6..c4a9885f 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -69,8 +69,7 @@
           index_module,        %% the module for index ops
           index_state,         %% where are messages?
           current_file,        %% current file name as number
-          current_file_handle, %% current file handle
-                               %% since the last fsync?
+          current_file_handle, %% current file handle since the last fsync?
           file_handle_cache,   %% file handle cache
           on_sync,             %% pending sync requests
           sync_timer_ref,      %% TRef for our interval timer
@@ -85,7 +84,7 @@
           cur_file_cache_ets,  %% tid of current file cache table
           client_refs,         %% set of references of all registered clients
           recovered_state      %% boolean: did we recover state?
-         }).
+        }).

 -record(client_msstate,
         { file_handle_cache,
@@ -96,7 +95,7 @@
           file_summary_ets,
           dedup_cache_ets,
           cur_file_cache_ets
-         }).
+        }).

 -record(file_summary,
         {file, valid_total_size, contiguous_top, left, right, file_size,
@@ -119,11 +118,11 @@

 -spec(start_link/4 :: (atom(), file_path(), [binary()] | 'undefined',
                        startup_fun_state()) ->
-                          {'ok', pid()} | 'ignore' | {'error', any()}).
+      {'ok', pid()} | 'ignore' | {'error', any()}).
 -spec(write/4 :: (server(), guid(), msg(), client_msstate()) ->
-                     {'ok', client_msstate()}).
+      {'ok', client_msstate()}).
 -spec(read/3 :: (server(), guid(), client_msstate()) ->
-                    {{'ok', msg()} | 'not_found', client_msstate()}).
+      {{'ok', msg()} | 'not_found', client_msstate()}).
 -spec(contains/2 :: (server(), guid()) -> boolean()).
 -spec(remove/2 :: (server(), [guid()]) -> 'ok').
 -spec(release/2 :: (server(), [guid()]) -> 'ok').
@@ -305,14 +304,14 @@ start_link(Server, Dir, ClientRefs, StartupFunState) ->
                            [Server, Dir, ClientRefs, StartupFunState],
                            [{timeout, infinity}]).

-write(Server, Guid, Msg, CState =
-          #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
+write(Server, Guid, Msg,
+      CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
     ok = add_to_cache(CurFileCacheEts, Guid, Msg),
     {gen_server2:cast(Server, {write, Guid, Msg}), CState}.

-read(Server, Guid, CState =
-         #client_msstate { dedup_cache_ets = DedupCacheEts,
-                           cur_file_cache_ets = CurFileCacheEts }) ->
+read(Server, Guid,
+     CState = #client_msstate { dedup_cache_ets = DedupCacheEts,
+                                cur_file_cache_ets = CurFileCacheEts }) ->
     %% 1. Check the dedup cache
     case fetch_and_increment_cache(DedupCacheEts, Guid) of
         not_found ->
@@ -393,9 +392,10 @@ add_to_cache(CurFileCacheEts, Guid, Msg) ->
             end
     end.

-client_read1(Server, #msg_location { guid = Guid, file = File } =
-                 MsgLocation, Defer, CState =
-                 #client_msstate { file_summary_ets = FileSummaryEts }) ->
+client_read1(Server,
+             #msg_location { guid = Guid, file = File } = MsgLocation,
+             Defer,
+             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
     case ets:lookup(FileSummaryEts, File) of
         [] -> %% File has been GC'd and no longer exists. Go around again.
             read(Server, Guid, CState);
@@ -404,7 +404,8 @@ client_read1(Server, #msg_location { guid = Guid, file = File } =
     end.
client_read2(_Server, false, undefined, - #msg_location { guid = Guid, ref_count = RefCount }, Defer, + #msg_location { guid = Guid, ref_count = RefCount }, + Defer, CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts, dedup_cache_ets = DedupCacheEts }) -> case ets:lookup(CurFileCacheEts, Guid) of @@ -421,10 +422,10 @@ client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> Defer(); client_read2(Server, false, _Right, #msg_location { guid = Guid, ref_count = RefCount, file = File }, - Defer, CState = - #client_msstate { file_handles_ets = FileHandlesEts, - file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts }) -> + Defer, + CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> %% It's entirely possible that everything we're doing from here on %% is for the wrong file, or a non-existent file, as a GC may have %% finished. @@ -486,7 +487,7 @@ client_read2(Server, false, _Right, end. close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = - CState) -> + CState) -> Objs = ets:match_object(FileHandlesEts, {{self(), '_'}, close}), lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> true = ets:delete(FileHandlesEts, Key), @@ -559,7 +560,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> cur_file_cache_ets = CurFileCacheEts, client_refs = ClientRefs1, recovered_state = Recovered - }, + }, ok = count_msg_refs(Recovered, MsgRefDeltaGen, MsgRefDeltaGenInit, State), FileNames = diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index ca5e2c6f..8a275c39 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -80,10 +80,12 @@ init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> handle_call(stop, _From, State) -> {stop, normal, ok, State}. -handle_cast({gc, Source, Destination}, State = - #gcstate { parent = Parent, dir = Dir, index_module = Index, - index_state = IndexState, - file_summary_ets = FileSummaryEts }) -> +handle_cast({gc, Source, Destination}, + State = #gcstate { dir = Dir, + index_state = IndexState, + index_module = Index, + parent = Parent, + file_summary_ets = FileSummaryEts }) -> Reclaimed = rabbit_msg_store:gc(Source, Destination, {FileSummaryEts, Dir, Index, IndexState}), ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, Destination), diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 8d22d36a..369a52d9 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -160,7 +160,7 @@ segments, journal_handle, dirty_count - }). + }). -record(segment, { pubs, @@ -169,7 +169,7 @@ journal_entries, path, num - }). + }). -include("rabbit_msg_store.hrl"). @@ -185,14 +185,14 @@ journal_entries :: array(), path :: file_path(), num :: non_neg_integer() - })). + })). -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). -type(qistate() :: #qistate { dir :: file_path(), segments :: 'undefined' | seg_dict(), journal_handle :: hdl(), dirty_count :: integer() - }). + }). -spec(init/3 :: (queue_name(), boolean(), fun ((guid()) -> boolean())) -> {'undefined' | @@ -212,7 +212,7 @@ -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). -spec(prepare_msg_store_seed_funs/1 :: - ([queue_name()]) -> + ([queue_name()]) -> {{[binary()] | 'undefined', startup_fun_state()}, {[binary()] | 'undefined', startup_fun_state()}}). 
@@ -553,7 +553,7 @@ blank_state(QueueName) ->
                segments = segments_new(),
                journal_handle = undefined,
                dirty_count = 0
-             }.
+              }.

 array_new() ->
     array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).
@@ -617,7 +617,7 @@ segment_new(Seg, Dir) ->
               journal_entries = array_new(),
               path = seg_num_to_path(Dir, Seg),
               num = Seg
-            }.
+             }.

 segment_find_or_new(Seg, Dir, Segments) ->
     case segment_find(Seg, Segments) of
@@ -683,15 +683,15 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) ->
             {Guid, IsPersistent} ->
                 file_handle_cache:append(
                   Hdl, [<<?PUB_PREFIX:?PUB_PREFIX_BITS,
-                         (bool_to_int(IsPersistent)):1,
-                         RelSeq:?REL_SEQ_BITS>>, Guid])
+                          (bool_to_int(IsPersistent)):1,
+                          RelSeq:?REL_SEQ_BITS>>, Guid])
         end,
     ok = case {Del, Ack} of
              {no_del, no_ack} -> ok;
             _ -> Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-                           RelSeq:?REL_SEQ_BITS>>,
+                            RelSeq:?REL_SEQ_BITS>>,
                  file_handle_cache:append(
                    Hdl, case {Del, Ack} of
                             {del, ack} -> [Binary, Binary];
@@ -710,14 +710,14 @@ terminate(StoreShutdown, Terms, State =
         _ -> file_handle_cache:close(JournalHdl)
     end,
     SegTerms = segment_fold(
-                fun (Seg, #segment { handle = Hdl, pubs = PubCount,
-                                     acks = AckCount }, SegTermsAcc) ->
-                        ok = case Hdl of
-                                 undefined -> ok;
-                                 _ -> file_handle_cache:close(Hdl)
-                             end,
-                        [{Seg, {PubCount, AckCount}} | SegTermsAcc]
-                end, [], Segments),
+                 fun (Seg, #segment { handle = Hdl, pubs = PubCount,
+                                      acks = AckCount }, SegTermsAcc) ->
+                         ok = case Hdl of
+                                  undefined -> ok;
+                                  _ -> file_handle_cache:close(Hdl)
+                              end,
+                         [{Seg, {PubCount, AckCount}} | SegTermsAcc]
+                 end, [], Segments),
     case StoreShutdown of
         true  -> store_clean_shutdown([{segments, SegTerms} | Terms], Dir);
         false -> ok
     end,
@@ -756,13 +756,13 @@ load_segment(KeepAcks,
 load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) ->
     case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
         {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-              RelSeq:?REL_SEQ_BITS>>} ->
+               RelSeq:?REL_SEQ_BITS>>} ->
             {AckCount1, SegEntries1} =
                 deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries),
             load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount,
                                  AckCount1);
         {ok, <<?PUB_PREFIX:?PUB_PREFIX_BITS,
-              IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
+               IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
             %% because we specify /binary, and binaries are complete
             %% bytes, the size spec is in bytes, not bits.
             {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES),
@@ -834,9 +834,9 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
                     <> = <>,
                     Publish = {Guid,
                               case Prefix of
-                                  ?PUB_PERSIST_JPREFIX -> true;
-                                  ?PUB_TRANS_JPREFIX   -> false
-                              end},
+                                   ?PUB_PERSIST_JPREFIX -> true;
+                                   ?PUB_TRANS_JPREFIX   -> false
+                               end},
                     load_journal_entries(
                       add_to_journal(SeqId, Publish, State));
                 _ErrOrEoF -> %% err, we've lost at least a publish
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 96f5401a..6895700c 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -162,7 +162,7 @@
           persistent_count,
           transient_threshold,
           pending_ack
-         }).
+        }).

 -record(msg_status,
         { seq_id,
@@ -172,13 +172,13 @@
           is_delivered,
           msg_on_disk,
           index_on_disk
-         }).
+        }).

 -record(delta,
         { start_seq_id,
           count,
           end_seq_id %% note the end_seq_id is always >, not >=
-         }).
+        }).

 -record(tx, { pending_messages, pending_acks }).
%% the only difference between purge and delete is that delete also @@ -359,7 +359,7 @@ delete_and_terminate(State) -> delete1(PersistentStore, TransientThreshold, NextSeqId, 0, DeltaSeqId, IndexState3), IndexState4 - end, + end, IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2), rabbit_msg_store:delete_client(PersistentStore, PRef), rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), @@ -432,9 +432,9 @@ fetch(AckRequired, State = {loaded, State1} -> fetch(AckRequired, State1) end; {{value, MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, - is_persistent = IsPersistent, is_delivered = IsDelivered, - msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + msg = Msg, guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Q4a} -> AckTag = case AckRequired of @@ -592,8 +592,8 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) -> persistent_count = PCount }} = lists:foldl( fun (SeqId, {SeqIdsAcc, Dict, StateN = - #vqstate { msg_store_clients = MSCStateN, - pending_ack = PAN}}) -> + #vqstate { msg_store_clients = MSCStateN, + pending_ack = PAN }}) -> PAN1 = dict:erase(SeqId, PAN), StateN1 = StateN #vqstate { pending_ack = PAN1 }, case dict:find(SeqId, PAN) of @@ -618,9 +618,9 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) -> false -> {SeqIdsAcc, ?TRANSIENT_MSG_STORE} end, - {SeqIdsAcc1, - rabbit_misc:dict_cons(MsgStore, Guid, Dict), - StateN3} + {SeqIdsAcc1, + rabbit_misc:dict_cons(MsgStore, Guid, Dict), + StateN3} end end, {[], dict:new(), State}, AckTags), IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), @@ -644,7 +644,7 @@ set_ram_duration_target( DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, target_ram_msg_count = TargetRamMsgCount - }) -> + }) -> Rate = AvgEgressRate + AvgIngressRate, TargetRamMsgCount1 = case DurationTarget of @@ -819,7 +819,7 @@ betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> is_delivered = IsDelivered, msg_on_disk = true, index_on_disk = true - } | FilteredAcc], + } | FilteredAcc], IndexStateAcc}; false -> {FilteredAcc, IndexStateAcc} @@ -898,10 +898,10 @@ should_force_index_to_disk(State = msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) -> Self = self(), Fun = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, fun (StateN) -> tx_commit_post_msg_store( - IsTransientPubs, Pubs, - AckTags, Fun, StateN) - end) + Self, fun (StateN) -> tx_commit_post_msg_store( + IsTransientPubs, Pubs, + AckTags, Fun, StateN) + end) end, fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( fun () -> rabbit_msg_store:remove( @@ -1195,9 +1195,9 @@ publish(index, MsgStatus, #vqstate { store_beta_entry(MsgStatus2, State1); publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = - #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, - delta = Delta, msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> + #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, + delta = Delta, msg_store_clients = MSCState, + persistent_store = PersistentStore }) -> {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = maybe_write_msg_to_disk(PersistentStore, true, MsgStatus, MSCState), {#msg_status { index_on_disk = true }, IndexState1} = -- cgit v1.2.1 From b92e075a55e8c9b5e314f643d78665b531bf72b3 Mon Sep 17 00:00:00 2001 From: 
Matthias Radestock Date: Tue, 4 May 2010 18:55:21 +0100 Subject: oops --- src/rabbit_variable_queue.erl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6895700c..c5f18a94 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -897,18 +897,17 @@ should_force_index_to_disk(State = msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) -> Self = self(), - Fun = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, fun (StateN) -> tx_commit_post_msg_store( - IsTransientPubs, Pubs, - AckTags, Fun, StateN) - end) - end, + F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, fun (StateN) -> tx_commit_post_msg_store( + IsTransientPubs, Pubs, + AckTags, Fun, StateN) + end) + end, fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( fun () -> rabbit_msg_store:remove( ?PERSISTENT_MSG_STORE, PersistentGuids) - end, - Fun) + end, F) end) end. -- cgit v1.2.1 From 87590a88ea9532c455c29c4c20ff79d5a35b1383 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 4 May 2010 22:54:15 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c4a9885f..2201bebb 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -576,17 +576,16 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> build_index(Recovered1, Files, State), %% read is only needed so that we can seek - {ok, FileHdl} = open_file( - Dir, filenum_to_name(CurFile), - [read | ?WRITE_MODE]), - {ok, Offset} = file_handle_cache:position(FileHdl, Offset), - ok = file_handle_cache:truncate(FileHdl), + {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile), + [read | ?WRITE_MODE]), + {ok, Offset} = file_handle_cache:position(CurHdl, Offset), + ok = file_handle_cache:truncate(CurHdl), {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, FileSummaryEts), - {ok, State1 #msstate { current_file_handle = FileHdl, - gc_pid = GCPid }, hibernate, + {ok, State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }, + hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({read, Guid}, From, State) -> @@ -739,7 +738,7 @@ handle_info({'EXIT', _Pid, Reason}, State) -> terminate(_Reason, State = #msstate { index_state = IndexState, index_module = IndexModule, - current_file_handle = FileHdl, + current_file_handle = CurHdl, gc_pid = GCPid, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, @@ -750,10 +749,10 @@ terminate(_Reason, State = #msstate { index_state = IndexState, %% stop the gc first, otherwise it could be working and we pull %% out the ets tables from under it. ok = rabbit_msg_store_gc:stop(GCPid), - State1 = case FileHdl of + State1 = case CurHdl of undefined -> State; _ -> State2 = internal_sync(State), - file_handle_cache:close(FileHdl), + file_handle_cache:close(CurHdl), State2 end, State3 = close_all_handles(State1), @@ -1010,7 +1009,7 @@ close_all_handles(State = #msstate { file_handle_cache = FHC }) -> State #msstate { file_handle_cache = dict:new() }. 
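The "oops" fix above is a classic single-assignment slip: inside msg_store_callback/5 the parameter Fun was already bound, so Fun = fun () -> ... end did not shadow it — it pattern-matched the fresh closure against the argument, which all but guarantees a badmatch at runtime since two distinct funs compare unequal. Renaming the inner binding to F restores the intent. A minimal sketch of the pitfall, with hypothetical names:

    %% Erlang variables bind once; '=' against a bound name is a match,
    %% not a rebinding.
    broken(Fun) ->
        Fun = fun () -> ok end,   %% matches against the argument: badmatch
        Fun().

    fixed(_Fun) ->
        F = fun () -> ok end,     %% fresh name: a real binding
        F().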
get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC, - dir = Dir }) -> + dir = Dir }) -> {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), {Hdl, CState #client_msstate { file_handle_cache = FHC2 }}; @@ -1021,13 +1020,10 @@ get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC, get_read_handle(FileNum, FHC, Dir) -> case dict:find(FileNum, FHC) of - {ok, Hdl} -> - {Hdl, FHC}; - error -> - {ok, Hdl} = open_file( - Dir, filenum_to_name(FileNum), - ?READ_MODE), - {Hdl, dict:store(FileNum, Hdl, FHC) } + {ok, Hdl} -> {Hdl, FHC}; + error -> {ok, Hdl} = open_file(Dir, filenum_to_name(FileNum), + ?READ_MODE), + {Hdl, dict:store(FileNum, Hdl, FHC)} end. detect_clean_shutdown(Dir) -> @@ -1066,10 +1062,10 @@ preallocate(Hdl, FileSizeLimit, FinalPos) -> {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), ok. -truncate_and_extend_file(FileHdl, Lowpoint, Highpoint) -> - {ok, Lowpoint} = file_handle_cache:position(FileHdl, Lowpoint), - ok = file_handle_cache:truncate(FileHdl), - ok = preallocate(FileHdl, Highpoint, Lowpoint). +truncate_and_extend_file(Hdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint), + ok = file_handle_cache:truncate(Hdl), + ok = preallocate(Hdl, Highpoint, Lowpoint). form_filename(Dir, Name) -> filename:join(Dir, Name). -- cgit v1.2.1 From 8c6f12ecff2accbfd57064726121ca96f8cac924 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 4 May 2010 23:05:19 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 369a52d9..5d2908e1 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -432,9 +432,8 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> {LowSeqIdSeg, NextSeqId, State}. prepare_msg_store_seed_funs(DurableQueues) -> - DurableDict = - dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || - Queue <- DurableQueues ]), + DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || + Queue <- DurableQueues ]), QueuesDir = queues_dir(), Directories = case file:list_dir(QueuesDir) of {ok, Entries} -> -- cgit v1.2.1 From c405c34e20290deb154b581260fe169d875dc509 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 4 May 2010 23:48:24 +0100 Subject: disentangle qi from msg_store --- src/rabbit_queue_index.erl | 17 +++++++---------- src/rabbit_variable_queue.erl | 7 +++---- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5d2908e1..aee295ae 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -34,8 +34,7 @@ -export([init/3, terminate/2, terminate_and_erase/1, write_published/4, write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, - prepare_msg_store_seed_funs/1]). + find_lowest_seq_id_seg_and_next_seq_id/1, recover/1]). -export([queue_index_walker_reader/3]). %% for internal use only @@ -171,7 +170,7 @@ num }). --include("rabbit_msg_store.hrl"). +-include("rabbit.hrl"). %%---------------------------------------------------------------------------- @@ -193,6 +192,8 @@ journal_handle :: hdl(), dirty_count :: integer() }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). 
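The startup_fun_state() type added above describes an unfold: a generator fun plus a seed, applied repeatedly until it yields finished, producing one {Guid, Count, NextSeed} step at a time. A sketch of the consuming loop as the type alone suggests it (inside the store, count_msg_refs plays this role):

    %% Drive a startup_fun_state() generator to completion, handing
    %% each guid and count to CountFun. Inferred from the type; a
    %% sketch, not the store's actual loop.
    seed({Gen, SeedState}, CountFun) ->
        case Gen(SeedState) of
            finished ->
                ok;
            {Guid, Count, SeedState1} ->
                ok = CountFun(Guid, Count),
                seed({Gen, SeedState1}, CountFun)
        end.

The degenerate generator {fun (ok) -> finished end, ok}, which appears later in the series as the transient store's seed, terminates this loop immediately.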
-spec(init/3 :: (queue_name(), boolean(), fun ((guid()) -> boolean())) -> {'undefined' | @@ -211,10 +212,7 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). --spec(prepare_msg_store_seed_funs/1 :: - ([queue_name()]) -> - {{[binary()] | 'undefined', startup_fun_state()}, - {[binary()] | 'undefined', startup_fun_state()}}). +-spec(recover/1 :: ([queue_name()]) -> {[binary()], startup_fun_state()}). -endif. @@ -431,7 +429,7 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> end, {LowSeqIdSeg, NextSeqId, State}. -prepare_msg_store_seed_funs(DurableQueues) -> +recover(DurableQueues) -> DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || Queue <- DurableQueues ]), QueuesDir = queues_dir(), @@ -471,8 +469,7 @@ prepare_msg_store_seed_funs(DurableQueues) -> Dir = filename:join(queues_dir(), DirName), ok = rabbit_misc:recursive_delete([Dir]) end, TransientDirs), - {{undefined, {fun (ok) -> finished end, ok}}, - {DurableRefs, {fun queue_index_walker/1, DurableQueueNames}}}. + {DurableRefs, {fun queue_index_walker/1, DurableQueueNames}}. %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c5f18a94..18b3847d 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -257,14 +257,13 @@ start(DurableQueues) -> ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), - {{TransRefs, TransStartFunState}, {PersistRefs, PersistStartFunState}} - = rabbit_queue_index:prepare_msg_store_seed_funs(DurableQueues), + {Refs, StartFunState} = rabbit_queue_index:recover(DurableQueues), ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), - TransRefs, TransStartFunState]), + undefined, {fun (ok) -> finished end, ok}]), ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), - PersistRefs, PersistStartFunState]). + Refs, StartFunState]). init(QueueName, IsDurable, _Recover) -> PersistentStore = case IsDurable of -- cgit v1.2.1 From 288c75b662b0f1174544e9c2c7f0876587042652 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 5 May 2010 01:08:41 +0100 Subject: made shutdown term structure opaque to qi only the vq needs to know about it --- src/rabbit_queue_index.erl | 47 ++++++++++++++----------------------------- src/rabbit_tests.erl | 16 +++++++-------- src/rabbit_variable_queue.erl | 21 +++++++++++++++---- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index aee295ae..95df8938 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -196,8 +196,7 @@ {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). -spec(init/3 :: (queue_name(), boolean(), fun ((guid()) -> boolean())) -> - {'undefined' | - non_neg_integer(), binary(), binary(), [any()], qistate()}). + {'undefined' | non_neg_integer(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). -spec(write_published/4 :: (guid(), seq_id(), boolean(), qistate()) @@ -212,7 +211,7 @@ -spec(segment_size/0 :: () -> non_neg_integer()). -spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). 
--spec(recover/1 :: ([queue_name()]) -> {[binary()], startup_fun_state()}). +-spec(recover/1 :: ([queue_name()]) -> {[[any()]], startup_fun_state()}). -endif. @@ -223,20 +222,10 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> State = blank_state(Name), - {PRef, TRef, Terms} = - case read_shutdown_terms(State #qistate.dir) of - {error, _} -> - {rabbit_guid:guid(), rabbit_guid:guid(), []}; - {ok, Terms1} -> - case [persistent_ref, transient_ref] -- - proplists:get_keys(Terms1) of - [] -> - {proplists:get_value(persistent_ref, Terms1), - proplists:get_value(transient_ref, Terms1), Terms1}; - _ -> - {rabbit_guid:guid(), rabbit_guid:guid(), []} - end - end, + Terms = case read_shutdown_terms(State #qistate.dir) of + {error, _} -> []; + {ok, Terms1} -> Terms1 + end, %% 1. Load the journal completely. This will also load segments %% which have entries in the journal and remove duplicates. %% The counts will correctly reflect the combination of the @@ -305,7 +294,7 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> %% artificially set the dirty_count non zero and call flush again State3 = flush_journal(State2 #qistate { segments = Segments1, dirty_count = 1 }), - {Count, PRef, TRef, Terms, State3}. + {Count, Terms, State3}. maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> Segment; @@ -442,34 +431,28 @@ recover(DurableQueues) -> [] end, DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), - {DurableQueueNames, TransientDirs, DurableRefs} = + {DurableQueueNames, TransientDirs, DurableTerms} = lists:foldl( - fun (QueueDir, {DurableAcc, TransientAcc, RefsAcc}) -> + fun (QueueDir, {DurableAcc, TransientAcc, TermsAcc}) -> case sets:is_element(QueueDir, DurableDirectories) of true -> - RefsAcc1 = + TermsAcc1 = case read_shutdown_terms( filename:join(QueuesDir, QueueDir)) of - {error, _} -> - RefsAcc; - {ok, Terms} -> - case proplists:get_value( - persistent_ref, Terms) of - undefined -> RefsAcc; - Ref -> [Ref | RefsAcc] - end + {error, _} -> TermsAcc; + {ok, Terms} -> [Terms | TermsAcc] end, {[dict:fetch(QueueDir, DurableDict) | DurableAcc], - TransientAcc, RefsAcc1}; + TransientAcc, TermsAcc1}; false -> - {DurableAcc, [QueueDir | TransientAcc], RefsAcc} + {DurableAcc, [QueueDir | TransientAcc], TermsAcc} end end, {[], [], []}, Directories), lists:foreach(fun (DirName) -> Dir = filename:join(queues_dir(), DirName), ok = rabbit_misc:recursive_delete([Dir]) end, TransientDirs), - {DurableRefs, {fun queue_index_walker/1, DurableQueueNames}}. + {DurableTerms, {fun queue_index_walker/1, DurableQueueNames}}. %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 3cac429e..c8de7984 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1214,7 +1214,7 @@ test_queue() -> empty_test_queue() -> ok = rabbit_variable_queue:start([]), - {0, _PRef, _TRef, _Terms, Qi1} = test_queue_init(), + {0, _Terms, Qi1} = test_queue_init(), _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), ok. 
@@ -1270,7 +1270,7 @@ test_queue_index() -> ok = empty_test_queue(), SeqIdsA = lists:seq(0,9999), SeqIdsB = lists:seq(10000,19999), - {0, _PRef, _TRef, _Terms, Qi0} = test_queue_init(), + {0, _Terms, Qi0} = test_queue_init(), {0, 0, Qi1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), @@ -1284,7 +1284,7 @@ test_queue_index() -> ok = stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0, as all the msgs were transient - {0, _PRef1, _TRef1, _Terms1, Qi6} = test_queue_init(), + {0, _Terms1, Qi6} = test_queue_init(), {0, 0, Qi7} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), @@ -1298,7 +1298,7 @@ test_queue_index() -> ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 10000 LenB = length(SeqIdsB), - {LenB, _PRef2, _TRef2, _Terms2, Qi12} = test_queue_init(), + {LenB, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), @@ -1314,7 +1314,7 @@ test_queue_index() -> ok = stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked - {0, _PRef3, _TRef3, _Terms3, Qi20} = test_queue_init(), + {0, _Terms3, Qi20} = test_queue_init(), _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1323,7 +1323,7 @@ test_queue_index() -> %% First, partials: %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - {0, _PRef4, _TRef4, _Terms4, Qi22} = test_queue_init(), + {0, _Terms4, Qi22} = test_queue_init(), {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = queue_index_deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), @@ -1334,7 +1334,7 @@ test_queue_index() -> ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment - {0, _PRef5, _TRef5, _Terms5, Qi29} = test_queue_init(), + {0, _Terms5, Qi29} = test_queue_init(), {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = queue_index_deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), @@ -1346,7 +1346,7 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), - {0, _PRef6, _TRef6, _Terms6, Qi36} = test_queue_init(), + {0, _Terms6, Qi36} = test_queue_init(), {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = queue_index_deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 18b3847d..39ef3ec4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -257,7 +257,12 @@ start(DurableQueues) -> ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), - {Refs, StartFunState} = rabbit_queue_index:recover(DurableQueues), + {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues), + Refs = [Ref || Terms <- AllTerms, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, 
ok}]), @@ -276,12 +281,19 @@ init(QueueName, IsDurable, _Recover) -> fun (Guid) -> rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end, - {DeltaCount, PRef, TRef, Terms, IndexState} = + {DeltaCount, Terms, IndexState} = rabbit_queue_index:init(QueueName, MsgStoreRecovered, ContainsCheckFun), {DeltaSeqId, NextSeqId, IndexState1} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), - DeltaCount1 = proplists:get_value(persistent_count, Terms, DeltaCount), + {PRef, TRef, Terms1} = + case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of + [] -> {proplists:get_value(persistent_ref, Terms), + proplists:get_value(transient_ref, Terms), + Terms}; + _ -> {rabbit_guid:guid(), rabbit_guid:guid(), []} + end, + DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount), Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of true -> ?BLANK_DELTA; false -> #delta { start_seq_id = DeltaSeqId, @@ -329,7 +341,8 @@ terminate(State) -> remove_pending_ack(true, tx_commit_index(State)), rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), - Terms = [{persistent_ref, PRef}, {transient_ref, TRef}, + Terms = [{persistent_ref, PRef}, + {transient_ref, TRef}, {persistent_count, PCount}], State1 #vqstate { index_state = rabbit_queue_index:terminate( Terms, IndexState), -- cgit v1.2.1 From 7767fdd46936c60f16c0bcd41f800c65ed704af8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 5 May 2010 08:10:42 +0100 Subject: refactor concurrent ets manipulation logic --- src/rabbit_msg_store.erl | 76 +++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 2201bebb..498bbff9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -306,7 +306,7 @@ start_link(Server, Dir, ClientRefs, StartupFunState) -> write(Server, Guid, Msg, CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> - ok = add_to_cache(CurFileCacheEts, Guid, Msg), + ok = update_msg_cache(CurFileCacheEts, Guid, Msg), {gen_server2:cast(Server, {write, Guid, Msg}), CState}. read(Server, Guid, @@ -380,16 +380,21 @@ clean(Server, BaseDir) -> %% Client-side-only helpers %%---------------------------------------------------------------------------- -add_to_cache(CurFileCacheEts, Guid, Msg) -> - case ets:insert_new(CurFileCacheEts, {Guid, Msg, 1}) of - true -> - ok; - false -> - try - ets:update_counter(CurFileCacheEts, Guid, {3, +1}), - ok - catch error:badarg -> add_to_cache(CurFileCacheEts, Guid, Msg) - end +safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> + try + SuccessFun(ets:update_counter(Tab, Key, UpdateOp)) + catch error:badarg -> FailThunk() + end. + +safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) -> + safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk). + +update_msg_cache(CacheEts, Guid, Msg) -> + case ets:insert_new(CacheEts, {Guid, Msg, 1}) of + true -> ok; + false -> safe_ets_update_counter_ok( + CacheEts, Guid, {3, +1}, + fun () -> update_msg_cache(CacheEts, Guid, Msg) end) end. client_read1(Server, @@ -429,10 +434,9 @@ client_read2(Server, false, _Right, %% It's entirely possible that everything we're doing from here on %% is for the wrong file, or a non-existent file, as a GC may have %% finished. - try ets:update_counter(FileSummaryEts, File, {#file_summary.readers, +1}) - catch error:badarg -> %% the File has been GC'd and deleted. Go around. 
- read(Server, Guid, CState) - end, + safe_ets_update_counter_ok( + FileSummaryEts, File, {#file_summary.readers, +1}, + fun () -> read(Server, Guid, CState) end), Release = fun() -> ets:update_counter(FileSummaryEts, File, {#file_summary.readers, -1}) end, @@ -898,7 +902,7 @@ read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) when RefCount > 1 -> - insert_into_cache(DedupCacheEts, Guid, Msg); + update_msg_cache(DedupCacheEts, Guid, Msg); maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> ok. @@ -1098,39 +1102,25 @@ fetch_and_increment_cache(DedupCacheEts, Guid) -> [] -> not_found; [{_Guid, Msg, _RefCount}] -> - try - ets:update_counter(DedupCacheEts, Guid, {3, 1}) - catch error:badarg -> - %% someone has deleted us in the meantime, insert us - ok = insert_into_cache(DedupCacheEts, Guid, Msg) - end, + safe_ets_update_counter_ok( + DedupCacheEts, Guid, {3, +1}, + %% someone has deleted us in the meantime, insert us + fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end), Msg end. decrement_cache(DedupCacheEts, Guid) -> - true = try case ets:update_counter(DedupCacheEts, Guid, {3, -1}) of - N when N =< 0 -> true = ets:delete(DedupCacheEts, Guid); - _N -> true - end - catch error:badarg -> - %% Guid is not in there because although it's been - %% delivered, it's never actually been read (think: - %% persistent message held in RAM) - true - end, + true = safe_ets_update_counter( + DedupCacheEts, Guid, {3, -1}, + fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid); + (_N) -> true + end, + %% Guid is not in there because although it's been + %% delivered, it's never actually been read (think: + %% persistent message held in RAM) + fun () -> true end), ok. -insert_into_cache(DedupCacheEts, Guid, Msg) -> - case ets:insert_new(DedupCacheEts, {Guid, Msg, 1}) of - true -> ok; - false -> try - ets:update_counter(DedupCacheEts, Guid, {3, 1}), - ok - catch error:badarg -> - insert_into_cache(DedupCacheEts, Guid, Msg) - end - end. 
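The refactor above names a recurring idiom: ETS offers no atomic upsert for counters, so each caller had been hand-rolling insert_new, then update_counter, then a badarg catch for the case where the row vanished in between. safe_ets_update_counter/5 makes that race handling explicit and reusable. Distilled to a standalone sketch (table rows assumed to be {Key, Value, RefCount}):

    %% Lock-free refcount bump in a shared ETS table: insert fresh, or
    %% bump the existing counter; if the row was deleted between the
    %% two calls, update_counter raises badarg and we start over.
    bump(Tab, Key, Value) ->
        case ets:insert_new(Tab, {Key, Value, 1}) of
            true  -> ok;
            false -> try
                         _ = ets:update_counter(Tab, Key, {3, +1}),
                         ok
                     catch error:badarg ->
                         bump(Tab, Key, Value)
                     end
        end.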
- %%---------------------------------------------------------------------------- %% index %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 1032d5e090ef5d3f16519ba1f04334acf76372f8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 5 May 2010 09:03:19 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 104 ++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 55 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 498bbff9..58682046 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -521,10 +521,11 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> {fresh, IndexState1} = IndexModule:init(fresh, Dir), {false, IndexState1, sets:new()}; {true, Terms} -> - case undefined /= ClientRefs andalso lists:sort(ClientRefs) == - lists:sort(proplists:get_value(client_refs, Terms, [])) - andalso proplists:get_value(index_module, Terms) == - IndexModule of + RecClientRefs = proplists:get_value(client_refs, Terms, []), + RecIndexModule = proplists:get_value(index_module, Terms), + case (undefined /= ClientRefs andalso + lists:sort(ClientRefs) == lists:sort(RecClientRefs) + andalso IndexModule == RecIndexModule) of true -> case IndexModule:init(recover, Dir) of {fresh, IndexState1} -> @@ -656,11 +657,12 @@ handle_cast({write, Guid, Msg}, {#file_summary.contiguous_top, ContiguousTop1}, {#file_summary.file_size, FileSize + TotalSize}]), NextOffset = CurOffset + TotalSize, - noreply(maybe_compact(maybe_roll_to_new_file( - NextOffset, State #msstate - { sum_valid_data = SumValid + TotalSize, - sum_file_size = SumFileSize + TotalSize } - ))); + noreply( + maybe_compact( + maybe_roll_to_new_file( + NextOffset, State #msstate { + sum_valid_data = SumValid + TotalSize, + sum_file_size = SumFileSize + TotalSize }))); #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter. Only %% update field otherwise bad interaction with concurrent GC @@ -755,9 +757,9 @@ terminate(_Reason, State = #msstate { index_state = IndexState, ok = rabbit_msg_store_gc:stop(GCPid), State1 = case CurHdl of undefined -> State; - _ -> State2 = internal_sync(State), - file_handle_cache:close(CurHdl), - State2 + _ -> State2 = internal_sync(State), + file_handle_cache:close(CurHdl), + State2 end, State3 = close_all_handles(State1), store_file_summary(FileSummaryEts, Dir), @@ -990,26 +992,23 @@ close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> close_handle(Key, FHC) -> case dict:find(Key, FHC) of - {ok, Hdl} -> - ok = file_handle_cache:close(Hdl), - dict:erase(Key, FHC); - error -> FHC + {ok, Hdl} -> ok = file_handle_cache:close(Hdl), + dict:erase(Key, FHC); + error -> FHC end. close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts, file_handle_cache = FHC }) -> Self = self(), ok = dict:fold(fun (File, Hdl, ok) -> - true = - ets:delete(FileHandlesEts, {Self, File}), + true = ets:delete(FileHandlesEts, {Self, File}), file_handle_cache:close(Hdl) end, ok, FHC), CState #client_msstate { file_handle_cache = dict:new() }; close_all_handles(State = #msstate { file_handle_cache = FHC }) -> - ok = dict:fold(fun (_Key, Hdl, ok) -> - file_handle_cache:close(Hdl) - end, ok, FHC), + ok = dict:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end, + ok, FHC), State #msstate { file_handle_cache = dict:new() }. 
get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC, @@ -1547,20 +1546,18 @@ gc(SourceFile, DestFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> ets:lookup(FileSummaryEts, DestFile), case SourceReaders =:= 0 andalso DestReaders =:= 0 of - true -> - TotalValidData = DestValidData + SourceValidData, - ok = combine_files(SourceObj, DestObj, State), - %% don't update dest.right, because it could be changing - %% at the same time - true = ets:update_element( - FileSummaryEts, DestFile, - [{#file_summary.valid_total_size, TotalValidData}, - {#file_summary.contiguous_top, TotalValidData}, - {#file_summary.file_size, TotalValidData}]), - SourceFileSize + DestFileSize - TotalValidData; - false -> - timer:sleep(100), - gc(SourceFile, DestFile, State) + true -> TotalValidData = DestValidData + SourceValidData, + ok = combine_files(SourceObj, DestObj, State), + %% don't update dest.right, because it could be + %% changing at the same time + true = ets:update_element( + FileSummaryEts, DestFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.contiguous_top, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SourceFileSize + DestFileSize - TotalValidData; + false -> timer:sleep(100), + gc(SourceFile, DestFile, State) end. combine_files(#file_summary { file = Source, @@ -1639,15 +1636,14 @@ find_unremoved_messages_in_file(File, {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order - lists:foldl( - fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) -> - case Index:lookup(Guid, IndexState) of - Entry = #msg_location { file = File } -> - {[ Entry | List ], TotalSize + Size}; - _ -> - Acc - end - end, {[], 0}, Messages). + lists:foldl(fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) -> + case Index:lookup(Guid, IndexState) of + Entry = #msg_location { file = File } -> + {[ Entry | List ], TotalSize + Size}; + _ -> + Acc + end + end, {[], 0}, Messages). 
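copy_messages/7, whose hunks follow, avoids one read-write round trip per message by coalescing the ascending-offset worklist into maximal contiguous blocks and issuing a single file_handle_cache:copy/3 per block. The block-building idea in isolation, over abstract {Offset, Size} pairs:

    %% Coalesce an ascending list of {Offset, Size} extents into
    %% maximal contiguous {Start, End} blocks. A sketch of the shape
    %% only; the real fold also rewrites index entries as it goes.
    coalesce(WorkList) ->
        lists:foldr(
          fun ({Offset, Size}, []) ->
                  [{Offset, Offset + Size}];
              ({Offset, Size}, [{Start, End} | Blocks])
                when Offset + Size =:= Start ->
                  [{Offset, End} | Blocks];
              ({Offset, Size}, Blocks) ->
                  [{Offset, Offset + Size} | Blocks]
          end, [], WorkList).

For example, coalesce([{0,10}, {10,5}, {20,7}]) gives [{0,15}, {20,27}]: two copies instead of three.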
copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> @@ -1686,17 +1682,15 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, end, {InitOffset, undefined, undefined}, WorkList) of {FinalOffset, BlockStart1, BlockEnd1} -> case WorkList of - [] -> - ok; - _ -> - %% do the last remaining block - BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = - file_handle_cache:position(SourceHdl, BlockStart1), - {ok, BSize1} = - file_handle_cache:copy(SourceHdl, DestinationHdl, - BSize1), - ok = file_handle_cache:sync(DestinationHdl) + [] -> ok; + %% do the last remaining block + _ -> BSize1 = BlockEnd1 - BlockStart1, + {ok, BlockStart1} = + file_handle_cache:position(SourceHdl, BlockStart1), + {ok, BSize1} = + file_handle_cache:copy(SourceHdl, DestinationHdl, + BSize1), + ok = file_handle_cache:sync(DestinationHdl) end; {FinalOffsetZ, _BlockStart1, _BlockEnd1} -> throw({gc_error, [{expected, FinalOffset}, -- cgit v1.2.1 From 77633acf891f1dc652359828dd8c316a81e2868a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 14:41:03 +0100 Subject: Various fixes and improvements to client reading --- src/rabbit_msg_store.erl | 82 ++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 58682046..6e4c1b56 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -408,63 +408,71 @@ client_read1(Server, client_read2(Server, Locked, Right, MsgLocation, Defer, CState) end. -client_read2(_Server, false, undefined, - #msg_location { guid = Guid, ref_count = RefCount }, - Defer, - CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts, - dedup_cache_ets = DedupCacheEts }) -> - case ets:lookup(CurFileCacheEts, Guid) of - [] -> - Defer(); %% may have rolled over - [{Guid, Msg, _CacheRefCount}] -> - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), - {{ok, Msg}, CState} - end; +client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) -> + %% Although we've already checked both caches and not found the + %% message there, the message is apparently in the + %% current_file. We can only arrive here if we are trying to read + %% a message which we have not written, which is very odd, so just + %% defer. + + %% OR, on startup, the cur_file_cache is not populated with the + %% contents of the current file, thus reads from the current file + %% will end up here and will need to be deferred. + Defer(); client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> %% Of course, in the mean time, the GC could have run and our msg %% is actually in a different file, unlocked. However, defering is %% the safest and simplest thing to do. Defer(); client_read2(Server, false, _Right, - #msg_location { guid = Guid, ref_count = RefCount, file = File }, + MsgLocation = #msg_location { guid = Guid, file = File }, Defer, - CState = #client_msstate { file_handles_ets = FileHandlesEts, - file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts }) -> + CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> %% It's entirely possible that everything we're doing from here on %% is for the wrong file, or a non-existent file, as a GC may have %% finished. 
- safe_ets_update_counter_ok( + safe_ets_update_counter( FileSummaryEts, File, {#file_summary.readers, +1}, - fun () -> read(Server, Guid, CState) end), + fun (_) -> client_read3(Server, Guid, MsgLocation, Defer, CState) end, + fun () -> read(Server, Guid, CState) end). + +client_read3(Server, Guid, + #msg_location { guid = Guid, ref_count = RefCount, file = File }, + Defer, + CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> Release = fun() -> ets:update_counter(FileSummaryEts, File, {#file_summary.readers, -1}) end, + ReleaseDefer = + fun () -> + %% If we get a badarg here, then the GC has finished + %% and deleted our file. Try going around + %% again. Otherwise, just defer. + + %% badarg scenario: we lookup, msg_store locks, gc + %% starts, gc ends, we +1 readers, msg_store + %% ets:deletes (and unlocks the dest) + try Release(), + Defer() + catch error:badarg -> read(Server, Guid, CState) + end + end, %% If a GC hasn't already started, it won't start now. Need to %% check again to see if we've been locked in the meantime, %% between lookup and update_counter (thus GC started before our - %% +1). - [#file_summary { locked = Locked }] = ets:lookup(FileSummaryEts, File), - case Locked of - true -> - %% If we get a badarg here, then the GC has finished and - %% deleted our file. Try going around again. Otherwise, - %% just defer. - - %% badarg scenario: - %% we lookup, msg_store locks, gc starts, gc ends, we +1 - %% readers, msg_store ets:deletes (and unlocks the dest) - try Release(), - Defer() - catch error:badarg -> read(Server, Guid, CState) - end; - false -> + %% +1. In fact, it could have finished by now too). + case ets:lookup(FileSummaryEts, File) of + [] -> %% GC has deleted our file + ReleaseDefer(); + [{#file_summary { locked = true }}] -> + ReleaseDefer(); + _ -> %% Ok, we're definitely safe to continue - a GC can't %% start up now, and isn't running, so nothing will tell %% us from now on to close the handle if it's already - %% open. (Well, a GC could start, and could put close - %% entries into the ets table, but the GC will wait until - %% we're done here before doing any real work.) + %% open. %% Finally, we need to recheck that the msg is still at %% the same place - it's possible an entire GC ran between -- cgit v1.2.1 From b9fa6d899645d6e6b1ba43cf4c3bd2a47d2707d8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 14:43:42 +0100 Subject: Whoops --- src/rabbit_msg_store.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 6e4c1b56..21fb41c4 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -433,10 +433,10 @@ client_read2(Server, false, _Right, %% finished. safe_ets_update_counter( FileSummaryEts, File, {#file_summary.readers, +1}, - fun (_) -> client_read3(Server, Guid, MsgLocation, Defer, CState) end, + fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end, fun () -> read(Server, Guid, CState) end). 
-client_read3(Server, Guid, +client_read3(Server, #msg_location { guid = Guid, ref_count = RefCount, file = File }, Defer, CState = #client_msstate { file_handles_ets = FileHandlesEts, -- cgit v1.2.1 From c4e0a87ac66edd441b49d3e49aeb2630433f0db7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 14:52:22 +0100 Subject: Whoops --- src/rabbit_msg_store.erl | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 21fb41c4..5e785447 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -445,29 +445,25 @@ client_read3(Server, Release = fun() -> ets:update_counter(FileSummaryEts, File, {#file_summary.readers, -1}) end, - ReleaseDefer = - fun () -> - %% If we get a badarg here, then the GC has finished - %% and deleted our file. Try going around - %% again. Otherwise, just defer. - - %% badarg scenario: we lookup, msg_store locks, gc - %% starts, gc ends, we +1 readers, msg_store - %% ets:deletes (and unlocks the dest) - try Release(), - Defer() - catch error:badarg -> read(Server, Guid, CState) - end - end, %% If a GC hasn't already started, it won't start now. Need to %% check again to see if we've been locked in the meantime, %% between lookup and update_counter (thus GC started before our %% +1. In fact, it could have finished by now too). case ets:lookup(FileSummaryEts, File) of - [] -> %% GC has deleted our file - ReleaseDefer(); + [] -> %% GC has deleted our file, just go round again. + read(Server, Guid, CState); [{#file_summary { locked = true }}] -> - ReleaseDefer(); + %% If we get a badarg here, then the GC has finished and + %% deleted our file. Try going around again. Otherwise, + %% just defer. + + %% badarg scenario: we lookup, msg_store locks, gc starts, + %% gc ends, we +1 readers, msg_store ets:deletes (and + %% unlocks the dest) + try Release(), + Defer() + catch error:badarg -> read(Server, Guid, CState) + end; _ -> %% Ok, we're definitely safe to continue - a GC can't %% start up now, and isn't running, so nothing will tell -- cgit v1.2.1 From 36913618e82bf46015623280a8114a1990fe713c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 14:55:05 +0100 Subject: %%%% --- src/rabbit_msg_store.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 5e785447..5c5cdb2d 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -456,7 +456,7 @@ client_read3(Server, %% If we get a badarg here, then the GC has finished and %% deleted our file. Try going around again. Otherwise, %% just defer. - + %% %% badarg scenario: we lookup, msg_store locks, gc starts, %% gc ends, we +1 readers, msg_store ets:deletes (and %% unlocks the dest) @@ -469,13 +469,12 @@ client_read3(Server, %% start up now, and isn't running, so nothing will tell %% us from now on to close the handle if it's already %% open. - + %% %% Finally, we need to recheck that the msg is still at %% the same place - it's possible an entire GC ran between %% us doing the lookup and the +1 on the readers. (Same as %% badarg scenario above, but we don't have a missing file %% - we just have the /wrong/ file). - case index_lookup(Guid, CState) of MsgLocation = #msg_location { file = File } -> %% Still the same file. 
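The run of client-read commits above converges on a small lock-free protocol: optimistically +1 the per-file reader count, re-check the file's summary row, and treat a badarg from ETS as "the GC deleted the file, retry from the top". One slip is worth flagging: the locked branch above matches [{#file_summary { locked = true }}] — the record wrapped in an extra tuple — a shape an ets:lookup/2 result can never take, so locked files would fall through to the catch-all branch. The intended control flow, schematically (state threading elided, names as in the diffs):

    %% Schematic of client_read3's re-check after bumping readers.
    %% Retry/0 restarts the read; Defer/0 hands it to the server.
    recheck(FileSummaryEts, File, Retry, Defer, ReadFromDisk) ->
        Release = fun () -> ets:update_counter(FileSummaryEts, File,
                                               {#file_summary.readers, -1})
                  end,
        case ets:lookup(FileSummaryEts, File) of
            [] ->                                 %% file GC'd away: go round
                Retry();
            [#file_summary { locked = true }] ->  %% note: no extra tuple
                try Release(), Defer()
                catch error:badarg -> Retry()
                end;
            [_] ->                                %% GC can't start on it now
                Result = ReadFromDisk(),
                Release(),
                Result
        end.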
-- cgit v1.2.1 From fab73750b84e6814ae87721a34c901742ec47d81 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 14:55:48 +0100 Subject: %% --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 5c5cdb2d..6c5a1c6e 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -414,7 +414,7 @@ client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) -> %% current_file. We can only arrive here if we are trying to read %% a message which we have not written, which is very odd, so just %% defer. - + %% %% OR, on startup, the cur_file_cache is not populated with the %% contents of the current file, thus reads from the current file %% will end up here and will need to be deferred. -- cgit v1.2.1 From f9a4e242b41a78b9a4e35887ec004bd2fe6b46e9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 15:44:41 +0100 Subject: Correct insertions into dedup cache --- src/rabbit_msg_store.erl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 6c5a1c6e..217446a6 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -436,9 +436,7 @@ client_read2(Server, false, _Right, fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end, fun () -> read(Server, Guid, CState) end). -client_read3(Server, - #msg_location { guid = Guid, ref_count = RefCount, file = File }, - Defer, +client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, CState = #client_msstate { file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts }) -> @@ -483,8 +481,6 @@ client_read3(Server, CState1 = close_all_indicated(CState), {Msg, CState2} = read_from_disk(MsgLocation, CState1, DedupCacheEts), - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, - Msg), Release(), %% this MUST NOT fail with badarg {{ok, Msg}, CState2}; MsgLocation -> %% different file! @@ -864,9 +860,10 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, end, read_from_disk(MsgLoc, State, DedupCacheEts); [{Guid, Msg1, _CacheRefCount}] -> + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, + Guid, Msg1), {Msg1, State} end, - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), gen_server2:reply(From, {ok, Msg}), State1; false -> -- cgit v1.2.1 From aaa318cd522fa7475d94a501267d4e776e2f7a12 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 5 May 2010 15:56:43 +0100 Subject: more precise comments --- src/rabbit_msg_store.erl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 217446a6..b9a70680 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -443,10 +443,11 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, Release = fun() -> ets:update_counter(FileSummaryEts, File, {#file_summary.readers, -1}) end, - %% If a GC hasn't already started, it won't start now. Need to - %% check again to see if we've been locked in the meantime, - %% between lookup and update_counter (thus GC started before our - %% +1. In fact, it could have finished by now too). + %% If a GC involving the file hasn't already started, it won't + %% start now. Need to check again to see if we've been locked in + %% the meantime, between lookup and update_counter (thus GC + %% started before our +1. 
In fact, it could have finished by now + %% too). case ets:lookup(FileSummaryEts, File) of [] -> %% GC has deleted our file, just go round again. read(Server, Guid, CState); @@ -455,18 +456,18 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, %% deleted our file. Try going around again. Otherwise, %% just defer. %% - %% badarg scenario: we lookup, msg_store locks, gc starts, - %% gc ends, we +1 readers, msg_store ets:deletes (and + %% badarg scenario: we lookup, msg_store locks, GC starts, + %% GC ends, we +1 readers, msg_store ets:deletes (and %% unlocks the dest) try Release(), Defer() catch error:badarg -> read(Server, Guid, CState) end; _ -> - %% Ok, we're definitely safe to continue - a GC can't - %% start up now, and isn't running, so nothing will tell - %% us from now on to close the handle if it's already - %% open. + %% Ok, we're definitely safe to continue - a GC involving + %% the file cannot start up now, and isn't running, so + %% nothing will tell us from now on to close the handle if + %% it's already open. %% %% Finally, we need to recheck that the msg is still at %% the same place - it's possible an entire GC ran between -- cgit v1.2.1 From 91901bd24488d72a730674d22575b09da632e129 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 15:57:26 +0100 Subject: Separation of Recovered and Recovered1 --- src/rabbit_msg_store.erl | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b9a70680..7773576b 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -83,7 +83,7 @@ dedup_cache_ets, %% tid of dedup cache table cur_file_cache_ets, %% tid of current file cache table client_refs, %% set of references of all registered clients - recovered_state %% boolean: did we recover state? + successfully_recovered %% boolean: did we recover state? }). 
-record(client_msstate, @@ -515,7 +515,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> rabbit_log:info("Using ~p to provide index for message store~n", [IndexModule]), - {Recovered, IndexState, ClientRefs1} = + {AllCleanShutdown, IndexState, ClientRefs1} = case detect_clean_shutdown(Dir) of {false, _Error} -> {fresh, IndexState1} = IndexModule:init(fresh, Dir), @@ -540,7 +540,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> end, InitFile = 0, - {Recovered1, FileSummaryEts} = recover_file_summary(Recovered, Dir), + {RecoveredFileSummary, FileSummaryEts} = + recover_file_summary(AllCleanShutdown, Dir), DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, [ordered_set, public]), @@ -564,10 +565,10 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts, client_refs = ClientRefs1, - recovered_state = Recovered + successfully_recovered = AllCleanShutdown }, - ok = count_msg_refs(Recovered, MsgRefDeltaGen, MsgRefDeltaGenInit, State), + ok = count_msg_refs(AllCleanShutdown, MsgRefDeltaGen, MsgRefDeltaGenInit, State), FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = @@ -578,7 +579,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], {Offset, State1 = #msstate { current_file = CurFile }} = - build_index(Recovered1, Files, State), + build_index(RecoveredFileSummary, Files, State), %% read is only needed so that we can seek {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile), @@ -614,7 +615,7 @@ handle_call({new_client_state, CRef}, _From, State #msstate { client_refs = sets:add_element(CRef, ClientRefs) }); handle_call(successfully_recovered_state, _From, State) -> - reply(State #msstate.recovered_state, State); + reply(State #msstate.successfully_recovered, State); handle_call({delete_client, CRef}, _From, State = #msstate { client_refs = ClientRefs }) -> @@ -709,10 +710,11 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, file_summary_ets = FileSummaryEts }) -> %% GC done, so now ensure that any clients that have open fhs to %% those files close them before using them again. This has to be - %% done here, and not when starting up the GC, because if done - %% when starting up the GC, the client could find the close, and - %% close and reopen the fh, whilst the GC is waiting for readers - %% to disappear, before it's actually done the GC. + %% done here (given it's done in the msg_store, and not the gc), + %% and not when starting up the GC, because if done when starting + %% up the GC, the client could find the close, and close and + %% reopen the fh, whilst the GC is waiting for readers to + %% disappear, before it's actually done the GC. true = mark_handle_to_close(FileHandlesEts, Source), true = mark_handle_to_close(FileHandlesEts, Dest), %% we always move data left, so Source has gone and was on the -- cgit v1.2.1 From 00fea23c35dcca1b020afbc3e5ac2bf64454c6e1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 5 May 2010 16:03:37 +0100 Subject: RecoveredFileSummary => IndexRecovered. 
Also added an apparently crucial comment --- src/rabbit_msg_store.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 7773576b..4411a950 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -480,7 +480,7 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, %% This is fine to fail (already exists) ets:insert_new(FileHandlesEts, {{self(), File}, open}), CState1 = close_all_indicated(CState), - {Msg, CState2} = + {Msg, CState2} = %% This will never be the current file read_from_disk(MsgLocation, CState1, DedupCacheEts), Release(), %% this MUST NOT fail with badarg {{ok, Msg}, CState2}; @@ -540,7 +540,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> end, InitFile = 0, - {RecoveredFileSummary, FileSummaryEts} = + {IndexRecovered, FileSummaryEts} = recover_file_summary(AllCleanShutdown, Dir), DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, @@ -579,7 +579,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], {Offset, State1 = #msstate { current_file = CurFile }} = - build_index(RecoveredFileSummary, Files, State), + build_index(IndexRecovered, Files, State), %% read is only needed so that we can seek {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile), -- cgit v1.2.1 From 79c0e37c9a4da96c8d3f76fd22a97d4b315af8d1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 6 May 2010 06:22:20 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 4411a950..4dc390f6 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -540,7 +540,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> end, InitFile = 0, - {IndexRecovered, FileSummaryEts} = + {FileSummaryRecovered, FileSummaryEts} = recover_file_summary(AllCleanShutdown, Dir), DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, @@ -568,7 +568,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> successfully_recovered = AllCleanShutdown }, - ok = count_msg_refs(AllCleanShutdown, MsgRefDeltaGen, MsgRefDeltaGenInit, State), + ok = count_msg_refs(AllCleanShutdown, MsgRefDeltaGen, MsgRefDeltaGenInit, + State), FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = @@ -579,7 +580,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> %% whole lot Files = [filename_to_num(FileName) || FileName <- FileNames], {Offset, State1 = #msstate { current_file = CurFile }} = - build_index(IndexRecovered, Files, State), + build_index(FileSummaryRecovered, Files, State), %% read is only needed so that we can seek {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile), @@ -856,8 +857,9 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, %% can return [] if msg in file existed on startup case ets:lookup(CurFileCacheEts, Guid) of [] -> - ok = case {ok, Offset} >= - file_handle_cache:current_raw_offset(CurHdl) of + {ok, RawOffSet} = + file_handle_cache:current_raw_offset(CurHdl), + ok = case Offset >= RawOffSet of true -> file_handle_cache:flush(CurHdl); false -> 
ok end, -- cgit v1.2.1 From 42f530bef7005d3ae7633e95205a9198388813aa Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 6 May 2010 07:20:04 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 168 ++++++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 76 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 4dc390f6..d92d7aa3 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -310,7 +310,7 @@ write(Server, Guid, Msg, {gen_server2:cast(Server, {write, Guid, Msg}), CState}. read(Server, Guid, - CState = #client_msstate { dedup_cache_ets = DedupCacheEts, + CState = #client_msstate { dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts }) -> %% 1. Check the dedup cache case fetch_and_increment_cache(DedupCacheEts, Guid) of @@ -389,14 +389,6 @@ safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) -> safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk). -update_msg_cache(CacheEts, Guid, Msg) -> - case ets:insert_new(CacheEts, {Guid, Msg, 1}) of - true -> ok; - false -> safe_ets_update_counter_ok( - CacheEts, Guid, {3, +1}, - fun () -> update_msg_cache(CacheEts, Guid, Msg) end) - end. - client_read1(Server, #msg_location { guid = Guid, file = File } = MsgLocation, Defer, @@ -539,7 +531,6 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> end end, - InitFile = 0, {FileSummaryRecovered, FileSummaryEts} = recover_file_summary(AllCleanShutdown, Dir), DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), @@ -550,7 +541,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> State = #msstate { dir = Dir, index_module = IndexModule, index_state = IndexState, - current_file = InitFile, + current_file = 0, current_file_handle = undefined, file_handle_cache = dict:new(), on_sync = [], @@ -641,10 +632,10 @@ handle_cast({write, Guid, Msg}, offset = CurOffset, total_size = TotalSize }, State), [#file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - right = undefined, - locked = false, - file_size = FileSize }] = + contiguous_top = ContiguousTop, + right = undefined, + locked = false, + file_size = FileSize }] = ets:lookup(FileSummaryEts, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, ContiguousTop1 = if CurOffset =:= ContiguousTop -> @@ -664,7 +655,7 @@ handle_cast({write, Guid, Msg}, maybe_roll_to_new_file( NextOffset, State #msstate { sum_valid_data = SumValid + TotalSize, - sum_file_size = SumFileSize + TotalSize }))); + sum_file_size = SumFileSize + TotalSize }))); #msg_location { ref_count = RefCount } -> %% We already know about it, just update counter. 
Only %% update field otherwise bad interaction with concurrent GC @@ -705,8 +696,8 @@ handle_cast(sync, State) -> noreply(internal_sync(State)); handle_cast({gc_done, Reclaimed, Source, Dest}, - State = #msstate { sum_file_size = SumFileSize, - gc_active = {Source, Dest}, + State = #msstate { sum_file_size = SumFileSize, + gc_active = {Source, Dest}, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts }) -> %% GC done, so now ensure that any clients that have open fhs to @@ -721,9 +712,10 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, %% we always move data left, so Source has gone and was on the %% right, so need to make dest = source.right.left, and also %% dest.right = source.right - [#file_summary { left = Dest, right = SourceRight, locked = true, - readers = 0 }] = - ets:lookup(FileSummaryEts, Source), + [#file_summary { left = Dest, + right = SourceRight, + locked = true, + readers = 0 }] = ets:lookup(FileSummaryEts, Source), %% this could fail if SourceRight == undefined ets:update_element(FileSummaryEts, SourceRight, {#file_summary.left, Dest}), @@ -733,7 +725,7 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, true = ets:delete(FileSummaryEts, Source), noreply(run_pending( State #msstate { sum_file_size = SumFileSize - Reclaimed, - gc_active = false })); + gc_active = false })); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -819,7 +811,7 @@ sort_file_names(FileNames) -> FileNames). internal_sync(State = #msstate { current_file_handle = CurHdl, - on_sync = Syncs }) -> + on_sync = Syncs }) -> State1 = stop_sync_timer(State), case Syncs of [] -> State1; @@ -829,8 +821,8 @@ internal_sync(State = #msstate { current_file_handle = CurHdl, State1 #msstate { on_sync = [] } end. -read_message(Guid, From, State = - #msstate { dedup_cache_ets = DedupCacheEts }) -> +read_message(Guid, From, + State = #msstate { dedup_cache_ets = DedupCacheEts }) -> case index_lookup(Guid, State) of not_found -> gen_server2:reply(From, not_found), State; @@ -846,11 +838,11 @@ read_message(Guid, From, State = read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, file = File, offset = Offset } = MsgLoc, - State = #msstate { current_file = CurFile, + State = #msstate { current_file = CurFile, current_file_handle = CurHdl, - file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts, - cur_file_cache_ets = CurFileCacheEts }) -> + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> case File =:= CurFile of true -> {Msg, State1} = @@ -913,6 +905,14 @@ maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> ok. +update_msg_cache(CacheEts, Guid, Msg) -> + case ets:insert_new(CacheEts, {Guid, Msg, 1}) of + true -> ok; + false -> safe_ets_update_counter_ok( + CacheEts, Guid, {3, +1}, + fun () -> update_msg_cache(CacheEts, Guid, Msg) end) + end. + contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> case index_lookup(Guid, State) of not_found -> @@ -929,9 +929,9 @@ contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> end end. 
-remove_message(Guid, State = #msstate { sum_valid_data = SumValid, +remove_message(Guid, State = #msstate { sum_valid_data = SumValid, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts }) -> + dedup_cache_ets = DedupCacheEts }) -> #msg_location { ref_count = RefCount, file = File, offset = Offset, total_size = TotalSize } = index_lookup(Guid, State), @@ -942,8 +942,8 @@ remove_message(Guid, State = #msstate { sum_valid_data = SumValid, %% msg. ok = remove_cache_entry(DedupCacheEts, Guid), [#file_summary { valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - locked = Locked }] = + contiguous_top = ContiguousTop, + locked = Locked }] = ets:lookup(FileSummaryEts, File), case Locked of true -> @@ -1063,7 +1063,6 @@ store_file_summary(Tid, Dir) -> [{extended_info, [object_count]}]), ets:delete(Tid). - preallocate(Hdl, FileSizeLimit, FinalPos) -> {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), ok = file_handle_cache:truncate(Hdl), @@ -1130,7 +1129,7 @@ decrement_cache(DedupCacheEts, Guid) -> %%---------------------------------------------------------------------------- index_lookup(Key, #client_msstate { index_module = Index, - index_state = State }) -> + index_state = State }) -> Index:lookup(Key, State); index_lookup(Key, #msstate { index_module = Index, index_state = State }) -> @@ -1143,14 +1142,14 @@ index_update(Obj, #msstate { index_module = Index, index_state = State }) -> Index:update(Obj, State). index_update_fields(Key, Updates, #msstate { index_module = Index, - index_state = State }) -> + index_state = State }) -> Index:update_fields(Key, Updates, State). index_delete(Key, #msstate { index_module = Index, index_state = State }) -> Index:delete(Key, State). index_delete_by_file(File, #msstate { index_module = Index, - index_state = State }) -> + index_state = State }) -> Index:delete_by_file(File, State). 
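preallocate/3, kept as context a little further up, leans on a useful property of truncate: seeking past end-of-file and truncating there grows the file to that length, so the whole file is allocated before any message is appended to it. The same trick with the plain file module (standalone sketch; the path and size are arbitrary):

preallocate_sketch(Path, Size) ->
    {ok, Hdl} = file:open(Path, [write, raw, binary]),
    %% position past EOF, then truncate: the file becomes Size bytes long
    {ok, Size} = file:position(Hdl, Size),
    ok = file:truncate(Hdl),
    %% rewind so that subsequent writes start at the beginning
    {ok, 0} = file:position(Hdl, 0),
    ok = file:close(Hdl).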
%%---------------------------------------------------------------------------- @@ -1315,13 +1314,14 @@ build_index(true, _Files, State = #msstate { file_summary_ets = FileSummaryEts }) -> ets:foldl( fun (#file_summary { valid_total_size = ValidTotalSize, - file_size = FileSize, file = File }, + file_size = FileSize, + file = File }, {_Offset, State1 = #msstate { sum_valid_data = SumValid, - sum_file_size = SumFileSize }}) -> + sum_file_size = SumFileSize }}) -> {FileSize, State1 #msstate { sum_valid_data = SumValid + ValidTotalSize, - sum_file_size = SumFileSize + FileSize, - current_file = File }} + sum_file_size = SumFileSize + FileSize, + current_file = File }} end, {0, State}, FileSummaryEts); build_index(false, Files, State) -> {ok, Pid} = gatherer:start_link(), @@ -1333,8 +1333,8 @@ build_index(false, Files, State) -> build_index(Gatherer, Left, [], State = #msstate { file_summary_ets = FileSummaryEts, - sum_valid_data = SumValid, - sum_file_size = SumFileSize }) -> + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> case gatherer:fetch(Gatherer) of finished -> ok = rabbit_misc:unlink_and_capture_exit(Gatherer), @@ -1351,7 +1351,7 @@ build_index(Gatherer, Left, [], build_index(Gatherer, Left, [], State #msstate { sum_valid_data = SumValid + ValidTotalSize, - sum_file_size = SumFileSize + FileSize }) + sum_file_size = SumFileSize + FileSize }) end; build_index(Gatherer, Left, [File|Files], State) -> Child = make_ref(), @@ -1395,10 +1395,14 @@ build_index_worker( [F|_] -> {F, FileSize} end, ok = gatherer:produce(Gatherer, #file_summary { - file = File, valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, locked = false, - left = Left, right = Right, file_size = FileSize1, - readers = 0 }), + file = File, + valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + left = Left, + right = Right, + file_size = FileSize1, + locked = false, + readers = 0 }), ok = gatherer:finished(Gatherer, Ref). 
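Each summary that build_index_worker produces above carries both valid_total_size and contiguous_top, the byte offset up to which the file is hole-free. A toy re-derivation over {Offset, TotalSize} pairs sorted by offset (hypothetical helper; the real computation is find_contiguous_block_prefix, which appears as context in a later hunk):

contiguous_top(Messages) ->
    lists:foldl(fun ({Offset, TotalSize}, Offset) -> Offset + TotalSize;
                    ({_Offset, _TotalSize}, Top)  -> Top
                end, 0, Messages).

%% contiguous_top([{0,10}, {10,20}, {40,5}]) =:= 30: the hole at 30..39
%% freezes the prefix even though 35 bytes of the file are valid.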
%%---------------------------------------------------------------------------- @@ -1419,11 +1423,15 @@ maybe_roll_to_new_file( {ok, NextHdl} = open_file( Dir, filenum_to_name(NextFile), ?WRITE_MODE), - true = ets:insert_new( - FileSummaryEts, #file_summary { - file = NextFile, valid_total_size = 0, contiguous_top = 0, - left = CurFile, right = undefined, file_size = 0, - locked = false, readers = 0 }), + true = ets:insert_new(FileSummaryEts, #file_summary { + file = NextFile, + valid_total_size = 0, + contiguous_top = 0, + left = CurFile, + right = undefined, + file_size = 0, + locked = false, + readers = 0 }), true = ets:update_element(FileSummaryEts, CurFile, {#file_summary.right, NextFile}), true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}), @@ -1481,11 +1489,13 @@ find_files_to_gc(FileSummaryEts, N, First) -> find_files_to_gc(_FileSummaryEts, _N, #file_summary {}, [], Pairs) -> lists:reverse(Pairs); find_files_to_gc(FileSummaryEts, N, - #file_summary { right = Source, file = Dest, + #file_summary { right = Source, + file = Dest, valid_total_size = DestValid }, - [SourceObj = #file_summary { left = Dest, right = SourceRight, + [SourceObj = #file_summary { left = Dest, + right = SourceRight, valid_total_size = SourceValid, - file = Source }], + file = Source }], Pairs) when DestValid + SourceValid =< ?FILE_SIZE_LIMIT andalso not is_atom(SourceRight) -> Pair = {Source, Dest}, @@ -1502,13 +1512,17 @@ find_files_to_gc(FileSummaryEts, N, _Left, delete_file_if_empty(File, State = #msstate { current_file = File }) -> State; -delete_file_if_empty(File, State = - #msstate { dir = Dir, sum_file_size = SumFileSize, - file_handles_ets = FileHandlesEts, - file_summary_ets = FileSummaryEts }) -> - [#file_summary { valid_total_size = ValidData, file_size = FileSize, - left = Left, right = Right, locked = false }] - = ets:lookup(FileSummaryEts, File), +delete_file_if_empty(File, State = #msstate { + dir = Dir, + sum_file_size = SumFileSize, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> + [#file_summary { valid_total_size = ValidData, + left = Left, + right = Right, + file_size = FileSize, + locked = false }] = + ts:lookup(FileSummaryEts, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined @@ -1540,15 +1554,17 @@ delete_file_if_empty(File, State = gc(SourceFile, DestFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> [SourceObj = #file_summary { - readers = SourceReaders, - valid_total_size = SourceValidData, left = DestFile, - file_size = SourceFileSize, locked = true }] = - ets:lookup(FileSummaryEts, SourceFile), + readers = SourceReaders, + valid_total_size = SourceValidData, + left = DestFile, + file_size = SourceFileSize, + locked = true }] = ets:lookup(FileSummaryEts, SourceFile), [DestObj = #file_summary { - readers = DestReaders, - valid_total_size = DestValidData, right = SourceFile, - file_size = DestFileSize, locked = true }] = - ets:lookup(FileSummaryEts, DestFile), + readers = DestReaders, + valid_total_size = DestValidData, + right = SourceFile, + file_size = DestFileSize, + locked = true }] = ets:lookup(FileSummaryEts, DestFile), case SourceReaders =:= 0 andalso DestReaders =:= 0 of true -> TotalValidData = DestValidData + SourceValidData, @@ -1565,13 +1581,13 @@ gc(SourceFile, DestFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> gc(SourceFile, DestFile, State) end. 
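The false branch of gc/3 above polls until both files have lost all their readers; polling is safe because both files are locked by this point, so the reader counts can only fall. The same wait factored out on its own (hypothetical helper; assumes the #file_summary record from rabbit_msg_store.hrl):

wait_for_no_readers(FileSummaryEts, File) ->
    [#file_summary { readers = Readers }] = ets:lookup(FileSummaryEts, File),
    case Readers of
        0 -> ok;
        _ -> timer:sleep(100), %% same back-off as gc/3 above
             wait_for_no_readers(FileSummaryEts, File)
    end.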
-combine_files(#file_summary { file = Source, +combine_files(#file_summary { file = Source, valid_total_size = SourceValid, - left = Destination }, - #file_summary { file = Destination, + left = Destination }, + #file_summary { file = Destination, valid_total_size = DestinationValid, - contiguous_top = DestinationContiguousTop, - right = Source }, + contiguous_top = DestinationContiguousTop, + right = Source }, State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), -- cgit v1.2.1 From a690b79354cca9ac0edf743760d2f19c46f5b756 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 6 May 2010 09:33:34 +0100 Subject: whoops --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index d92d7aa3..18e8b7e2 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1522,7 +1522,7 @@ delete_file_if_empty(File, State = #msstate { right = Right, file_size = FileSize, locked = false }] = - ts:lookup(FileSummaryEts, File), + ets:lookup(FileSummaryEts, File), case ValidData of %% we should NEVER find the current file in here hence right %% should always be a file, not undefined -- cgit v1.2.1 From a49ea83a3606a5276cea0ae008a4fe25fc696aee Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 6 May 2010 13:54:05 +0100 Subject: refactoring and tweaking of gatherer code and usage - better function names - get rid of the notion of named forks; just counting them is sufficient - don't terminate automatically, which results in a more symmetric API and allows gatherer reuse - in the gatherer usage, spawn workers with a thunk rather than MFA, which is less cryptic - use of folds and list comprehensions in queue_index_walker, in preference over recursion --- src/gatherer.erl | 111 +++++++++++++++++++++++---------------------- src/rabbit_msg_store.erl | 44 +++++++++--------- src/rabbit_queue_index.erl | 75 +++++++++++++----------------- 3 files changed, 110 insertions(+), 120 deletions(-) diff --git a/src/gatherer.erl b/src/gatherer.erl index d5b35e96..30cb5909 100644 --- a/src/gatherer.erl +++ b/src/gatherer.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/0, wait_on/2, produce/2, finished/2, fetch/1]). +-export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -43,10 +43,11 @@ -ifdef(use_specs). -spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(wait_on/2 :: (pid(), any()) -> 'ok'). --spec(produce/2 :: (pid(), any()) -> 'ok'). --spec(finished/2 :: (pid(), any()) -> 'ok'). --spec(fetch/1 :: (pid()) -> {'value', any()} | 'finished'). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(fork/1 :: (pid()) -> 'ok'). +-spec(finish/1 :: (pid()) -> 'ok'). +-spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). -endif. @@ -57,74 +58,78 @@ %%---------------------------------------------------------------------------- --record(gstate, { waiting_on, results, blocking }). +-record(gstate, { forks, values, blocked }). %%---------------------------------------------------------------------------- -wait_on(Pid, Token) -> - gen_server2:call(Pid, {wait_on, Token}, infinity). +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). -produce(Pid, Result) -> - gen_server2:cast(Pid, {produce, Result}). 
+stop(Pid) -> + gen_server2:call(Pid, stop, infinity). -finished(Pid, Token) -> - gen_server2:call(Pid, {finished, Token}, infinity). +fork(Pid) -> + gen_server2:call(Pid, fork, infinity). -fetch(Pid) -> - gen_server2:call(Pid, fetch, infinity). +finish(Pid) -> + gen_server2:cast(Pid, finish). -%%---------------------------------------------------------------------------- +in(Pid, Value) -> + gen_server2:cast(Pid, {in, Value}). -start_link() -> - gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). +out(Pid) -> + gen_server2:call(Pid, out, infinity). + +%%---------------------------------------------------------------------------- init([]) -> - {ok, #gstate { waiting_on = sets:new(), results = queue:new(), - blocking = queue:new() }, hibernate, + {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() }, + hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -handle_call({wait_on, Token}, _From, State = #gstate { waiting_on = Tokens }) -> - {reply, ok, State #gstate { waiting_on = sets:add_element(Token, Tokens) }, - hibernate}; +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; -handle_call({finished, Token}, _From, - State = #gstate { waiting_on = Tokens, results = Results, - blocking = Blocking }) -> - Tokens1 = sets:del_element(Token, Tokens), - State1 = State #gstate { waiting_on = Tokens1 }, - case 0 =:= sets:size(Tokens1) andalso queue:is_empty(Results) andalso - not queue:is_empty(Blocking) of - true -> {stop, normal, ok, State1}; - false -> {reply, ok, State1, hibernate} - end; +handle_call(fork, _From, State = #gstate { forks = Forks }) -> + {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; -handle_call(fetch, From, - State = #gstate { waiting_on = Tokens, results = Results, - blocking = Blocking }) -> - case queue:out(Results) of - {empty, _Results} -> - case sets:size(Tokens) of - 0 -> {stop, normal, finished, State}; +handle_call(out, From, State = #gstate { forks = Forks, + values = Values, + blocked = Blocked }) -> + case queue:out(Values) of + {empty, _} -> + case Forks of + 0 -> {reply, empty, State, hibernate}; _ -> {noreply, - State #gstate { blocking = queue:in(From, Blocking) }, + State #gstate { blocked = queue:in(From, Blocked) }, hibernate} end; - {{value, Result}, Results1} -> - {reply, {value, Result}, State #gstate { results = Results1 }, - hibernate} + {{value, Value}, NewValues} -> + {reply, Value, State #gstate { values = NewValues }, hibernate} end; handle_call(Msg, _From, State) -> {stop, {unexpected_call, Msg}, State}. 
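The out/1 clause above is where the gatherer's flow control lives; in summary (a paraphrase, not code from the source):

%% values non-empty          -> reply with the queued value immediately
%% values empty, forks =:= 0 -> reply 'empty': no producer can add more
%% values empty, forks > 0   -> park the caller in 'blocked'; it is
%%                              answered by a later in/2, or with 'empty'
%%                              by the final finish/1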
-handle_cast({produce, Result}, - State = #gstate { blocking = Blocking, results = Results }) -> - {noreply, case queue:out(Blocking) of - {empty, _Blocking} -> - State #gstate { results = queue:in(Result, Results) }; - {{value, Blocked}, Blocking1} -> - gen_server2:reply(Blocked, {value, Result}), - State #gstate { blocking = Blocking1 } +handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> + NewForks = Forks - 1, + NewBlocked = case NewForks of + 0 -> [gen_server2:reply(From, empty) || + From <- queue:to_list(Blocked)], + queue:new(); + _ -> Blocked + end, + {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, + hibernate}; + +handle_cast({in, Value}, State = #gstate { values = Values, + blocked = Blocked }) -> + {noreply, case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in(Value, Values) }; + {{value, From}, NewBlocked} -> + gen_server2:reply(From, {value, Value}), + State #gstate { blocked = NewBlocked } end, hibernate}; handle_cast(Msg, State) -> @@ -136,7 +141,5 @@ handle_info(Msg, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -terminate(_Reason, State = #gstate { blocking = Blocking } ) -> - [gen_server2:reply(Blocked, finished) || - Blocked <- queue:to_list(Blocking)], +terminate(_Reason, State) -> State. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 18e8b7e2..656bec28 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -37,8 +37,7 @@ sync/3, client_init/2, client_terminate/1, delete_client/2, clean/2, successfully_recovered_state/1]). --export([sync/1, gc_done/4, set_maximum_since_use/2, - build_index_worker/6, gc/3]). %% internal +-export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1]). @@ -1310,8 +1309,8 @@ find_contiguous_block_prefix([{Guid, TotalSize, ExpectedOffset} | Tail], find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, Guids) -> {ExpectedOffset, Guids}. -build_index(true, _Files, State = - #msstate { file_summary_ets = FileSummaryEts }) -> +build_index(true, _Files, State = #msstate { + file_summary_ets = FileSummaryEts }) -> ets:foldl( fun (#file_summary { valid_total_size = ValidTotalSize, file_size = FileSize, @@ -1335,9 +1334,10 @@ build_index(Gatherer, Left, [], State = #msstate { file_summary_ets = FileSummaryEts, sum_valid_data = SumValid, sum_file_size = SumFileSize }) -> - case gatherer:fetch(Gatherer) of - finished -> + case gatherer:out(Gatherer) of + empty -> ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + ok = gatherer:stop(Gatherer), ok = index_delete_by_file(undefined, State), Offset = case ets:lookup(FileSummaryEts, Left) of [] -> 0; @@ -1354,15 +1354,15 @@ build_index(Gatherer, Left, [], sum_file_size = SumFileSize + FileSize }) end; build_index(Gatherer, Left, [File|Files], State) -> - Child = make_ref(), - ok = gatherer:wait_on(Gatherer, Child), + ok = gatherer:fork(Gatherer), ok = worker_pool:submit_async( - {?MODULE, build_index_worker, - [Gatherer, Child, State, Left, File, Files]}), + fun () -> build_index_worker(Gatherer, State, + Left, File, Files) + end), build_index(Gatherer, File, Files, State). 
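Putting the reworked API together, a minimal producer/consumer round trip looks like the following (standalone sketch: spawn_link stands in for worker_pool:submit_async, and out/1 is shown returning {value, V} as per the 'oops' fix a couple of commits further on):

demo() ->
    {ok, Gatherer} = gatherer:start_link(),
    [begin
         ok = gatherer:fork(Gatherer),
         spawn_link(fun () ->
                            ok = gatherer:in(Gatherer, {result, N}),
                            ok = gatherer:finish(Gatherer)
                    end)
     end || N <- lists:seq(1, 3)],
    Results = drain(Gatherer, []),
    ok = gatherer:stop(Gatherer),
    Results.

%% pull values until every fork has finished and the queue is empty
drain(Gatherer, Acc) ->
    case gatherer:out(Gatherer) of
        empty          -> lists:reverse(Acc);
        {value, Value} -> drain(Gatherer, [Value | Acc])
    end.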
-build_index_worker( - Gatherer, Ref, State = #msstate { dir = Dir }, Left, File, Files) -> +build_index_worker(Gatherer, State = #msstate { dir = Dir }, + Left, File, Files) -> {ok, Messages, FileSize} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = @@ -1394,16 +1394,16 @@ build_index_worker( end}; [F|_] -> {F, FileSize} end, - ok = gatherer:produce(Gatherer, #file_summary { - file = File, - valid_total_size = ValidTotalSize, - contiguous_top = ContiguousTop, - left = Left, - right = Right, - file_size = FileSize1, - locked = false, - readers = 0 }), - ok = gatherer:finished(Gatherer, Ref). + ok = gatherer:in(Gatherer, #file_summary { + file = File, + valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + left = Left, + right = Right, + file_size = FileSize1, + locked = false, + readers = 0 }), + ok = gatherer:finish(Gatherer). %%---------------------------------------------------------------------------- %% garbage collection / compaction / aggregation -- internal diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 95df8938..0cb44e0a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -36,8 +36,6 @@ read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, recover/1]). --export([queue_index_walker_reader/3]). %% for internal use only - -define(CLEAN_FILENAME, "clean.dot"). %%---------------------------------------------------------------------------- @@ -452,55 +450,44 @@ recover(DurableQueues) -> Dir = filename:join(queues_dir(), DirName), ok = rabbit_misc:recursive_delete([Dir]) end, TransientDirs), - {DurableTerms, {fun queue_index_walker/1, DurableQueueNames}}. + {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. %%---------------------------------------------------------------------------- %% Msg Store Startup Delta Function %%---------------------------------------------------------------------------- -queue_index_walker(DurableQueues) when is_list(DurableQueues) -> - {ok, Pid} = gatherer:start_link(), - queue_index_walker({DurableQueues, Pid}); - -queue_index_walker({[], Gatherer}) -> - case gatherer:fetch(Gatherer) of - finished -> - rabbit_misc:unlink_and_capture_exit(Gatherer), +queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> + {ok, Gatherer} = gatherer:start_link(), + [begin + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> queue_index_walker_reader(QueueName, Gatherer) + end) + end || QueueName <- DurableQueues], + queue_index_walker({next, Gatherer}); + +queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> + case gatherer:out(Gatherer) of + empty -> + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + ok = gatherer:stop(Gatherer), finished; {value, {Guid, Count}} -> - {Guid, Count, {[], Gatherer}} - end; -queue_index_walker({[QueueName | QueueNames], Gatherer}) -> - Child = make_ref(), - ok = gatherer:wait_on(Gatherer, Child), - ok = worker_pool:submit_async({?MODULE, queue_index_walker_reader, - [QueueName, Gatherer, Child]}), - queue_index_walker({QueueNames, Gatherer}). - -queue_index_walker_reader(QueueName, Gatherer, Ref) -> - State = blank_state(QueueName), - State1 = load_journal(State), - SegNums = all_segment_nums(State1), - queue_index_walker_reader(Gatherer, Ref, State1, SegNums). 
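The rewritten walker above is a pull-style generator: the caller keeps applying the walker function to the seed it returned last, until it yields 'finished'. The msg store's count_msg_refs is the real consumer of this shape; a generic one would look like this (fold_deltas/4 is a hypothetical name, not in the source):

fold_deltas(Gen, Seed, Fun, Acc) ->
    case Gen(Seed) of
        finished                -> Acc;
        {Guid, Count, NextSeed} -> fold_deltas(Gen, NextSeed, Fun,
                                               Fun(Guid, Count, Acc))
    end.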
- -queue_index_walker_reader(Gatherer, Ref, State, []) -> - _State = terminate(false, [], State), - ok = gatherer:finished(Gatherer, Ref); -queue_index_walker_reader(Gatherer, Ref, State, [Seg | SegNums]) -> - SeqId = reconstruct_seq_id(Seg, 0), - {Messages, State1} = read_segment_entries(SeqId, State), - State2 = queue_index_walker_reader1(Gatherer, State1, Messages), - queue_index_walker_reader(Gatherer, Ref, State2, SegNums). - -queue_index_walker_reader1(_Gatherer, State, []) -> - State; -queue_index_walker_reader1( - Gatherer, State, [{Guid, _SeqId, IsPersistent, _IsDelivered} | Msgs]) -> - case IsPersistent of - true -> gatherer:produce(Gatherer, {Guid, 1}); - false -> ok - end, - queue_index_walker_reader1(Gatherer, State, Msgs). + {Guid, Count, {next, Gatherer}} + end. + +queue_index_walker_reader(QueueName, Gatherer) -> + State = load_journal(blank_state(QueueName)), + State1 = lists:foldl( + fun (Seg, State2) -> + SeqId = reconstruct_seq_id(Seg, 0), + {Messages, State3} = read_segment_entries(SeqId, State2), + [ok = gatherer:in(Gatherer, {Guid, 1}) || + {Guid, _SeqId, true, _IsDelivered} <- Messages], + State3 + end, State, all_segment_nums(State)), + _State = terminate(false, [], State1), + ok = gatherer:finish(Gatherer). %%---------------------------------------------------------------------------- %% Minors -- cgit v1.2.1 From e87fddd5f4619592eb062176c74fff8b48bcccc9 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 6 May 2010 14:03:08 +0100 Subject: oops --- src/gatherer.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gatherer.erl b/src/gatherer.erl index 30cb5909..31dda16e 100644 --- a/src/gatherer.erl +++ b/src/gatherer.erl @@ -104,8 +104,8 @@ handle_call(out, From, State = #gstate { forks = Forks, State #gstate { blocked = queue:in(From, Blocked) }, hibernate} end; - {{value, Value}, NewValues} -> - {reply, Value, State #gstate { values = NewValues }, hibernate} + {{value, _Value} = V, NewValues} -> + {reply, V, State #gstate { values = NewValues }, hibernate} end; handle_call(Msg, _From, State) -> -- cgit v1.2.1 From f08421b911ef61d308c90008703513a7c6e47652 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 7 May 2010 17:13:25 +0100 Subject: minor refactor --- src/rabbit_msg_store.erl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 656bec28..1ad54445 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -532,9 +532,9 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> {FileSummaryRecovered, FileSummaryEts} = recover_file_summary(AllCleanShutdown, Dir), - DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), - FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, - [ordered_set, public]), + DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), + FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, + [ordered_set, public]), CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), State = #msstate { dir = Dir, @@ -757,9 +757,8 @@ terminate(_Reason, State = #msstate { index_state = IndexState, end, State3 = close_all_handles(State1), store_file_summary(FileSummaryEts, Dir), - ets:delete(DedupCacheEts), - ets:delete(FileHandlesEts), - ets:delete(CurFileCacheEts), + [ets:delete(T) || + T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]], IndexModule:terminate(IndexState), store_clean_shutdown([{client_refs, 
sets:to_list(ClientRefs)}, {index_module, IndexModule}], Dir), @@ -1059,8 +1058,7 @@ recover_file_summary(true, Dir) -> store_file_summary(Tid, Dir) -> ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), - [{extended_info, [object_count]}]), - ets:delete(Tid). + [{extended_info, [object_count]}]). preallocate(Hdl, FileSizeLimit, FinalPos) -> {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), -- cgit v1.2.1 From fb5e9c9d591a9f740878814aa2a399e497e665fc Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 10 May 2010 08:20:32 +0100 Subject: more obvious check for current file and some cosmetic tweaks --- src/rabbit_msg_store.erl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 1ad54445..64b543b3 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -466,7 +466,7 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, %% badarg scenario above, but we don't have a missing file %% - we just have the /wrong/ file). case index_lookup(Guid, CState) of - MsgLocation = #msg_location { file = File } -> + #msg_location { file = File } = MsgLocation -> %% Still the same file. %% This is fine to fail (already exists) ets:insert_new(FileHandlesEts, {{self(), File}, open}), @@ -1168,7 +1168,7 @@ count_msg_refs(Gen, Seed, State) -> index_insert(#msg_location { guid = Guid, ref_count = Delta }, State); - StoreEntry = #msg_location { ref_count = RefCount } -> + #msg_location { ref_count = RefCount } = StoreEntry -> NewRefCount = RefCount + Delta, case NewRefCount of 0 -> index_delete(Guid, State); @@ -1342,9 +1342,8 @@ build_index(Gatherer, Left, [], [#file_summary { file_size = FileSize }] -> FileSize end, {Offset, State #msstate { current_file = Left }}; - {value, FileSummary = - #file_summary { valid_total_size = ValidTotalSize, - file_size = FileSize }} -> + {value, #file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize } = FileSummary} -> true = ets:insert_new(FileSummaryEts, FileSummary), build_index(Gatherer, Left, [], State #msstate { @@ -1495,7 +1494,7 @@ find_files_to_gc(FileSummaryEts, N, valid_total_size = SourceValid, file = Source }], Pairs) when DestValid + SourceValid =< ?FILE_SIZE_LIMIT andalso - not is_atom(SourceRight) -> + SourceRight =/= undefined -> Pair = {Source, Dest}, case N == 1 of true -> [Pair]; @@ -1525,12 +1524,12 @@ delete_file_if_empty(File, State = #msstate { %% we should NEVER find the current file in here hence right %% should always be a file, not undefined 0 -> case {Left, Right} of - {undefined, _} when not is_atom(Right) -> + {undefined, _} when Right =/= undefined -> %% the eldest file is empty. 
                    true = ets:update_element(
                             FileSummaryEts, Right,
                             {#file_summary.left, undefined});
-                {_, _} when not is_atom(Right) ->
+                {_, _} when Right =/= undefined ->
                    true = ets:update_element(FileSummaryEts, Right,
                                              {#file_summary.left, Left}),
                    true = ets:update_element(FileSummaryEts, Left,
@@ -1657,7 +1656,7 @@ find_unremoved_messages_in_file(File,
    %% foldl will reverse so will end up with msgs in ascending offset order
    lists:foldl(fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) ->
                        case Index:lookup(Guid, IndexState) of
-                            Entry = #msg_location { file = File } ->
+                            #msg_location { file = File } = Entry ->
                                {[ Entry | List ], TotalSize + Size};
                            _ -> Acc
-- cgit v1.2.1

From 220e3ac569937d6014b533a937040e11edf6d9bb Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Mon, 10 May 2010 16:23:14 +0100
Subject: trigger possible msg_store gc in all the right places

- on removal of a message (since there is now more garbage)
- at the end of init (since the recovery may have created garbage)
- after rolling to a new file (since the old current file may be
  eligible for compaction with its left neighbour)
- at the end of gc (since we may not have reclaimed enough garbage to
  drop below the gc threshold)
---
 src/rabbit_msg_store.erl | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index 64b543b3..c8e32665 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -40,7 +40,7 @@
 -export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal

 -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
-         terminate/2, code_change/3, handle_pre_hibernate/1]).
+         terminate/2, code_change/3]).

 %%----------------------------------------------------------------------------

@@ -581,7 +581,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) ->
     {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule,
                                                  FileSummaryEts),

-    {ok, State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid },
+    {ok, maybe_compact(
+           State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }),
      hibernate,
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
       ?DESIRED_HIBERNATE}}.
@@ -650,11 +651,10 @@ handle_cast({write, Guid, Msg},
                        {#file_summary.file_size, FileSize + TotalSize}]),
             NextOffset = CurOffset + TotalSize,
             noreply(
-              maybe_compact(
-                maybe_roll_to_new_file(
-                  NextOffset, State #msstate {
-                                sum_valid_data = SumValid + TotalSize,
-                                sum_file_size = SumFileSize + TotalSize })));
+              maybe_roll_to_new_file(
+                NextOffset, State #msstate {
+                              sum_valid_data = SumValid + TotalSize,
+                              sum_file_size = SumFileSize + TotalSize }));
         #msg_location { ref_count = RefCount } ->
             %% We already know about it, just update counter. Only
             %% update field otherwise bad interaction with concurrent GC
@@ -722,9 +722,10 @@ handle_cast({gc_done, Reclaimed, Source, Dest},
                               [{#file_summary.locked, false},
                                {#file_summary.right, SourceRight}]),
     true = ets:delete(FileSummaryEts, Source),
-    noreply(run_pending(
-              State #msstate { sum_file_size = SumFileSize - Reclaimed,
-                               gc_active = false }));
+    noreply(
+      maybe_compact(run_pending(
+                      State #msstate { sum_file_size = SumFileSize - Reclaimed,
+                                       gc_active = false })));

 handle_cast({set_maximum_since_use, Age}, State) ->
     ok = file_handle_cache:set_maximum_since_use(Age),
@@ -768,9 +769,6 @@ terminate(_Reason, State = #msstate { index_state = IndexState,
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
-handle_pre_hibernate(State) -> - {hibernate, maybe_compact(State)}. - %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- @@ -1432,8 +1430,8 @@ maybe_roll_to_new_file( true = ets:update_element(FileSummaryEts, CurFile, {#file_summary.right, NextFile}), true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}), - State1 #msstate { current_file_handle = NextHdl, - current_file = NextFile }; + maybe_compact(State1 #msstate { current_file_handle = NextHdl, + current_file = NextFile }); maybe_roll_to_new_file(_, State) -> State. -- cgit v1.2.1 From ed24343f8837a806b14f5e56f513274271c7493d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 10 May 2010 18:51:09 +0100 Subject: change the way we pick gc candidates instead of picking them with a random geometric distribution, which requires a magic constant and involved some rather obscure code, we simply pick the eldest suitable file pair. --- src/rabbit_msg_store.erl | 159 +++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 87 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c8e32665..af711a60 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -47,7 +47,6 @@ -include("rabbit_msg_store.hrl"). -define(SYNC_INTERVAL, 5). %% milliseconds --define(GEOMETRIC_P, 0.3). %% parameter to geometric distribution rng -define(CLEAN_FILENAME, "clean.dot"). -define(FILE_SUMMARY_FILENAME, "file_summary.ets"). @@ -694,9 +693,9 @@ handle_cast({sync, Guids, K}, handle_cast(sync, State) -> noreply(internal_sync(State)); -handle_cast({gc_done, Reclaimed, Source, Dest}, +handle_cast({gc_done, Reclaimed, Src, Dst}, State = #msstate { sum_file_size = SumFileSize, - gc_active = {Source, Dest}, + gc_active = {Src, Dst}, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts }) -> %% GC done, so now ensure that any clients that have open fhs to @@ -706,22 +705,21 @@ handle_cast({gc_done, Reclaimed, Source, Dest}, %% up the GC, the client could find the close, and close and %% reopen the fh, whilst the GC is waiting for readers to %% disappear, before it's actually done the GC. 
- true = mark_handle_to_close(FileHandlesEts, Source), - true = mark_handle_to_close(FileHandlesEts, Dest), - %% we always move data left, so Source has gone and was on the + true = mark_handle_to_close(FileHandlesEts, Src), + true = mark_handle_to_close(FileHandlesEts, Dst), + %% we always move data left, so Src has gone and was on the %% right, so need to make dest = source.right.left, and also %% dest.right = source.right - [#file_summary { left = Dest, - right = SourceRight, + [#file_summary { left = Dst, + right = SrcRight, locked = true, - readers = 0 }] = ets:lookup(FileSummaryEts, Source), - %% this could fail if SourceRight == undefined - ets:update_element(FileSummaryEts, SourceRight, - {#file_summary.left, Dest}), - true = ets:update_element(FileSummaryEts, Dest, + readers = 0 }] = ets:lookup(FileSummaryEts, Src), + %% this could fail if SrcRight == undefined + ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}), + true = ets:update_element(FileSummaryEts, Dst, [{#file_summary.locked, false}, - {#file_summary.right, SourceRight}]), - true = ets:delete(FileSummaryEts, Source), + {#file_summary.right, SrcRight}]), + true = ets:delete(FileSummaryEts, Src), noreply( maybe_compact(run_pending( State #msstate { sum_file_size = SumFileSize - Reclaimed, @@ -1442,69 +1440,56 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, file_summary_ets = FileSummaryEts }) when SumFileSize > 3 * ?FILE_SIZE_LIMIT andalso (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION -> - First = ets:first(FileSummaryEts), - N = rabbit_misc:ceil(math:log(1.0 - random:uniform()) / - math:log(1.0 - ?GEOMETRIC_P)), - case find_files_to_gc(FileSummaryEts, N, First) of - undefined -> + case ets:first(FileSummaryEts) of + '$end_of_table' -> State; - {Source, Dest} -> - State1 = close_handle(Source, close_handle(Dest, State)), - true = ets:update_element(FileSummaryEts, Source, - {#file_summary.locked, true}), - true = ets:update_element(FileSummaryEts, Dest, - {#file_summary.locked, true}), - ok = rabbit_msg_store_gc:gc(GCPid, Source, Dest), - State1 #msstate { gc_active = {Source, Dest} } + First -> + case find_files_to_gc(FileSummaryEts, + ets:lookup(FileSummaryEts, First)) of + not_found -> + State; + {Src, Dst} -> + State1 = close_handle(Src, close_handle(Dst, State)), + true = ets:update_element(FileSummaryEts, Src, + {#file_summary.locked, true}), + true = ets:update_element(FileSummaryEts, Dst, + {#file_summary.locked, true}), + ok = rabbit_msg_store_gc:gc(GCPid, Src, Dst), + State1 #msstate { gc_active = {Src, Dst} } + end end; maybe_compact(State) -> State. mark_handle_to_close(FileHandlesEts, File) -> [ ets:update_element(FileHandlesEts, Key, {2, close}) - || {Key, open} <- ets:match_object(FileHandlesEts, - {{'_', File}, open}) ], + || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ], true. 
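The relinking performed by the gc_done clause above is a plain doubly-linked-list node removal over the summary table, with 'undefined' playing the part of the nil pointer. In isolation (hypothetical helper; assumes the #file_summary record):

unlink_file(FileSummaryEts, Src, Dst) ->
    [#file_summary { right = SrcRight }] = ets:lookup(FileSummaryEts, Src),
    %% Dst inherits Src's right neighbour...
    true = ets:update_element(FileSummaryEts, Dst,
                              {#file_summary.right, SrcRight}),
    %% ...and that neighbour, when there is one, points back at Dst
    case SrcRight of
        undefined -> ok;
        _         -> true = ets:update_element(FileSummaryEts, SrcRight,
                                               {#file_summary.left, Dst}),
                     ok
    end,
    true = ets:delete(FileSummaryEts, Src),
    ok.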
-find_files_to_gc(_FileSummaryEts, _N, '$end_of_table') -> - undefined; -find_files_to_gc(FileSummaryEts, N, First) -> - [FirstObj = #file_summary { right = Right }] = - ets:lookup(FileSummaryEts, First), - Pairs = find_files_to_gc(FileSummaryEts, N, FirstObj, - ets:lookup(FileSummaryEts, Right), []), - case Pairs of - [] -> undefined; - [Pair] -> Pair; - _ -> Len = length(Pairs), %% The list is the wrong way - M = Len - (N rem Len), %% around, so subtract our N - lists:nth(M, Pairs) %% from its length +find_files_to_gc(FileSummaryEts, + [#file_summary { file = Dst, + valid_total_size = DstValid, + right = Src }]) -> + case Src of + undefined -> + not_found; + _ -> + [#file_summary { file = Src, + valid_total_size = SrcValid, + left = Dst, + right = SrcRight }] = Next = + ets:lookup(FileSummaryEts, Src), + case SrcRight of + undefined -> + not_found; + _ -> + case DstValid + SrcValid =< ?FILE_SIZE_LIMIT of + true -> {Src, Dst}; + false -> find_files_to_gc(FileSummaryEts, Next) + end + end end. -find_files_to_gc(_FileSummaryEts, _N, #file_summary {}, [], Pairs) -> - lists:reverse(Pairs); -find_files_to_gc(FileSummaryEts, N, - #file_summary { right = Source, - file = Dest, - valid_total_size = DestValid }, - [SourceObj = #file_summary { left = Dest, - right = SourceRight, - valid_total_size = SourceValid, - file = Source }], - Pairs) when DestValid + SourceValid =< ?FILE_SIZE_LIMIT andalso - SourceRight =/= undefined -> - Pair = {Source, Dest}, - case N == 1 of - true -> [Pair]; - false -> find_files_to_gc(FileSummaryEts, (N - 1), SourceObj, - ets:lookup(FileSummaryEts, SourceRight), - [Pair | Pairs]) - end; -find_files_to_gc(FileSummaryEts, N, _Left, - [Right = #file_summary { right = RightRight }], Pairs) -> - find_files_to_gc( - FileSummaryEts, N, Right, ets:lookup(FileSummaryEts, RightRight), Pairs). 
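A quick check of the new selection rule above, in the style of rabbit_tests (sketch only; assumes the #file_summary record and pretends ?FILE_SIZE_LIMIT is 100 for readability):

find_files_to_gc_test() ->
    T = ets:new(summary, [ordered_set, {keypos, #file_summary.file}]),
    true = ets:insert(T, [#file_summary { file = 0, valid_total_size = 90,
                                          left = undefined, right = 1 },
                          #file_summary { file = 1, valid_total_size = 5,
                                          left = 0, right = 2 },
                          #file_summary { file = 2, valid_total_size = 50,
                                          left = 1, right = undefined }]),
    %% 90 + 5 fits under the limit, so the eldest pair wins; file 2 can
    %% never be a source because its right pointer is undefined
    {1, 0} = find_files_to_gc(T, ets:lookup(T, ets:first(T))),
    passed.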
-
 delete_file_if_empty(File, State = #msstate { current_file = File }) ->
     State;
 delete_file_if_empty(File, State = #msstate {
@@ -1547,33 +1532,33 @@ delete_file_if_empty(File, State = #msstate {
 %% garbage collection / compaction / aggregation -- external
 %%----------------------------------------------------------------------------

-gc(SourceFile, DestFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) ->
-    [SourceObj = #file_summary {
-       readers = SourceReaders,
-       valid_total_size = SourceValidData,
-       left = DestFile,
-       file_size = SourceFileSize,
-       locked = true }] = ets:lookup(FileSummaryEts, SourceFile),
-    [DestObj = #file_summary {
-       readers = DestReaders,
-       valid_total_size = DestValidData,
-       right = SourceFile,
-       file_size = DestFileSize,
-       locked = true }] = ets:lookup(FileSummaryEts, DestFile),
-
-    case SourceReaders =:= 0 andalso DestReaders =:= 0 of
-        true -> TotalValidData = DestValidData + SourceValidData,
-                ok = combine_files(SourceObj, DestObj, State),
+gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) ->
+    [SrcObj = #file_summary {
+       readers = SrcReaders,
+       valid_total_size = SrcValidData,
+       left = DstFile,
+       file_size = SrcFileSize,
+       locked = true }] = ets:lookup(FileSummaryEts, SrcFile),
+    [DstObj = #file_summary {
+       readers = DstReaders,
+       valid_total_size = DstValidData,
+       right = SrcFile,
+       file_size = DstFileSize,
+       locked = true }] = ets:lookup(FileSummaryEts, DstFile),
+
+    case SrcReaders =:= 0 andalso DstReaders =:= 0 of
+        true -> TotalValidData = DstValidData + SrcValidData,
+                ok = combine_files(SrcObj, DstObj, State),
                 %% don't update dest.right, because it could be
                 %% changing at the same time
                 true = ets:update_element(
-                         FileSummaryEts, DestFile,
+                         FileSummaryEts, DstFile,
                          [{#file_summary.valid_total_size, TotalValidData},
                           {#file_summary.contiguous_top, TotalValidData},
                           {#file_summary.file_size, TotalValidData}]),
-                SourceFileSize + DestFileSize - TotalValidData;
+                SrcFileSize + DstFileSize - TotalValidData;
         false -> timer:sleep(100),
-                 gc(SourceFile, DestFile, State)
+                 gc(SrcFile, DstFile, State)
     end.

 combine_files(#file_summary { file = Source,
-- cgit v1.2.1

From 4982de17e10f5c02cb5e8972888f0e6d6c0789aa Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Mon, 10 May 2010 19:12:00 +0100
Subject: nitpick

---
 src/rabbit_msg_store.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index af711a60..ed7b55f2 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -1438,7 +1438,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid,
                                  gc_active = false,
                                  gc_pid = GCPid,
                                  file_summary_ets = FileSummaryEts })
-  when SumFileSize > 3 * ?FILE_SIZE_LIMIT andalso
+  when SumFileSize >= 3 * ?FILE_SIZE_LIMIT andalso
        (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION ->
     case ets:first(FileSummaryEts) of
         '$end_of_table' ->
             State;
-- cgit v1.2.1

From 31da7858404e4b5c1d7028edaa87bbd2490c1959 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Mon, 10 May 2010 19:26:09 +0100
Subject: correct harmless off-by-one mistake

a gc can only take place if we have at least three files - the
current file (which we cannot gc), the source file and the
destination file. We do a quick check whether there is enough data -
garbage or otherwise - to require more than two files. This can give
false positives since the last message in a file can overrun the
limit, but that's ok as the code that follows performs more accurate
(but also more expensive) checks.
---
 src/rabbit_msg_store.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index ed7b55f2..cc139773 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -1438,8 +1438,8 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid,
                                  gc_active = false,
                                  gc_pid = GCPid,
                                  file_summary_ets = FileSummaryEts })
-  when SumFileSize >= 3 * ?FILE_SIZE_LIMIT andalso
-       (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION ->
+  when (SumFileSize > 2 * ?FILE_SIZE_LIMIT andalso
+        (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) ->
     case ets:first(FileSummaryEts) of
         '$end_of_table' ->
             State;
-- cgit v1.2.1

From 7e116d527e304cc8ad7101295a102f11a3eaac1a Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Thu, 13 May 2010 11:19:58 +0100
Subject: cosmetic

---
 src/rabbit_msg_store.erl | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index cc139773..701ca29d 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -378,15 +378,6 @@ clean(Server, BaseDir) ->
 %% Client-side-only helpers
 %%----------------------------------------------------------------------------

-safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
-    try
-        SuccessFun(ets:update_counter(Tab, Key, UpdateOp))
-    catch error:badarg -> FailThunk()
-    end.
-
-safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) ->
-    safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk).
-
 client_read1(Server,
              #msg_location { guid = Guid, file = File } = MsgLocation,
              Defer,
@@ -907,6 +898,15 @@ update_msg_cache(CacheEts, Guid, Msg) ->
                      fun () -> update_msg_cache(CacheEts, Guid, Msg) end)
     end.

+safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
+    try
+        SuccessFun(ets:update_counter(Tab, Key, UpdateOp))
+    catch error:badarg -> FailThunk()
+    end.
+
+safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) ->
+    safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk).
+
 contains_message(Guid, From, State = #msstate { gc_active = GCActive }) ->
     case index_lookup(Guid, State) of
         not_found ->
@@ -1042,6 +1042,12 @@ store_clean_shutdown(Terms, Dir) ->
     rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).

 recover_file_summary(false, _Dir) ->
+    %% TODO: the only reason for this to be an *ordered*_set is so
+    %% that maybe_compact can start a traversal from the eldest
+    %% file. It's awkward to have both that ordering and the left/right
+    %% pointers in the entries - replacing the former with some
+    %% additional bit of state would be easy, but ditching the latter
+    %% would be neater.
     {false, ets:new(rabbit_msg_store_file_summary,
                     [ordered_set, public, {keypos, #file_summary.file}])};
 recover_file_summary(true, Dir) ->
@@ -1440,6 +1446,8 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid,
                                  file_summary_ets = FileSummaryEts })
   when (SumFileSize > 2 * ?FILE_SIZE_LIMIT andalso
         (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) ->
+    %% TODO: the algorithm here is sub-optimal - it may result in a
+    %% complete traversal of FileSummaryEts.
case ets:first(FileSummaryEts) of '$end_of_table' -> State; -- cgit v1.2.1 From 4b3aba0ceb29bde5b22223436726ad329fc5792f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 13 May 2010 11:20:41 +0100 Subject: tweak msg_store_index initialisation API --- include/rabbit_msg_store_index.hrl | 4 ++-- src/rabbit_msg_store.erl | 17 ++++++++--------- src/rabbit_msg_store_ets_index.erl | 14 ++++++++------ src/rabbit_msg_store_index.erl | 17 +++++++++-------- 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl index eb0ad5cb..88a474ae 100644 --- a/include/rabbit_msg_store_index.hrl +++ b/include/rabbit_msg_store_index.hrl @@ -41,8 +41,8 @@ -type(fieldpos() :: non_neg_integer()). -type(fieldvalue() :: any()). --spec(init/2 :: (('fresh'|'recover'), dir()) -> - {'fresh'|'recovered', index_state()}). +-spec(new/1 :: (dir()) -> index_state()). +-spec(recover/1 :: (dir()) -> {'ok', index_state()} | {'error', any()}). -spec(lookup/2 :: (guid(), index_state()) -> ('not_found' | keyvalue())). -spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). -spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 701ca29d..508bc60d 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -496,11 +496,11 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> rabbit_log:info("Using ~p to provide index for message store~n", [IndexModule]), + Fresh = fun () -> {false, IndexModule:new(Dir), sets:new()} end, {AllCleanShutdown, IndexState, ClientRefs1} = case detect_clean_shutdown(Dir) of {false, _Error} -> - {fresh, IndexState1} = IndexModule:init(fresh, Dir), - {false, IndexState1, sets:new()}; + Fresh(); {true, Terms} -> RecClientRefs = proplists:get_value(client_refs, Terms, []), RecIndexModule = proplists:get_value(index_module, Terms), @@ -508,15 +508,14 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> lists:sort(ClientRefs) == lists:sort(RecClientRefs) andalso IndexModule == RecIndexModule) of true -> - case IndexModule:init(recover, Dir) of - {fresh, IndexState1} -> - {false, IndexState1, sets:new()}; - {recovered, IndexState1} -> - {true, IndexState1, sets:from_list(ClientRefs)} + case IndexModule:recover(Dir) of + {ok, IndexState1} -> + {true, IndexState1, sets:from_list(ClientRefs)}; + _Error -> + Fresh() end; false -> - {fresh, IndexState1} = IndexModule:init(fresh, Dir), - {false, IndexState1, sets:new()} + Fresh() end end, diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index b4fb5ef1..1eb3c11f 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -33,7 +33,8 @@ -behaviour(rabbit_msg_store_index). --export([init/2, lookup/2, insert/2, update/2, update_fields/3, delete/2, +-export([new/1, recover/1, + lookup/2, insert/2, update/2, update_fields/3, delete/2, delete_by_file/2, terminate/1]). -define(MSG_LOC_NAME, rabbit_msg_store_ets_index). @@ -43,16 +44,17 @@ -record(state, { table, dir }). -init(fresh, Dir) -> +new(Dir) -> file:delete(filename:join(Dir, ?FILENAME)), Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]), - {fresh, #state { table = Tid, dir = Dir }}; -init(recover, Dir) -> + #state { table = Tid, dir = Dir }. 
+ +recover(Dir) -> Path = filename:join(Dir, ?FILENAME), case ets:file2tab(Path) of {ok, Tid} -> file:delete(Path), - {recovered, #state { table = Tid, dir = Dir }}; - {error, _} -> init(fresh, Dir) + {ok, #state { table = Tid, dir = Dir }}; + Error -> Error end. lookup(Key, State) -> diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl index 2c9de3fd..0ed64a9d 100644 --- a/src/rabbit_msg_store_index.erl +++ b/src/rabbit_msg_store_index.erl @@ -34,13 +34,14 @@ -export([behaviour_info/1]). behaviour_info(callbacks) -> - [{init, 2}, - {lookup, 2}, - {insert, 2}, - {update, 2}, - {update_fields, 3}, - {delete, 2}, - {delete_by_file, 2}, - {terminate, 1}]; + [{new, 1}, + {recover, 1}, + {lookup, 2}, + {insert, 2}, + {update, 2}, + {update_fields, 3}, + {delete, 2}, + {delete_by_file, 2}, + {terminate, 1}]; behaviour_info(_Other) -> undefined. -- cgit v1.2.1 From a95a4af50db83a7e6c8b18aa92813ba1f43d5ec3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 13 May 2010 12:38:59 +0100 Subject: Reverse the order so that we stop and then unlink+capture_exit. We don't think this should make any difference, but it's a little neater --- src/rabbit_msg_store.erl | 2 +- src/rabbit_queue_index.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 508bc60d..0f8f4256 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1335,8 +1335,8 @@ build_index(Gatherer, Left, [], sum_file_size = SumFileSize }) -> case gatherer:out(Gatherer) of empty -> - ok = rabbit_misc:unlink_and_capture_exit(Gatherer), ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), ok = index_delete_by_file(undefined, State), Offset = case ets:lookup(FileSummaryEts, Left) of [] -> 0; diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 0cb44e0a..a4e36891 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -469,8 +469,8 @@ queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> case gatherer:out(Gatherer) of empty -> - ok = rabbit_misc:unlink_and_capture_exit(Gatherer), ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), finished; {value, {Guid, Count}} -> {Guid, Count, {next, Gatherer}} -- cgit v1.2.1 From f03b666515f8819d5ecd30af961ccd26dc355722 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 13 May 2010 13:36:50 +0100 Subject: Improved documentation and added additional tests for bpqueue --- src/bpqueue.erl | 18 +++++++++----- src/rabbit_tests.erl | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 3010cb11..0210436f 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -31,11 +31,16 @@ -module(bpqueue). -%% Block-prefixed queue. This implements a queue of queues, but -%% supporting the normal queue interface. Each inner queue has a -%% prefix, which does not need to be unique, and it is guaranteed that -%% no two consecutive blocks have the same prefix. len/1 returns the -%% flattened length of the queue and is O(1). +%% Block-prefixed queue. From the perspective of the queue interface +%% the datastructure acts like a regular queue where each value is +%% paired with the prefix. 
+%% +%% This is implemented as a queue of queues, which is more space and +%% time efficient, whilst supporting the normal queue interface. Each +%% inner queue has a prefix, which does not need to be unique, and it +%% is guaranteed that no two consecutive blocks have the same +%% prefix. len/1 returns the flattened length of the queue and is +%% O(1). -export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4, @@ -233,7 +238,8 @@ to_list1({Prefix, InnerQ}) -> %% you're not interested in. Such blocks appear in the resulting bpq %% without modification. The Fun is then used both to map the value, %% which also allows you to change the prefix (and thus block) of the -%% value, and also to modify the Init/Acc (just like a fold). +%% value, and also to modify the Init/Acc (just like a fold). If the +%% Fun returns 'stop' then it is not applied to any further items. map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> {BPQ, Init}; map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 9c659652..9e45b758 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -262,6 +262,9 @@ test_bpqueue() -> [] = bpqueue:to_list(Q), + [{bar,3},{foo,2},{foo,1}] = + bpqueue:foldr(fun(P,V,I) -> [{P,V}|I] end, [], Q2), + F1 = fun (Qn) -> bpqueue:map_fold_filter_l( fun (foo) -> true; @@ -292,6 +295,71 @@ test_bpqueue() -> {Q12, 0} = F2(Q), [] = bpqueue:to_list(Q12), + FF1 = fun (Prefixes) -> + fun (P) -> lists:member(P, Prefixes) end + end, + FF2 = fun (Prefix, Stoppers) -> + fun (Val, Num) -> + case lists:member(Val, Stoppers) of + true -> stop; + false -> {Prefix, -Val, 1 + Num} + end + end + end, + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, + + BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], + BPQ = bpqueue:from_list(BPQL), + + %% no effect + {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_l( + FF1([none]), FF2(none, []), 0, BPQ)), + {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo,bar]), FF2(none, [1]), 0, BPQ)), + {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_l( + FF1([bar]), FF2(none, [3]), 0, BPQ)), + {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_r( + FF1([bar]), FF2(foo, [5]), 0, BPQ)), + {[], 0} = Queue_to_list(bpqueue:map_fold_filter_l( + fun(_P)-> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q)), + + %% process 1 item + {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo, bar]), FF2(foo, [2]), 0, BPQ)), + {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([bar]), FF2(bar, [4]), 0, BPQ)), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} = + Queue_to_list(bpqueue:map_fold_filter_r( + FF1([foo, bar]), FF2(foo, [6]), 0, BPQ)), + {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, {foo,[5,6,7]}], 1} = + Queue_to_list(bpqueue:map_fold_filter_r( + FF1([bar]), FF2(baz, [4]), 0, BPQ)), + + %% change prefix + {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo, bar]), FF2(bar, []), 0, BPQ)), + {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo]), FF2(bar, [5]), 0, BPQ)), + {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo]), FF2(bar, [7]), 0, BPQ)), + {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = + Queue_to_list(bpqueue:map_fold_filter_l( 
+ FF1([bar]), FF2(foo, [5]), 0, BPQ)), + + %% edge cases + {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo]), FF2(foo, [5]), 0, BPQ)), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} = + Queue_to_list(bpqueue:map_fold_filter_r( + FF1([foo]), FF2(foo, [2]), 0, BPQ)), + passed. test_simple_n_element_queue(N) -> -- cgit v1.2.1 From 8cb3e7e29e2dcf7e8a4512ea972676bef2955541 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 13 May 2010 13:45:38 +0100 Subject: Added a couple more bpqueue tests --- src/rabbit_tests.erl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 9e45b758..217b2809 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -351,6 +351,12 @@ test_bpqueue() -> {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = Queue_to_list(bpqueue:map_fold_filter_l( FF1([bar]), FF2(foo, [5]), 0, BPQ)), + {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([foo]), FF2(bar, []), 0, BPQ)), + {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} = + Queue_to_list(bpqueue:map_fold_filter_l( + FF1([bar]), FF2(foo, []), 0, BPQ)), %% edge cases {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = -- cgit v1.2.1 From 058638d12cb85023b142396b3800765cf085a3d5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 13 May 2010 14:22:07 +0100 Subject: One more test, and simplification of the bpqueue:to_list function --- src/bpqueue.erl | 5 +---- src/rabbit_tests.erl | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index 0210436f..a0c0c41b 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -225,10 +225,7 @@ from_list(List) -> to_list({0, _Q}) -> []; to_list({_N, Q}) -> - lists:map(fun to_list1/1, queue:to_list(Q)). - -to_list1({Prefix, InnerQ}) -> - {Prefix, queue:to_list(InnerQ)}. + [{Prefix, queue:to_list(InnerQ)} || {Prefix, InnerQ} <- queue:to_list(Q)]. 
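As a compact summary of the structure the tests above pin down: a well-formed block list survives a from_list/to_list round trip unchanged, and len/1 counts the flattened values (sketch in the same style):

bpqueue_roundtrip() ->
    BPQL = [{foo, [1, 2]}, {bar, [3]}, {foo, [4]}],
    BPQ = bpqueue:from_list(BPQL),
    4 = bpqueue:len(BPQ),
    BPQL = bpqueue:to_list(BPQ),
    passed.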
%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} %% where FilterFun(Prefix) -> boolean() diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 217b2809..9836a988 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -323,6 +323,9 @@ test_bpqueue() -> {[], 0} = Queue_to_list(bpqueue:map_fold_filter_l( fun(_P)-> throw(explosion) end, fun(_V, _N) -> throw(explosion) end, 0, Q)), + {[], 0} = Queue_to_list(bpqueue:map_fold_filter_r( + fun(_P)-> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q)), %% process 1 item {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = -- cgit v1.2.1 From a535142bfcd7b6db800671c5d693b1cdc6bd9edc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 13 May 2010 14:42:02 +0100 Subject: Cosmetic, and move to 100% code coverage of bpqueue --- src/rabbit_tests.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 9836a988..b9f6dfd6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -259,11 +259,13 @@ test_bpqueue() -> ok = bpqueue:foldl(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), + ok = bpqueue:foldr(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, + ok, Q), [] = bpqueue:to_list(Q), - [{bar,3},{foo,2},{foo,1}] = - bpqueue:foldr(fun(P,V,I) -> [{P,V}|I] end, [], Q2), + [{bar,3}, {foo,2}, {foo,1}] = + bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2), F1 = fun (Qn) -> bpqueue:map_fold_filter_l( -- cgit v1.2.1 From 6c0bb1db8aef568a30f8a4b62294b75434bb0b7a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 13 May 2010 19:18:51 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 0f8f4256..a1abcfe4 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -348,9 +348,9 @@ set_maximum_since_use(Server, Age) -> gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}). 
client_init(Server, Ref) -> - {IState, IModule, Dir, FileHandlesEts, FileSummaryEts, DedupCacheEts, - CurFileCacheEts} = gen_server2:call(Server, {new_client_state, Ref}, - infinity), + {IState, IModule, Dir, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} = + gen_server2:call(Server, {new_client_state, Ref}, infinity), #client_msstate { file_handle_cache = dict:new(), index_state = IState, index_module = IModule, @@ -584,15 +584,16 @@ handle_call({contains, Guid}, From, State) -> noreply(State1); handle_call({new_client_state, CRef}, _From, - State = #msstate { index_state = IndexState, dir = Dir, + State = #msstate { dir = Dir, + index_state = IndexState, index_module = IndexModule, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts, client_refs = ClientRefs }) -> - reply({IndexState, IndexModule, Dir, FileHandlesEts, FileSummaryEts, - DedupCacheEts, CurFileCacheEts}, + reply({IndexState, IndexModule, Dir, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts}, State #msstate { client_refs = sets:add_element(CRef, ClientRefs) }); handle_call(successfully_recovered_state, _From, State) -> @@ -627,17 +628,15 @@ handle_cast({write, Guid, Msg}, file_size = FileSize }] = ets:lookup(FileSummaryEts, CurFile), ValidTotalSize1 = ValidTotalSize + TotalSize, - ContiguousTop1 = if CurOffset =:= ContiguousTop -> - %% can't be any holes in this file - ValidTotalSize1; - true -> ContiguousTop + ContiguousTop1 = case CurOffset =:= ContiguousTop of + true -> ValidTotalSize1; + false -> ContiguousTop end, true = ets:update_element( - FileSummaryEts, - CurFile, + FileSummaryEts, CurFile, [{#file_summary.valid_total_size, ValidTotalSize1}, - {#file_summary.contiguous_top, ContiguousTop1}, - {#file_summary.file_size, FileSize + TotalSize}]), + {#file_summary.contiguous_top, ContiguousTop1}, + {#file_summary.file_size, FileSize + TotalSize}]), NextOffset = CurOffset + TotalSize, noreply( maybe_roll_to_new_file( @@ -708,7 +707,7 @@ handle_cast({gc_done, Reclaimed, Src, Dst}, ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}), true = ets:update_element(FileSummaryEts, Dst, [{#file_summary.locked, false}, - {#file_summary.right, SrcRight}]), + {#file_summary.right, SrcRight}]), true = ets:delete(FileSummaryEts, Src), noreply( maybe_compact(run_pending( -- cgit v1.2.1 From 0bd43a1ce3ea5074bdc79ef6e8bfe8fb65e7b7e9 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 13 May 2010 19:42:07 +0100 Subject: use exact comparison this is just for consistency; functionally it doesn't make any difference --- src/rabbit_msg_store.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index a1abcfe4..55480f34 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -504,9 +504,9 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> {true, Terms} -> RecClientRefs = proplists:get_value(client_refs, Terms, []), RecIndexModule = proplists:get_value(index_module, Terms), - case (undefined /= ClientRefs andalso - lists:sort(ClientRefs) == lists:sort(RecClientRefs) - andalso IndexModule == RecIndexModule) of + case (ClientRefs =/= undefined andalso + lists:sort(ClientRefs) =/= lists:sort(RecClientRefs) + andalso IndexModule =/= RecIndexModule) of true -> case IndexModule:recover(Dir) of {ok, IndexState1} -> @@ -703,7 +703,7 @@ handle_cast({gc_done, Reclaimed, Src, 
Dst}, right = SrcRight, locked = true, readers = 0 }] = ets:lookup(FileSummaryEts, Src), - %% this could fail if SrcRight == undefined + %% this could fail if SrcRight =:= undefined ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}), true = ets:update_element(FileSummaryEts, Dst, [{#file_summary.locked, false}, @@ -912,7 +912,7 @@ contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> State; #msg_location { file = File } -> case GCActive of - {A, B} when File == A orelse File == B -> + {A, B} when File =:= A orelse File =:= B -> add_to_pending_gc_completion( {contains, Guid, From}, State); _ -> @@ -1235,7 +1235,7 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> EldestTmpGuid = lists:last(GuidsTmp), {Guids1, UncorruptedMessages1} = case lists:splitwith( - fun (Guid) -> Guid /= EldestTmpGuid end, Guids) of + fun (Guid) -> Guid =/= EldestTmpGuid end, Guids) of {_Guids, []} -> %% no msgs from tmp in main {Guids, UncorruptedMessages}; {Dropped, [EldestTmpGuid | Rest]} -> @@ -1597,8 +1597,8 @@ combine_files(#file_summary { file = Source, Worklist = lists:dropwhile( fun (#msg_location { offset = Offset }) - when Offset /= DestinationContiguousTop -> - %% it cannot be that Offset == + when Offset =/= DestinationContiguousTop -> + %% it cannot be that Offset =:= %% DestinationContiguousTop because if it %% was then DestinationContiguousTop would %% have been extended by TotalSize -- cgit v1.2.1 From c009fee9ba0cec35f7cfee6ff7b3e04830c1fe7e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 08:46:20 +0100 Subject: whoops --- src/rabbit_msg_store.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 55480f34..a4f27db7 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -505,8 +505,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> RecClientRefs = proplists:get_value(client_refs, Terms, []), RecIndexModule = proplists:get_value(index_module, Terms), case (ClientRefs =/= undefined andalso - lists:sort(ClientRefs) =/= lists:sort(RecClientRefs) - andalso IndexModule =/= RecIndexModule) of + lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) + andalso IndexModule =:= RecIndexModule) of true -> case IndexModule:recover(Dir) of {ok, IndexState1} -> -- cgit v1.2.1 From d71207fee92424ff10bad0345abb8105927c895a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 09:11:36 +0100 Subject: log msg_store recovery failures as warnings --- src/rabbit_msg_store.erl | 130 +++++++++++++++++++++++++---------------------- 1 file changed, 70 insertions(+), 60 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index a4f27db7..f17977e8 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -493,34 +493,14 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), {ok, IndexModule} = application:get_env(msg_store_index_module), - rabbit_log:info("Using ~p to provide index for message store~n", - [IndexModule]), + rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), - Fresh = fun () -> {false, IndexModule:new(Dir), sets:new()} end, {AllCleanShutdown, IndexState, ClientRefs1} = - case detect_clean_shutdown(Dir) of - {false, _Error} -> - Fresh(); - {true, Terms} -> - RecClientRefs = proplists:get_value(client_refs, Terms, []), - RecIndexModule = 
proplists:get_value(index_module, Terms), - case (ClientRefs =/= undefined andalso - lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) - andalso IndexModule =:= RecIndexModule) of - true -> - case IndexModule:recover(Dir) of - {ok, IndexState1} -> - {true, IndexState1, sets:from_list(ClientRefs)}; - _Error -> - Fresh() - end; - false -> - Fresh() - end - end, + recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server), {FileSummaryRecovered, FileSummaryEts} = - recover_file_summary(AllCleanShutdown, Dir), + recover_file_summary(AllCleanShutdown, Dir, Server), + DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, [ordered_set, public]), @@ -748,7 +728,7 @@ terminate(_Reason, State = #msstate { index_state = IndexState, [ets:delete(T) || T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]], IndexModule:terminate(IndexState), - store_clean_shutdown([{client_refs, sets:to_list(ClientRefs)}, + store_recovery_terms([{client_refs, sets:to_list(ClientRefs)}, {index_module, IndexModule}], Dir), State3 #msstate { index_state = undefined, current_file_handle = undefined }. @@ -1026,40 +1006,6 @@ get_read_handle(FileNum, FHC, Dir) -> {Hdl, dict:store(FileNum, Hdl, FHC)} end. -detect_clean_shutdown(Dir) -> - Path = filename:join(Dir, ?CLEAN_FILENAME), - case rabbit_misc:read_term_file(Path) of - {ok, Terms} -> case file:delete(Path) of - ok -> {true, Terms}; - {error, Error} -> {false, Error} - end; - {error, Error} -> {false, Error} - end. - -store_clean_shutdown(Terms, Dir) -> - rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). - -recover_file_summary(false, _Dir) -> - %% TODO: the only reason for this to be an *ordered*_set is so - %% that maybe_compact can start a traversal from the eldest - %% file. It's awkward to have both that odering and the left/right - %% pointers in the entries - replacing the former with some - %% additional bit of state would be easy, but ditching the latter - %% would be neater. - {false, ets:new(rabbit_msg_store_file_summary, - [ordered_set, public, {keypos, #file_summary.file}])}; -recover_file_summary(true, Dir) -> - Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME), - case ets:file2tab(Path) of - {ok, Tid} -> file:delete(Path), - {true, Tid}; - {error, _} -> recover_file_summary(false, Dir) - end. - -store_file_summary(Tid, Dir) -> - ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), - [{extended_info, [object_count]}]). - preallocate(Hdl, FileSizeLimit, FinalPos) -> {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), ok = file_handle_cache:truncate(Hdl), @@ -1150,9 +1096,73 @@ index_delete_by_file(File, #msstate { index_module = Index, Index:delete_by_file(File, State). 
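The exact-comparison switch a couple of commits back (and the "whoops" that
repaired it) turns on the difference between Erlang's coercing and exact
operators: == and /= treat 1 and 1.0 as equal, while =:= and =/= require the
same type and value. For the guards in this module both operands always have
the same type, hence "functionally it doesn't make any difference". In the
shell:

    1> 1 == 1.0.
    true
    2> 1 =:= 1.0.
    false
    3> 1 /= 1.0.
    false
    4> 1 =/= 1.0.
    true
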
%%---------------------------------------------------------------------------- -%% recovery +%% shutdown and recovery %%---------------------------------------------------------------------------- +recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server) -> + Fresh = fun (ErrorMsg, ErrorArgs) -> + rabbit_log:warning("~w: " ++ ErrorMsg ++ + "~nrebuilding indices from scratch~n", + [Server | ErrorArgs]), + {false, IndexModule:new(Dir), sets:new()} + end, + case read_recovery_terms(Dir) of + {false, Error} -> + Fresh("failed to read recovery terms: ~p", [Error]); + {true, Terms} -> + RecClientRefs = proplists:get_value(client_refs, Terms, []), + RecIndexModule = proplists:get_value(index_module, Terms), + case (ClientRefs =/= undefined andalso + lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) + andalso IndexModule =:= RecIndexModule) of + true -> case IndexModule:recover(Dir) of + {ok, IndexState1} -> + ClientRefs1 = sets:from_list(ClientRefs), + {true, IndexState1, ClientRefs1}; + {error, Error} -> + Fresh("failed to recover index: ~p", [Error]) + end; + false -> Fresh("recovery terms differ from present", []) + end + end. + +store_recovery_terms(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +read_recovery_terms(Dir) -> + Path = filename:join(Dir, ?CLEAN_FILENAME), + case rabbit_misc:read_term_file(Path) of + {ok, Terms} -> case file:delete(Path) of + ok -> {true, Terms}; + {error, Error} -> {false, Error} + end; + {error, Error} -> {false, Error} + end. + +store_file_summary(Tid, Dir) -> + ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), + [{extended_info, [object_count]}]). + +recover_file_summary(false, _Dir, _Server) -> + %% TODO: the only reason for this to be an *ordered*_set is so + %% that maybe_compact can start a traversal from the eldest + %% file. It's awkward to have both that odering and the left/right + %% pointers in the entries - replacing the former with some + %% additional bit of state would be easy, but ditching the latter + %% would be neater. + {false, ets:new(rabbit_msg_store_file_summary, + [ordered_set, public, {keypos, #file_summary.file}])}; +recover_file_summary(true, Dir, Server) -> + Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {true, Tid}; + {error, Error} -> rabbit_log:warning( + "~w: failed to recover file summary: ~p~n" + "rebuilding~n", [Server, Error]), + recover_file_summary(false, Dir, Server) + end. + count_msg_refs(false, Gen, Seed, State) -> count_msg_refs(Gen, Seed, State); count_msg_refs(true, _Gen, _Seed, _State) -> -- cgit v1.2.1 From 4294306b4bdde66ed1e62471759d1aa753372503 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 14:03:56 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 83 +++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index f17977e8..40fbb885 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -778,24 +778,22 @@ internal_sync(State = #msstate { current_file_handle = CurHdl, State1 = stop_sync_timer(State), case Syncs of [] -> State1; - _ -> - ok = file_handle_cache:sync(CurHdl), - lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), - State1 #msstate { on_sync = [] } + _ -> ok = file_handle_cache:sync(CurHdl), + lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), + State1 #msstate { on_sync = [] } end. 
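The recovery-terms handling introduced in the warnings commit above boils
down to a term-file round trip: write a proplist at clean shutdown, read it
back (and delete it) at startup, so a crash means the marker is absent next
time. A minimal sketch of that shape in plain OTP; the real code goes through
rabbit_misc:write_term_file/2 and read_term_file/1, and store_terms/read_terms
here are illustrative names:

    store_terms(Path, Terms) ->
        ok = file:write_file(Path,
                             [io_lib:format("~p.~n", [T]) || T <- Terms]).

    read_terms(Path) ->
        case file:consult(Path) of
            {ok, Terms}    -> case file:delete(Path) of
                                  ok             -> {true, Terms};
                                  {error, Error} -> {false, Error}
                              end;
            {error, Error} -> {false, Error}
        end.
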
read_message(Guid, From, State = #msstate { dedup_cache_ets = DedupCacheEts }) -> case index_lookup(Guid, State) of - not_found -> gen_server2:reply(From, not_found), - State; + not_found -> + gen_server2:reply(From, not_found), + State; MsgLocation -> case fetch_and_increment_cache(DedupCacheEts, Guid) of - not_found -> - read_message1(From, MsgLocation, State); - Msg -> - gen_server2:reply(From, {ok, Msg}), - State + not_found -> read_message1(From, MsgLocation, State); + Msg -> gen_server2:reply(From, {ok, Msg}), + State end end. @@ -807,43 +805,40 @@ read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts }) -> case File =:= CurFile of - true -> - {Msg, State1} = - %% can return [] if msg in file existed on startup - case ets:lookup(CurFileCacheEts, Guid) of - [] -> - {ok, RawOffSet} = - file_handle_cache:current_raw_offset(CurHdl), - ok = case Offset >= RawOffSet of - true -> file_handle_cache:flush(CurHdl); - false -> ok - end, - read_from_disk(MsgLoc, State, DedupCacheEts); - [{Guid, Msg1, _CacheRefCount}] -> - ok = maybe_insert_into_cache(DedupCacheEts, RefCount, - Guid, Msg1), - {Msg1, State} - end, - gen_server2:reply(From, {ok, Msg}), - State1; - false -> - [#file_summary { locked = Locked }] = - ets:lookup(FileSummaryEts, File), - case Locked of - true -> - add_to_pending_gc_completion({read, Guid, From}, State); - false -> - {Msg, State1} = read_from_disk(MsgLoc, State, - DedupCacheEts), - gen_server2:reply(From, {ok, Msg}), - State1 - end + true -> {Msg, State1} = + %% can return [] if msg in file existed on startup + case ets:lookup(CurFileCacheEts, Guid) of + [] -> + {ok, RawOffSet} = + file_handle_cache:current_raw_offset(CurHdl), + ok = case Offset >= RawOffSet of + true -> file_handle_cache:flush(CurHdl); + false -> ok + end, + read_from_disk(MsgLoc, State, DedupCacheEts); + [{Guid, Msg1, _CacheRefCount}] -> + ok = maybe_insert_into_cache( + DedupCacheEts, RefCount, Guid, Msg1), + {Msg1, State} + end, + gen_server2:reply(From, {ok, Msg}), + State1; + false -> [#file_summary { locked = Locked }] = + ets:lookup(FileSummaryEts, File), + case Locked of + true -> add_to_pending_gc_completion({read, Guid, From}, + State); + false -> {Msg, State1} = + read_from_disk(MsgLoc, State, DedupCacheEts), + gen_server2:reply(From, {ok, Msg}), + State1 + end end. 
read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, file = File, offset = Offset, - total_size = TotalSize }, State, - DedupCacheEts) -> + total_size = TotalSize }, + State, DedupCacheEts) -> {Hdl, State1} = get_read_handle(File, State), {ok, Offset} = file_handle_cache:position(Hdl, Offset), {ok, {Guid, Msg}} = -- cgit v1.2.1 From ae4167c15881041d8fdb3874c31b634ba3902248 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 14 May 2010 15:37:24 +0100 Subject: Rely on badmatch - no need to throw --- src/rabbit_msg_store.erl | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 40fbb885..6234cb64 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -846,13 +846,13 @@ read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, {ok, {Guid, _}} = Obj -> Obj; Rest -> - throw({error, {misread, [{old_state, State}, - {file_num, File}, - {offset, Offset}, - {guid, Guid}, - {read, Rest}, - {proc_dict, get()} - ]}}) + {error, {misread, [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {guid, Guid}, + {read, Rest}, + {proc_dict, get()} + ]}} end, ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), {Msg, State1}. @@ -1026,8 +1026,7 @@ scan_file_for_valid_messages(Dir, FileName) -> file_handle_cache:close(Hdl), Valid; {error, enoent} -> {ok, [], 0}; - {error, Reason} -> throw({error, - {unable_to_scan_file, FileName, Reason}}) + {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} end. %%---------------------------------------------------------------------------- @@ -1705,8 +1704,7 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, ok = file_handle_cache:sync(DestinationHdl) end; {FinalOffsetZ, _BlockStart1, _BlockEnd1} -> - throw({gc_error, [{expected, FinalOffset}, - {got, FinalOffsetZ}, - {destination, Destination}]}) - end, - ok. + {gc_error, [{expected, FinalOffset}, + {got, FinalOffsetZ}, + {destination, Destination}]} + end. -- cgit v1.2.1 From 5cbbd76fb73e756ba46ab0ab7cf41401affe1bbd Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 17:43:35 +0100 Subject: cosmetic --- src/rabbit_msg_store.erl | 86 +++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 6234cb64..832f4039 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -767,12 +767,6 @@ stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #msstate { sync_timer_ref = undefined }. -filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). - -sort_file_names(FileNames) -> - lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, - FileNames). - internal_sync(State = #msstate { current_file_handle = CurHdl, on_sync = Syncs }) -> State1 = stop_sync_timer(State), @@ -857,29 +851,6 @@ read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), {Msg, State1}. -maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) - when RefCount > 1 -> - update_msg_cache(DedupCacheEts, Guid, Msg); -maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> - ok. 
- -update_msg_cache(CacheEts, Guid, Msg) -> - case ets:insert_new(CacheEts, {Guid, Msg, 1}) of - true -> ok; - false -> safe_ets_update_counter_ok( - CacheEts, Guid, {3, +1}, - fun () -> update_msg_cache(CacheEts, Guid, Msg) end) - end. - -safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> - try - SuccessFun(ets:update_counter(Tab, Key, UpdateOp)) - catch error:badarg -> FailThunk() - end. - -safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) -> - safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk). - contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> case index_lookup(Guid, State) of not_found -> @@ -952,6 +923,19 @@ run_pending({contains, Guid, From}, State) -> run_pending({remove, Guid}, State) -> remove_message(Guid, State). +safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> + try + SuccessFun(ets:update_counter(Tab, Key, UpdateOp)) + catch error:badarg -> FailThunk() + end. + +safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) -> + safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk). + +%%---------------------------------------------------------------------------- +%% file helper functions +%%---------------------------------------------------------------------------- + open_file(Dir, FileName, Mode) -> file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). @@ -1016,23 +1000,30 @@ form_filename(Dir, Name) -> filename:join(Dir, Name). filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. -scan_file_for_valid_messages(Dir, FileName) -> - case open_file(Dir, FileName, ?READ_MODE) of - {ok, Hdl} -> - Size = filelib:file_size(form_filename(Dir, FileName)), - Valid = rabbit_msg_file:scan(Hdl, Size), - %% if something really bad's happened, the close could fail, - %% but ignore - file_handle_cache:close(Hdl), - Valid; - {error, enoent} -> {ok, [], 0}; - {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} - end. +filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). + +sort_file_names(FileNames) -> + lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, + FileNames). %%---------------------------------------------------------------------------- %% message cache helper functions %%---------------------------------------------------------------------------- +maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) + when RefCount > 1 -> + update_msg_cache(DedupCacheEts, Guid, Msg); +maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> + ok. + +update_msg_cache(CacheEts, Guid, Msg) -> + case ets:insert_new(CacheEts, {Guid, Msg, 1}) of + true -> ok; + false -> safe_ets_update_counter_ok( + CacheEts, Guid, {3, +1}, + fun () -> update_msg_cache(CacheEts, Guid, Msg) end) + end. + remove_cache_entry(DedupCacheEts, Guid) -> true = ets:delete(DedupCacheEts, Guid), ok. @@ -1290,6 +1281,19 @@ is_sublist(SmallerL, BiggerL) -> is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). 
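The safe_ets_update_counter helpers moved above encode a common ets idiom:
ets:update_counter/3 raises badarg when the key is absent, and
ets:insert_new/2 fails when it is present, so the two retry into each other
until one wins. A sketch of how update_msg_cache uses that (bump is an
illustrative name; the row shape {Key, Value, RefCount} matches the cache
entries above):

    bump(Tab, Key, Value) ->
        case ets:insert_new(Tab, {Key, Value, 1}) of
            true  -> ok;
            false -> try
                         _ = ets:update_counter(Tab, Key, {3, 1}),
                         ok
                     catch error:badarg ->
                             %% row vanished between the calls: retry
                             bump(Tab, Key, Value)
                     end
        end.
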
+scan_file_for_valid_messages(Dir, FileName) -> + case open_file(Dir, FileName, ?READ_MODE) of + {ok, Hdl} -> + Size = filelib:file_size(form_filename(Dir, FileName)), + Valid = rabbit_msg_file:scan(Hdl, Size), + %% if something really bad's happened, the close could fail, + %% but ignore + file_handle_cache:close(Hdl), + Valid; + {error, enoent} -> {ok, [], 0}; + {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} + end. + scan_file_for_valid_messages_guids(Dir, FileName) -> {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), -- cgit v1.2.1 From 21d0c3f375151eecdf5f9308833dbd3783178e87 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 20:34:02 +0100 Subject: minor refactoring and cosmetic changes --- src/rabbit_msg_store.erl | 87 ++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 832f4039..563c57d2 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -527,8 +527,11 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> successfully_recovered = AllCleanShutdown }, - ok = count_msg_refs(AllCleanShutdown, MsgRefDeltaGen, MsgRefDeltaGenInit, - State), + ok = case AllCleanShutdown of + true -> ok; + false -> count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State) + end, + FileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = @@ -1130,11 +1133,12 @@ store_file_summary(Tid, Dir) -> recover_file_summary(false, _Dir, _Server) -> %% TODO: the only reason for this to be an *ordered*_set is so - %% that maybe_compact can start a traversal from the eldest - %% file. It's awkward to have both that odering and the left/right - %% pointers in the entries - replacing the former with some - %% additional bit of state would be easy, but ditching the latter - %% would be neater. + %% that a) maybe_compact can start a traversal from the eldest + %% file, and b) build_index in fast recovery mode can easily + %% identify the current file. It's awkward to have both that + %% odering and the left/right pointers in the entries - replacing + %% the former with some additional bit of state would be easy, but + %% ditching the latter would be neater. {false, ets:new(rabbit_msg_store_file_summary, [ordered_set, public, {keypos, #file_summary.file}])}; recover_file_summary(true, Dir, Server) -> @@ -1148,15 +1152,12 @@ recover_file_summary(true, Dir, Server) -> recover_file_summary(false, Dir, Server) end. -count_msg_refs(false, Gen, Seed, State) -> - count_msg_refs(Gen, Seed, State); -count_msg_refs(true, _Gen, _Seed, _State) -> - ok. - count_msg_refs(Gen, Seed, State) -> case Gen(Seed) of - finished -> ok; - {_Guid, 0, Next} -> count_msg_refs(Gen, Next, State); + finished -> + ok; + {_Guid, 0, Next} -> + count_msg_refs(Gen, Next, State); {Guid, Delta, Next} -> ok = case index_lookup(Guid, State) of not_found -> @@ -1176,19 +1177,21 @@ count_msg_refs(Gen, Seed, State) -> end. recover_crashed_compactions(Dir, FileNames, TmpFileNames) -> - lists:foreach(fun (TmpFileName) -> - ok = recover_crashed_compactions1( - Dir, FileNames, TmpFileName) - end, TmpFileNames), + lists:foreach( + fun (TmpFileName) -> + NonTmpRelatedFileName = + filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, + true = lists:member(NonTmpRelatedFileName, FileNames), + ok = recover_crashed_compaction( + Dir, TmpFileName, NonTmpRelatedFileName) + end, TmpFileNames), ok. 
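The tmp-to-main pairing in the rewritten recover_crashed_compactions hinges
on filename:rootname/1 plus ?FILE_EXTENSION. In the shell, assuming the
".rdq"/".rdt" extension values used by this store:

    1> filename:rootname("271.rdt") ++ ".rdq".
    "271.rdq"
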
-recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> - NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, - true = lists:member(NonTmpRelatedFileName, FileNames), +recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) -> {ok, UncorruptedMessagesTmp, GuidsTmp} = - scan_file_for_valid_messages_guids(Dir, TmpFileName), + scan_file_for_valid_messages_and_guids(Dir, TmpFileName), {ok, UncorruptedMessages, Guids} = - scan_file_for_valid_messages_guids(Dir, NonTmpRelatedFileName), + scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -1247,8 +1250,8 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% are in the tmp file true = is_disjoint(Guids1, GuidsTmp), %% must open with read flag, otherwise will stomp over contents - {ok, MainHdl} = open_file( - Dir, NonTmpRelatedFileName, [read | ?WRITE_MODE]), + {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName, + [read | ?WRITE_MODE]), %% Wipe out any rubbish at the end of the file. Remember %% the head of the list will be the highest entry in the %% file. @@ -1257,16 +1260,14 @@ recover_crashed_compactions1(Dir, FileNames, TmpFileName) -> %% Extend the main file as big as necessary in a single %% move. If we run out of disk space, this truncate could %% fail, but we still aren't risking losing data - ok = truncate_and_extend_file( - MainHdl, Top, Top + TmpSize), - {ok, TmpHdl} = open_file( - Dir, TmpFileName, ?READ_AHEAD_MODE), + ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), + {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE), {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize), ok = file_handle_cache:close(MainHdl), ok = file_handle_cache:delete(TmpHdl), {ok, _MainMessages, GuidsMain} = - scan_file_for_valid_messages_guids( + scan_file_for_valid_messages_and_guids( Dir, NonTmpRelatedFileName), %% check that everything in Guids1 is in GuidsMain true = is_sublist(Guids1, GuidsMain), @@ -1283,28 +1284,25 @@ is_disjoint(SmallerL, BiggerL) -> scan_file_for_valid_messages(Dir, FileName) -> case open_file(Dir, FileName, ?READ_MODE) of - {ok, Hdl} -> - Size = filelib:file_size(form_filename(Dir, FileName)), - Valid = rabbit_msg_file:scan(Hdl, Size), - %% if something really bad's happened, the close could fail, - %% but ignore - file_handle_cache:close(Hdl), - Valid; + {ok, Hdl} -> Valid = rabbit_msg_file:scan( + Hdl, filelib:file_size( + form_filename(Dir, FileName))), + %% if something really bad has happened, + %% the close could fail, but ignore + file_handle_cache:close(Hdl), + Valid; {error, enoent} -> {ok, [], 0}; {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} end. -scan_file_for_valid_messages_guids(Dir, FileName) -> - {ok, Messages, _FileSize} = - scan_file_for_valid_messages(Dir, FileName), +scan_file_for_valid_messages_and_guids(Dir, FileName) -> + {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [Guid || {Guid, _TotalSize, _FileOffset} <- Messages]}. %% Takes the list in *ascending* order (i.e. eldest message %% first). This is the opposite of what scan_file_for_valid_messages %% produces. The list of msgs that is produced is youngest first. -find_contiguous_block_prefix([]) -> {0, []}; -find_contiguous_block_prefix(List) -> - find_contiguous_block_prefix(List, 0, []). 
+find_contiguous_block_prefix(L) -> find_contiguous_block_prefix(L, 0, []). find_contiguous_block_prefix([], ExpectedOffset, Guids) -> {ExpectedOffset, Guids}; @@ -1374,7 +1372,8 @@ build_index_worker(Gatherer, State = #msstate { dir = Dir }, lists:foldl( fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> case index_lookup(Guid, State) of - not_found -> {VMAcc, VTSAcc}; + not_found -> + {VMAcc, VTSAcc}; StoreEntry -> ok = index_update(StoreEntry #msg_location { file = File, offset = Offset, -- cgit v1.2.1 From 74caa0579ebed561235ec22681c209e23a2e9fff Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 20:42:58 +0100 Subject: move startup_fun_state type def since it is only used by the msg_store --- include/rabbit_msg_store.hrl | 2 -- src/rabbit_msg_store.erl | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl index 73eb4ae0..36b908a7 100644 --- a/include/rabbit_msg_store.hrl +++ b/include/rabbit_msg_store.hrl @@ -36,8 +36,6 @@ -ifdef(use_specs). -type(msg() :: any()). --type(startup_fun_state() :: - {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). -endif. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 563c57d2..913cd65f 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -113,6 +113,8 @@ file_summary_ets :: tid(), dedup_cache_ets :: tid(), cur_file_cache_ets :: tid() }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). -spec(start_link/4 :: (atom(), file_path(), [binary()] | 'undefined', startup_fun_state()) -> -- cgit v1.2.1 From 0b27c0fe9a678fb7c90255129c8191d49262cdfa Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 14 May 2010 20:55:59 +0100 Subject: cosmetic --- src/bpqueue.erl | 71 +++++++++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/src/bpqueue.erl b/src/bpqueue.erl index a0c0c41b..bd59fef8 100644 --- a/src/bpqueue.erl +++ b/src/bpqueue.erl @@ -85,16 +85,12 @@ %%---------------------------------------------------------------------------- -new() -> - {0, queue:new()}. +new() -> {0, queue:new()}. -is_empty({0, _Q}) -> - true; -is_empty(_BPQ) -> - false. +is_empty({0, _Q}) -> true; +is_empty(_BPQ) -> false. -len({N, _Q}) -> - N. +len({N, _Q}) -> N. in(Prefix, Value, {0, Q}) -> {1, queue:in({Prefix, queue:from_list([Value])}, Q)}; @@ -142,15 +138,11 @@ in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> end} end. -out({0, _Q} = BPQ) -> - {empty, BPQ}; -out(BPQ) -> - out1({fun queue:in_r/2, fun queue:out/1}, BPQ). +out({0, _Q} = BPQ) -> {empty, BPQ}; +out(BPQ) -> out1({fun queue:in_r/2, fun queue:out/1}, BPQ). -out_r({0, _Q} = BPQ) -> - {empty, BPQ}; -out_r(BPQ) -> - out1({fun queue:in/2, fun queue:out_r/1}, BPQ). +out_r({0, _Q} = BPQ) -> {empty, BPQ}; +out_r(BPQ) -> out1({fun queue:in/2, fun queue:out_r/1}, BPQ). out1({In, Out}, {N, Q}) -> {{value, {Prefix, InnerQ}}, Q1} = Out(Q), @@ -177,15 +169,11 @@ join({NHead, QHead}, {NTail, QTail}) -> queue:join(QHead, QTail) end}. -foldl(_Fun, Init, {0, _Q}) -> - Init; -foldl(Fun, Init, {_N, Q}) -> - fold1(fun queue:out/1, Fun, Init, Q). +foldl(_Fun, Init, {0, _Q}) -> Init; +foldl( Fun, Init, {_N, Q}) -> fold1(fun queue:out/1, Fun, Init, Q). -foldr(_Fun, Init, {0, _Q}) -> - Init; -foldr(Fun, Init, {_N, Q}) -> - fold1(fun queue:out_r/1, Fun, Init, Q). 
+foldr(_Fun, Init, {0, _Q}) -> Init; +foldr( Fun, Init, {_N, Q}) -> fold1(fun queue:out_r/1, Fun, Init, Q). fold1(Out, Fun, Init, Q) -> case Out(Q) of @@ -222,10 +210,9 @@ from_list(List) -> false -> All end)}. -to_list({0, _Q}) -> - []; -to_list({_N, Q}) -> - [{Prefix, queue:to_list(InnerQ)} || {Prefix, InnerQ} <- queue:to_list(Q)]. +to_list({0, _Q}) -> []; +to_list({_N, Q}) -> [{Prefix, queue:to_list(InnerQ)} || + {Prefix, InnerQ} <- queue:to_list(Q)]. %% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} %% where FilterFun(Prefix) -> boolean() @@ -251,8 +238,8 @@ map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> fun in_q_r/3, fun (T, H) -> join(H, T) end}, N, PFilter, Fun, Init, Q, new()). -map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, Init, - Q, QNew) -> +map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, + Init, Q, QNew) -> case Out(Q) of {empty, _Q} -> {QNew, Init}; @@ -260,23 +247,21 @@ map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, Init, case PFilter(Prefix) of true -> {Init1, QNew1, Cont} = - map_fold_filter2(Funs, Fun, Prefix, Prefix, Init, - InnerQ, QNew, queue:new()), + map_fold_filter2(Funs, Fun, Prefix, Prefix, + Init, InnerQ, QNew, queue:new()), case Cont of - false -> - {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; - true -> - map_fold_filter1(Funs, Len, PFilter, Fun, Init1, - Q1, QNew1) + false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; + true -> map_fold_filter1(Funs, Len, PFilter, Fun, + Init1, Q1, QNew1) end; false -> - map_fold_filter1(Funs, Len, PFilter, Fun, Init, - Q1, InQ(Prefix, InnerQ, QNew)) + map_fold_filter1(Funs, Len, PFilter, Fun, + Init, Q1, InQ(Prefix, InnerQ, QNew)) end end. -map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, Init, - InnerQ, QNew, InnerQNew) -> +map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, + Init, InnerQ, QNew, InnerQNew) -> case Out(InnerQ) of {empty, _Q} -> {Init, InQ(OrigPrefix, InnerQ, @@ -293,7 +278,7 @@ map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, Init, false -> {Prefix1, InQ(Prefix, InnerQNew, QNew), In(Value1, queue:new())} end, - map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, Init1, - InnerQ1, QNew1, InnerQNew1) + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, + Init1, InnerQ1, QNew1, InnerQNew1) end end. -- cgit v1.2.1 From 6f5c55c9bcfd886a9e0abd55ed164633f75f5b0c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 08:57:22 +0100 Subject: minor refactoring and some cosmetic changes --- src/rabbit_msg_store.erl | 140 +++++++++++++++++++++----------------------- src/rabbit_msg_store_gc.erl | 8 ++- 2 files changed, 71 insertions(+), 77 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 913cd65f..21f15058 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -460,8 +460,8 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, case index_lookup(Guid, CState) of #msg_location { file = File } = MsgLocation -> %% Still the same file. - %% This is fine to fail (already exists) - ets:insert_new(FileHandlesEts, {{self(), File}, open}), + mark_handle_open(FileHandlesEts, File), + CState1 = close_all_indicated(CState), {Msg, CState2} = %% This will never be the current file read_from_disk(MsgLocation, CState1, DedupCacheEts), @@ -473,14 +473,6 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, end end. 
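client_read3 above now funnels its bookkeeping through mark_handle_open/2 and
close_all_indicated/1, defined in the next hunk. The shared-table handshake
they implement, sketched with illustrative names and a public ets table whose
rows are {{ClientPid, File}, open | close} as in the diff:

    mark_open(T, File) ->
        %% fine to fail (already exists), exactly as the comment says
        _ = ets:insert_new(T, {{self(), File}, open}),
        ok.

    request_close(T, File) ->          %% store side: flag every reader
        [ets:update_element(T, Key, {2, close})
         || {Key, open} <- ets:match_object(T, {{'_', File}, open})],
        ok.

    collect_closed(T) ->               %% client side: honour the flags
        [begin true = ets:delete(T, Key), File end
         || {{_Pid, File} = Key, close}
                <- ets:match_object(T, {{self(), '_'}, close})].
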
-close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = - CState) -> - Objs = ets:match_object(FileHandlesEts, {{self(), '_'}, close}), - lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> - true = ets:delete(FileHandlesEts, Key), - close_handle(File, CStateM) - end, CState, Objs). - %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- @@ -958,6 +950,24 @@ close_handle(Key, FHC) -> error -> FHC end. +mark_handle_open(FileHandlesEts, File) -> + %% This is fine to fail (already exists) + ets:insert_new(FileHandlesEts, {{self(), File}, open}), + true. + +mark_handle_to_close(FileHandlesEts, File) -> + [ ets:update_element(FileHandlesEts, Key, {2, close}) + || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ], + true. + +close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = + CState) -> + Objs = ets:match_object(FileHandlesEts, {{self(), '_'}, close}), + lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> + true = ets:delete(FileHandlesEts, Key), + close_handle(File, CStateM) + end, CState, Objs). + close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts, file_handle_cache = FHC }) -> Self = self(), @@ -1426,9 +1436,7 @@ maybe_roll_to_new_file( State1 = internal_sync(State), ok = file_handle_cache:close(CurHdl), NextFile = CurFile + 1, - {ok, NextHdl} = open_file( - Dir, filenum_to_name(NextFile), - ?WRITE_MODE), + {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), true = ets:insert_new(FileSummaryEts, #file_summary { file = NextFile, valid_total_size = 0, @@ -1476,11 +1484,6 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, maybe_compact(State) -> State. -mark_handle_to_close(FileHandlesEts, File) -> - [ ets:update_element(FileHandlesEts, Key, {2, close}) - || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ], - true. - find_files_to_gc(FileSummaryEts, [#file_summary { file = Dst, valid_total_size = DstValid, @@ -1495,13 +1498,11 @@ find_files_to_gc(FileSummaryEts, right = SrcRight }] = Next = ets:lookup(FileSummaryEts, Src), case SrcRight of - undefined -> - not_found; - _ -> - case DstValid + SrcValid =< ?FILE_SIZE_LIMIT of - true -> {Src, Dst}; - false -> find_files_to_gc(FileSummaryEts, Next) - end + undefined -> not_found; + _ -> case DstValid + SrcValid =< ?FILE_SIZE_LIMIT of + true -> {Src, Dst}; + false -> find_files_to_gc(FileSummaryEts, Next) + end end end. @@ -1536,9 +1537,7 @@ delete_file_if_empty(File, State = #msstate { true = mark_handle_to_close(FileHandlesEts, File), true = ets:delete(FileSummaryEts, File), State1 = close_handle(File, State), - ok = file:delete(form_filename( - Dir, - filenum_to_name(File))), + ok = file:delete(form_filename(Dir, filenum_to_name(File))), State1 #msstate { sum_file_size = SumFileSize - FileSize }; _ -> State end. 
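maybe_compact starts its sweep from the eldest file, which is what keeping
the file summary in an *ordered*_set buys (per the TODO note earlier in the
series). In plain ets that eldest-first traversal looks like this sketch
(walk is an illustrative name; ets:first/1 and ets:next/2 visit keys in
order for ordered_set tables):

    walk(Tab, Fun) -> walk(Tab, Fun, ets:first(Tab)).

    walk(_Tab, _Fun, '$end_of_table') -> ok;
    walk(Tab, Fun, Key) ->
        [Obj] = ets:lookup(Tab, Key),
        ok = Fun(Obj),
        walk(Tab, Fun, ets:next(Tab, Key)).
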
@@ -1550,20 +1549,17 @@ delete_file_if_empty(File, State = #msstate { gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> [SrcObj = #file_summary { readers = SrcReaders, - valid_total_size = SrcValidData, left = DstFile, file_size = SrcFileSize, locked = true }] = ets:lookup(FileSummaryEts, SrcFile), [DstObj = #file_summary { readers = DstReaders, - valid_total_size = DstValidData, right = SrcFile, file_size = DstFileSize, locked = true }] = ets:lookup(FileSummaryEts, DstFile), case SrcReaders =:= 0 andalso DstReaders =:= 0 of - true -> TotalValidData = DstValidData + SrcValidData, - ok = combine_files(SrcObj, DstObj, State), + true -> TotalValidData = combine_files(SrcObj, DstObj, State), %% don't update dest.right, because it could be %% changing at the same time true = ets:update_element( @@ -1584,12 +1580,12 @@ combine_files(#file_summary { file = Source, contiguous_top = DestinationContiguousTop, right = Source }, State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> - SourceName = filenum_to_name(Source), + SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), - {ok, SourceHdl} = - open_file(Dir, SourceName, ?READ_AHEAD_MODE), - {ok, DestinationHdl} = - open_file(Dir, DestinationName, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + {ok, SourceHdl} = open_file(Dir, SourceName, + ?READ_AHEAD_MODE), + {ok, DestinationHdl} = open_file(Dir, DestinationName, + ?READ_AHEAD_MODE ++ ?WRITE_MODE), ExpectedSize = SourceValid + DestinationValid, %% if DestinationValid =:= DestinationContiguousTop then we don't %% need a tmp file @@ -1597,10 +1593,11 @@ combine_files(#file_summary { file = Source, %% the DestinationContiguousTop to a tmp file then truncate, %% copy back in, and then copy over from Source %% otherwise we just truncate straight away and copy over from Source - if DestinationContiguousTop =:= DestinationValid -> + case DestinationContiguousTop =:= DestinationValid of + true -> ok = truncate_and_extend_file( DestinationHdl, DestinationValid, ExpectedSize); - true -> + false -> {DestinationWorkList, DestinationValid} = find_unremoved_messages_in_file(Destination, State), Worklist = @@ -1618,8 +1615,7 @@ combine_files(#file_summary { file = Source, %% enforce it anyway end, DestinationWorkList), Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, - {ok, TmpHdl} = open_file( - Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), ok = copy_messages( Worklist, DestinationContiguousTop, DestinationValid, DestinationHdl, TmpHdl, Destination, State), @@ -1644,11 +1640,11 @@ combine_files(#file_summary { file = Source, %% tidy up ok = file_handle_cache:close(DestinationHdl), ok = file_handle_cache:delete(SourceHdl), - ok. + ExpectedSize. 
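combine_files hands the actual byte shuffling to copy_messages, which the
next hunk refactors to grow a {BlockStart, BlockEnd} run while messages are
contiguous and only issue a copy on finding a gap. A pure sketch of just that
coalescing step, offsets only and no file I/O (coalesce is an illustrative
name; input is ascending {Offset, TotalSize} pairs):

    coalesce(Msgs) ->
        lists:reverse(
          lists:foldl(
            fun ({Offset, Size}, []) ->
                    [{Offset, Offset + Size}];
                ({Offset, Size}, [{Start, End} | Rest])
                  when Offset =:= End ->
                    [{Start, End + Size} | Rest]; %% contiguous: extend
                ({Offset, Size}, Acc) ->
                    [{Offset, Offset + Size} | Acc] %% gap: new block
            end, [], Msgs)).

    %% e.g. coalesce([{0,10}, {10,5}, {20,5}]) =:= [{0,15}, {20,25}]
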
find_unremoved_messages_in_file(File, {_FileSummaryEts, Dir, Index, IndexState}) -> - %% Msgs here will be end-of-file at start-of-list + %% Messages here will be end-of-file at start-of-list {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order @@ -1663,11 +1659,18 @@ find_unremoved_messages_in_file(File, copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> + Copy = fun ({BlockStart, BlockEnd}) -> + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, BlockStart), + {ok, BSize} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize) + end, case lists:foldl( fun (#msg_location { guid = Guid, offset = Offset, total_size = TotalSize }, - {CurOffset, BlockStart, BlockEnd}) -> + {CurOffset, Block = {BlockStart, BlockEnd}}) -> %% CurOffset is in the DestinationFile. %% Offset, BlockStart and BlockEnd are in the SourceFile %% update MsgLocation to reflect change of file and offset @@ -1675,40 +1678,29 @@ copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, [{#msg_location.file, Destination}, {#msg_location.offset, CurOffset}], IndexState), - {BlockStart2, BlockEnd2} = - if BlockStart =:= undefined -> - %% base case, called only for the first list elem - {Offset, Offset + TotalSize}; - Offset =:= BlockEnd -> - %% extend the current block because the - %% next msg follows straight on - {BlockStart, BlockEnd + TotalSize}; - true -> - %% found a gap, so actually do the work - %% for the previous block - BSize = BlockEnd - BlockStart, - {ok, BlockStart} = - file_handle_cache:position(SourceHdl, - BlockStart), - {ok, BSize} = file_handle_cache:copy( - SourceHdl, DestinationHdl, BSize), - {Offset, Offset + TotalSize} - end, - {CurOffset + TotalSize, BlockStart2, BlockEnd2} - end, {InitOffset, undefined, undefined}, WorkList) of - {FinalOffset, BlockStart1, BlockEnd1} -> + {CurOffset + TotalSize, + case BlockEnd of + undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + _ -> + %% found a gap, so actually do the work for + %% the previous block + Copy(Block), + {Offset, Offset + TotalSize} + end} + end, {InitOffset, {undefined, undefined}}, WorkList) of + {FinalOffset, Block} -> case WorkList of [] -> ok; - %% do the last remaining block - _ -> BSize1 = BlockEnd1 - BlockStart1, - {ok, BlockStart1} = - file_handle_cache:position(SourceHdl, BlockStart1), - {ok, BSize1} = - file_handle_cache:copy(SourceHdl, DestinationHdl, - BSize1), + _ -> Copy(Block), %% do the last remaining block ok = file_handle_cache:sync(DestinationHdl) end; - {FinalOffsetZ, _BlockStart1, _BlockEnd1} -> + {FinalOffsetZ, _Block} -> {gc_error, [{expected, FinalOffset}, {got, FinalOffsetZ}, {destination, Destination}]} diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 8a275c39..038d51c4 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -71,9 +71,11 @@ set_maximum_since_use(Pid, Age) -> init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), - {ok, #gcstate { dir = Dir, index_state = IndexState, - index_module = IndexModule, parent = Parent, - file_summary_ets = 
FileSummaryEts}, + {ok, #gcstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + parent = Parent, + file_summary_ets = FileSummaryEts }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -- cgit v1.2.1 From d8d9d29a281ffd9f041bc604c86253c86ae3f6cd Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 11:34:44 +0100 Subject: remove unused constant --- src/rabbit_msg_file.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 301f4a9f..dca18e48 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -44,7 +44,6 @@ -define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). -define(GUID_SIZE_BYTES, 16). -define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). --define(SIZE_AND_GUID_BYTES, (?GUID_SIZE_BYTES + ?INTEGER_SIZE_BYTES)). -define(SCAN_BLOCK_SIZE, ?FILE_SIZE_LIMIT div 4). %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 52151a09e258c78d8b0bd0dab9ee9808ae8727fa Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 11:36:00 +0100 Subject: tighten guid size check on 'append' the guid must be *exactly* the right size; the rest of the code will break if it isn't. --- src/rabbit_msg_file.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index dca18e48..fc9368a1 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -67,7 +67,7 @@ %%---------------------------------------------------------------------------- append(FileHdl, Guid, MsgBody) - when is_binary(Guid) andalso size(Guid) =< ?GUID_SIZE_BYTES -> + when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES -> MsgBodyBin = term_to_binary(MsgBody), MsgBodyBinSize = size(MsgBodyBin), Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, -- cgit v1.2.1 From 4dbd3544d6bd143a5a72a7427e36eaf8354923ed Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 11:36:49 +0100 Subject: refactor: more sensible ordering of result tuple --- src/rabbit_msg_file.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index fc9368a1..3dbf8ead 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -101,17 +101,18 @@ scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), case file_handle_cache:read(FileHdl, Read) of {ok, Data1} -> - {Acc1, ScanOffset1, Data2} = + {Data2, Acc1, ScanOffset1} = scan(<>, Acc, ScanOffset), - scan(FileHdl, FileSize, Data2, ReadOffset + iolist_size(Data1), - Acc1, ScanOffset1); - _KO -> {ok, Acc, ScanOffset} + ReadOffset1 = ReadOffset + size(Data1), + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + _KO -> + {ok, Acc, ScanOffset} end. scan(<<>>, Acc, Offset) -> - {Acc, Offset, <<>>}; + {<<>>, Acc, Offset}; scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> - {Acc, Offset, <<>>}; %% Nothing to do other than stop. + {<<>>, Acc, Offset}; %% Nothing to do other than stop. scan(<>, Acc, Offset) -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, @@ -130,4 +131,4 @@ scan(< - {Acc, Offset, Data}. + {Data, Acc, Offset}. 
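rabbit_msg_file:scan above walks size-prefixed records using the bit syntax.
A hedged sketch of that general shape, not the exact on-disk layout: the
8-byte size and 16-byte guid widths follow the defines in this file, but the
trailing marker value and the scan/2 name are illustrative only:

    -define(OK_BYTE, 255).  %% assumed marker value, for illustration

    scan(<<Size:64, GuidAndBody:Size/binary, ?OK_BYTE:8, Rest/binary>>,
         Acc) when Size >= 16 ->
        <<Guid:16/binary, _Body/binary>> = GuidAndBody,
        scan(Rest, [Guid | Acc]);
    scan(_TruncatedOrJunk, Acc) ->
        %% anything that does not parse ends the scan, as scan/6 does
        lists:reverse(Acc).
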
-- cgit v1.2.1 From 6eee2306d5613c90835a2b1f3c37937cf8472727 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 17:25:14 +0100 Subject: cosmetic: group functions by purpose --- src/rabbit_queue_index.erl | 415 +++++++++++++++++++++++---------------------- 1 file changed, 209 insertions(+), 206 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a4e36891..5e5ac4ce 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -215,7 +215,7 @@ %%---------------------------------------------------------------------------- -%% Public API +%% public API %%---------------------------------------------------------------------------- init(Name, MsgStoreRecovered, ContainsCheckFun) -> @@ -294,17 +294,6 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> dirty_count = 1 }), {Count, Terms, State3}. -maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> - Segment; -maybe_add_to_journal( true, false, del, _RelSeq, Segment) -> - Segment; -maybe_add_to_journal( true, false, _Del, RelSeq, Segment) -> - add_to_journal(RelSeq, del, Segment); -maybe_add_to_journal(false, _, del, RelSeq, Segment) -> - add_to_journal(RelSeq, ack, Segment); -maybe_add_to_journal(false, _, _Del, RelSeq, Segment) -> - add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)). - terminate(Terms, State) -> terminate(true, Terms, State). @@ -453,7 +442,65 @@ recover(DurableQueues) -> {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. %%---------------------------------------------------------------------------- -%% Msg Store Startup Delta Function +%% startup and shutdown +%%---------------------------------------------------------------------------- + +blank_state(QueueName) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + #qistate { dir = Dir, + segments = segments_new(), + journal_handle = undefined, + dirty_count = 0 }. + +detect_clean_shutdown(Dir) -> + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> true; + {error, enoent} -> false + end. + +read_shutdown_terms(Dir) -> + rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)). + +store_clean_shutdown(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +terminate(_StoreShutdown, _Terms, State = #qistate { segments = undefined }) -> + State; +terminate(StoreShutdown, Terms, State = + #qistate { journal_handle = JournalHdl, + dir = Dir, segments = Segments }) -> + ok = case JournalHdl of + undefined -> ok; + _ -> file_handle_cache:close(JournalHdl) + end, + SegTerms = segment_fold( + fun (Seg, #segment { handle = Hdl, pubs = PubCount, + acks = AckCount }, SegTermsAcc) -> + ok = case Hdl of + undefined -> ok; + _ -> file_handle_cache:close(Hdl) + end, + [{Seg, {PubCount, AckCount}} | SegTermsAcc] + end, [], Segments), + case StoreShutdown of + true -> store_clean_shutdown([{segments, SegTerms} | Terms], Dir); + false -> ok + end, + State #qistate { journal_handle = undefined, segments = undefined }. + +queue_name_to_dir_name(Name = #resource { kind = queue }) -> + Bin = term_to_binary(Name), + Size = 8*size(Bin), + <> = Bin, + lists:flatten(io_lib:format("~.36B", [Num])). + +queues_dir() -> + filename:join(rabbit_mnesia:dir(), "queues"). 
+ +%%---------------------------------------------------------------------------- +%% msg store startup delta function %%---------------------------------------------------------------------------- queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> @@ -490,39 +537,138 @@ queue_index_walker_reader(QueueName, Gatherer) -> ok = gatherer:finish(Gatherer). %%---------------------------------------------------------------------------- -%% Minors +%% journal manipulation %%---------------------------------------------------------------------------- +maybe_add_to_journal( true, true, _Del, _RelSeq, Segment) -> + Segment; +maybe_add_to_journal( true, false, del, _RelSeq, Segment) -> + Segment; +maybe_add_to_journal( true, false, _Del, RelSeq, Segment) -> + add_to_journal(RelSeq, del, Segment); +maybe_add_to_journal(false, _, del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, Segment); +maybe_add_to_journal(false, _, _Del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)). + +add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, + segments = Segments, + dir = Dir }) -> + {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + Segment = segment_find_or_new(Seg, Dir, Segments), + Segment1 = add_to_journal(RelSeq, Action, Segment), + State #qistate { dirty_count = DCount + 1, + segments = segment_store(Segment1, Segments) }; + +add_to_journal(RelSeq, Action, + Segment = #segment { journal_entries = JEntries, + pubs = PubCount, acks = AckCount }) -> + Segment1 = Segment #segment { + journal_entries = add_to_journal(RelSeq, Action, JEntries) }, + case Action of + del -> Segment1; + ack -> Segment1 #segment { acks = AckCount + 1 }; + {_Guid, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } + end; + +%% This is a more relaxed version of deliver_or_ack_msg because we can +%% have dels or acks in the journal without the corresponding +%% pub. Also, always want to keep acks. Things must occur in the right +%% order though. +add_to_journal(RelSeq, Action, SegJArray) -> + case array:get(RelSeq, SegJArray) of + undefined -> + array:set(RelSeq, + case Action of + {_Msg, _IsPersistent} -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end, SegJArray); + ({Pub, no_del, no_ack}) when Action == del -> + array:set(RelSeq, {Pub, del, no_ack}, SegJArray); + ({Pub, Del, no_ack}) when Action == ack -> + array:set(RelSeq, {Pub, Del, ack}, SegJArray) + end. + maybe_flush_journal(State = #qistate { dirty_count = DCount }) when DCount > ?MAX_JOURNAL_ENTRY_COUNT -> flush_journal(State); maybe_flush_journal(State) -> State. -all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> - lists:sort( - sets:to_list( - lists:foldl( - fun (SegName, Set) -> - sets:add_element( - list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, - SegName)), Set) - end, sets:from_list(segment_fetch_keys(Segments)), - filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). +get_journal_handle(State = #qistate { journal_handle = undefined, + dir = Dir }) -> + Path = filename:join(Dir, ?JOURNAL_FILENAME), + {ok, Hdl} = file_handle_cache:open(Path, + [binary, raw, read, write, + {read_ahead, ?SEGMENT_TOTAL_SIZE}], + [{write_buffer, infinity}]), + {Hdl, State #qistate { journal_handle = Hdl }}; +get_journal_handle(State = #qistate { journal_handle = Hdl }) -> + {Hdl, State}. 
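The add_to_journal/3 clauses above amount to a small state machine over
{Pub, Del, Ack} triples: entries only move forwards, and anything arriving
out of order is a deliberate badmatch. Written out as a pure transition
function (transition is an illustrative name, the triples are as in the
diff):

    transition(undefined, {_Guid, _Persist} = Pub) -> {Pub, no_del, no_ack};
    transition(undefined, del)                     -> {no_pub, del, no_ack};
    transition(undefined, ack)                     -> {no_pub, no_del, ack};
    transition({Pub, no_del, no_ack}, del)         -> {Pub, del, no_ack};
    transition({Pub, Del, no_ack}, ack)            -> {Pub, Del, ack}.

    %% e.g. transition(transition(undefined, {G, true}), del)
    %%          =:= {{G, true}, del, no_ack}
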
-blank_state(QueueName) -> - StrName = queue_name_to_dir_name(QueueName), - Dir = filename:join(queues_dir(), StrName), - ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - #qistate { dir = Dir, - segments = segments_new(), - journal_handle = undefined, - dirty_count = 0 - }. +%% Loading Journal. This isn't idempotent and will mess up the counts +%% if you call it more than once on the same state. Assumes the counts +%% are 0 to start with. +load_journal(State) -> + {JournalHdl, State1} = get_journal_handle(State), + {ok, 0} = file_handle_cache:position(JournalHdl, 0), + State2 = #qistate { segments = Segments } = load_journal_entries(State1), + Segments1 = + segment_map( + fun (_Seg, Segment = #segment { journal_entries = JEntries, + pubs = PubCountInJournal, + acks = AckCountInJournal }) -> + %% We want to keep acks in so that we can remove + %% them if duplicates are in the journal. The counts + %% here are purely from the segment itself. + {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} = + load_segment(true, Segment), + %% Removed counts here are the number of pubs and + %% acks that are duplicates - i.e. found in both the + %% segment and journal. + {JEntries1, PubsRemoved, AcksRemoved} = + journal_minus_segment(JEntries, SegEntries), + PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, + AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, + Segment1 #segment { journal_entries = JEntries1, + pubs = PubCount1, + acks = AckCount1 } + end, Segments), + State2 #qistate { segments = Segments1 }. -array_new() -> - array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). +load_journal_entries(State = #qistate { journal_handle = Hdl }) -> + case file_handle_cache:read(Hdl, ?SEQ_BYTES) of + {ok, <>} -> + case Prefix of + ?DEL_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, del, State)); + ?ACK_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, ack, State)); + _ -> + case file_handle_cache:read(Hdl, ?GUID_BYTES) of + {ok, <>} -> + %% work around for binary data + %% fragmentation. See + %% rabbit_msg_file:read_next/2 + <> = + <>, + Publish = {Guid, case Prefix of + ?PUB_PERSIST_JPREFIX -> true; + ?PUB_TRANS_JPREFIX -> false + end}, + load_journal_entries( + add_to_journal(SeqId, Publish, State)); + _ErrOrEoF -> %% err, we've lost at least a publish + State + end + end; + _ErrOrEoF -> State + end. + +%%---------------------------------------------------------------------------- +%% segment manipulation +%%---------------------------------------------------------------------------- seq_id_to_seg_and_rel_seq_id(SeqId) -> { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. @@ -534,32 +680,23 @@ seg_num_to_path(Dir, Seg) -> SegName = integer_to_list(Seg), filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). +all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> + lists:sort( + sets:to_list( + lists:foldl( + fun (SegName, Set) -> + sets:add_element( + list_to_integer( + lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + SegName)), Set) + end, sets:from_list(segment_fetch_keys(Segments)), + filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). + delete_segment(#segment { handle = undefined }) -> ok; delete_segment(#segment { handle = Hdl }) -> ok = file_handle_cache:delete(Hdl). -detect_clean_shutdown(Dir) -> - case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of - ok -> true; - {error, enoent} -> false - end. - -read_shutdown_terms(Dir) -> - rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)). 
-
-store_clean_shutdown(Terms, Dir) ->
-    rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).
-
-queue_name_to_dir_name(Name = #resource { kind = queue }) ->
-    Bin = term_to_binary(Name),
-    Size = 8*size(Bin),
-    <<Num:Size>> = Bin,
-    lists:flatten(io_lib:format("~.36B", [Num])).
-
-queues_dir() ->
-    filename:join(rabbit_mnesia:dir(), "queues").
-
 get_segment_handle(Segment = #segment { handle = undefined, path = Path }) ->
     {ok, Hdl} = file_handle_cache:open(Path,
                                        [binary, raw, read, write,
@@ -626,20 +763,6 @@ segment_fetch_keys({Segments, CachedSegments}) ->
 segments_new() ->
     {dict:new(), []}.
 
-get_journal_handle(State =
-                   #qistate { journal_handle = undefined, dir = Dir }) ->
-    Path = filename:join(Dir, ?JOURNAL_FILENAME),
-    {ok, Hdl} = file_handle_cache:open(Path,
-                                       [binary, raw, read, write,
-                                        {read_ahead, ?SEGMENT_TOTAL_SIZE}],
-                                       [{write_buffer, infinity}]),
-    {Hdl, State #qistate { journal_handle = Hdl }};
-get_journal_handle(State = #qistate { journal_handle = Hdl }) ->
-    {Hdl, State}.
-
-bool_to_int(true ) -> 1;
-bool_to_int(false) -> 0.
-
 write_entry_to_segment(_RelSeq, {{_Guid, _IsPersistent}, del, ack}, Hdl) ->
     Hdl;
 write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) ->
@@ -666,57 +789,26 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) ->
     end,
     Hdl.
 
-terminate(_StoreShutdown, _Terms, State = #qistate { segments = undefined }) ->
-    State;
-terminate(StoreShutdown, Terms, State =
-          #qistate { journal_handle = JournalHdl,
-                     dir = Dir, segments = Segments }) ->
-    ok = case JournalHdl of
-             undefined -> ok;
-             _         -> file_handle_cache:close(JournalHdl)
-         end,
-    SegTerms = segment_fold(
-                 fun (Seg, #segment { handle = Hdl, pubs = PubCount,
-                                      acks = AckCount }, SegTermsAcc) ->
-                         ok = case Hdl of
-                                  undefined -> ok;
-                                  _         -> file_handle_cache:close(Hdl)
-                              end,
-                         [{Seg, {PubCount, AckCount}} | SegTermsAcc]
-                 end, [], Segments),
-    case StoreShutdown of
-        true  -> store_clean_shutdown([{segments, SegTerms} | Terms], Dir);
-        false -> ok
-    end,
-    State #qistate { journal_handle = undefined, segments = undefined }.
-
-%%----------------------------------------------------------------------------
-%% Majors
-%%----------------------------------------------------------------------------
-
 %% Loading segments
-
+%%
 %% Does not do any combining with the journal at all. The PubCount
 %% that comes back is the number of publishes in the segment. The
 %% number of unacked msgs is PubCount - AckCount. If KeepAcks is
 %% false, then array:sparse_size(SegEntries) == PubCount -
 %% AckCount. If KeepAcks is true, then array:sparse_size(SegEntries)
 %% == PubCount.
-load_segment(KeepAcks,
-             Segment = #segment { path = Path, handle = SegHdl }) ->
+load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) ->
     SegmentExists = case SegHdl of
                         undefined -> filelib:is_file(Path);
                         _         -> true
                     end,
     case SegmentExists of
-        false ->
-            {array_new(), 0, 0, Segment};
-        true ->
-            {Hdl, Segment1} = get_segment_handle(Segment),
-            {ok, 0} = file_handle_cache:position(Hdl, bof),
-            {SegEntries, PubCount, AckCount} =
-                load_segment_entries(KeepAcks, Hdl, array_new(), 0, 0),
-            {SegEntries, PubCount, AckCount, Segment1}
+        false -> {array_new(), 0, 0, Segment};
+        true  -> {Hdl, Segment1} = get_segment_handle(Segment),
+                 {ok, 0} = file_handle_cache:position(Hdl, bof),
+                 {SegEntries, PubCount, AckCount} =
+                     load_segment_entries(KeepAcks, Hdl, array_new(), 0, 0),
+                 {SegEntries, PubCount, AckCount, Segment1}
     end.
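The KeepAcks distinction documented above deserves a concrete illustration: when acks are not kept, an acked entry is dropped from the array with array:reset/2, which restores the slot's default and so hides it from sparse traversals. A sketch with an invented entry and a made-up array size:

    -module(qi_keep_acks_demo).
    -export([demo/0]).

    demo() ->
        %% same array shape as array_new/0, but with a tiny size
        A0 = array:new([{default, undefined}, fixed, {size, 4}]),
        A1 = array:set(1, {{<<"g">>, true}, del, ack}, A0),
        %% KeepAcks = true: the acked entry remains visible
        [{1, _}] = array:sparse_to_orddict(A1),
        %% KeepAcks = false: resetting the slot hides it again
        A2 = array:reset(1, A1),
        [] = array:sparse_to_orddict(A2),
        ok.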
 load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) ->
@@ -752,104 +844,15 @@ deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries) ->
             {AckCount + 1, array:reset(RelSeq, SegEntries)}
     end.
 
-%% Loading Journal. This isn't idempotent and will mess up the counts
-%% if you call it more than once on the same state. Assumes the counts
-%% are 0 to start with.
-
-load_journal(State) ->
-    {JournalHdl, State1} = get_journal_handle(State),
-    {ok, 0} = file_handle_cache:position(JournalHdl, 0),
-    State2 = #qistate { segments = Segments } = load_journal_entries(State1),
-    Segments1 =
-        segment_map(
-          fun (_Seg, Segment = #segment { journal_entries = JEntries,
-                                          pubs = PubCountInJournal,
-                                          acks = AckCountInJournal }) ->
-                  %% We want to keep acks in so that we can remove
-                  %% them if duplicates are in the journal. The counts
-                  %% here are purely from the segment itself.
-                  {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} =
-                      load_segment(true, Segment),
-                  %% Removed counts here are the number of pubs and
-                  %% acks that are duplicates - i.e. found in both the
-                  %% segment and journal.
-                  {JEntries1, PubsRemoved, AcksRemoved} =
-                      journal_minus_segment(JEntries, SegEntries),
-                  PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved,
-                  AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved,
-                  Segment1 #segment { journal_entries = JEntries1,
-                                      pubs = PubCount1,
-                                      acks = AckCount1 }
-          end, Segments),
-    State2 #qistate { segments = Segments1 }.
-
-load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
-    case file_handle_cache:read(Hdl, ?SEQ_BYTES) of
-        {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} ->
-            case Prefix of
-                ?DEL_JPREFIX ->
-                    load_journal_entries(add_to_journal(SeqId, del, State));
-                ?ACK_JPREFIX ->
-                    load_journal_entries(add_to_journal(SeqId, ack, State));
-                _ ->
-                    case file_handle_cache:read(Hdl, ?GUID_BYTES) of
-                        {ok, <<GuidNum:?GUID_BITS>>} ->
-                            %% work around for binary data
-                            %% fragmentation. See
-                            %% rabbit_msg_file:read_next/2
-                            <<Guid:?GUID_BYTES/binary>> =
-                                <<GuidNum:?GUID_BITS>>,
-                            Publish = {Guid, case Prefix of
-                                                 ?PUB_PERSIST_JPREFIX -> true;
-                                                 ?PUB_TRANS_JPREFIX   -> false
-                                             end},
-                            load_journal_entries(
-                              add_to_journal(SeqId, Publish, State));
-                        _ErrOrEoF -> %% err, we've lost at least a publish
-                            State
-                    end
-            end;
-        _ErrOrEoF -> State
-    end.
-
-add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount,
-                                                 segments = Segments,
-                                                 dir = Dir }) ->
-    {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
-    Segment = segment_find_or_new(Seg, Dir, Segments),
-    Segment1 = add_to_journal(RelSeq, Action, Segment),
-    State #qistate { dirty_count = DCount + 1,
-                     segments = segment_store(Segment1, Segments) };
+array_new() ->
+    array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).
 
-add_to_journal(RelSeq, Action,
-               Segment = #segment { journal_entries = JEntries,
-                                    pubs = PubCount, acks = AckCount }) ->
-    Segment1 = Segment #segment {
-                 journal_entries = add_to_journal(RelSeq, Action, JEntries) },
-    case Action of
-        del -> Segment1;
-        ack -> Segment1 #segment { acks = AckCount + 1 };
-        {_Guid, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 }
-    end;
+bool_to_int(true ) -> 1;
+bool_to_int(false) -> 0.
 
-%% This is a more relaxed version of deliver_or_ack_msg because we can
-%% have dels or acks in the journal without the corresponding
-%% pub. Also, always want to keep acks. Things must occur in the right
-%% order though.
-add_to_journal(RelSeq, Action, SegJArray) -> - case array:get(RelSeq, SegJArray) of - undefined -> - array:set(RelSeq, - case Action of - {_Msg, _IsPersistent} -> {Action, no_del, no_ack}; - del -> {no_pub, del, no_ack}; - ack -> {no_pub, no_del, ack} - end, SegJArray); - ({Pub, no_del, no_ack}) when Action == del -> - array:set(RelSeq, {Pub, del, no_ack}, SegJArray); - ({Pub, Del, no_ack}) when Action == ack -> - array:set(RelSeq, {Pub, Del, ack}, SegJArray) - end. +%%---------------------------------------------------------------------------- +%% journal & segment combination +%%---------------------------------------------------------------------------- %% Combine what we have just read from a segment file with what we're %% holding for that segment in memory. There must be no -- cgit v1.2.1 From 1531a47dfdc7cd17cd19306071f14370c96401c1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 17:26:20 +0100 Subject: cosmetic: more consistent and easier to read match syntax --- src/rabbit_queue_index.erl | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5e5ac4ce..bcee4b1d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -871,11 +871,11 @@ journal_plus_segment(JEntries, SegEntries) -> %% Here, the Out is the Seg Array which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment). -journal_plus_segment(Obj = {{_Guid, _IsPersistent}, no_del, no_ack}, +journal_plus_segment({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, not_found, RelSeq, Out) -> array:set(RelSeq, Obj, Out); -journal_plus_segment(Obj = {{_Guid, _IsPersistent}, del, no_ack}, +journal_plus_segment({{_Guid, _IsPersistent}, del, no_ack} = Obj, not_found, RelSeq, Out) -> array:set(RelSeq, Obj, Out); @@ -885,7 +885,7 @@ journal_plus_segment({{_Guid, _IsPersistent}, del, ack}, array:reset(RelSeq, Out); journal_plus_segment({no_pub, del, no_ack}, - {Pub = {_Guid, _IsPersistent}, no_del, no_ack}, + {{_Guid, _IsPersistent} = Pub, no_del, no_ack}, RelSeq, Out) -> array:set(RelSeq, {Pub, del, no_ack}, Out); @@ -917,21 +917,23 @@ journal_minus_segment(JEntries, SegEntries) -> %% publish or ack is in both the journal and the segment. %% Both the same. 
Must be at least the publish -journal_minus_segment(Obj, Obj = {{_Guid, _IsPersistent}, _Del, no_ack}, +journal_minus_segment({{_Guid, _IsPersistent}, _Del, no_ack} = Obj, + Obj, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved + 1, AcksRemoved}; -journal_minus_segment(Obj, Obj = {{_Guid, _IsPersistent}, _Del, ack}, +journal_minus_segment({{_Guid, _IsPersistent}, _Del, ack} = Obj, + Obj, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved + 1, AcksRemoved + 1}; %% Just publish in journal -journal_minus_segment(Obj = {{_Guid, _IsPersistent}, no_del, no_ack}, +journal_minus_segment({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, not_found, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; %% Just deliver in journal -journal_minus_segment(Obj = {no_pub, del, no_ack}, +journal_minus_segment({no_pub, del, no_ack} = Obj, {{_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; @@ -941,7 +943,7 @@ journal_minus_segment({no_pub, del, no_ack}, {Out, PubsRemoved, AcksRemoved}; %% Just ack in journal -journal_minus_segment(Obj = {no_pub, no_del, ack}, +journal_minus_segment({no_pub, no_del, ack} = Obj, {{_Guid, _IsPersistent}, del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; @@ -951,18 +953,18 @@ journal_minus_segment({no_pub, no_del, ack}, {Out, PubsRemoved, AcksRemoved}; %% Publish and deliver in journal -journal_minus_segment(Obj = {{_Guid, _IsPersistent}, del, no_ack}, +journal_minus_segment({{_Guid, _IsPersistent}, del, no_ack} = Obj, not_found, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; -journal_minus_segment({Pub, del, no_ack}, - {Pub = {_Guid, _IsPersistent}, no_del, no_ack}, +journal_minus_segment({{_Guid, _IsPersistent} = Pub, del, no_ack}, + {Pub, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, del, no_ack}, Out), PubsRemoved + 1, AcksRemoved}; %% Deliver and ack in journal -journal_minus_segment(Obj = {no_pub, del, ack}, +journal_minus_segment({no_pub, del, ack} = Obj, {{_Guid, _IsPersistent}, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; @@ -981,13 +983,13 @@ journal_minus_segment({{_Guid, _IsPersistent}, del, ack}, not_found, _RelSeq, Out, PubsRemoved, AcksRemoved) -> {Out, PubsRemoved, AcksRemoved}; -journal_minus_segment({Pub, del, ack}, - {Pub = {_Guid, _IsPersistent}, no_del, no_ack}, +journal_minus_segment({{_Guid, _IsPersistent} = Pub, del, ack}, + {Pub, no_del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, del, ack}, Out), PubsRemoved + 1, AcksRemoved}; -journal_minus_segment({Pub, del, ack}, - {Pub = {_Guid, _IsPersistent}, del, no_ack}, +journal_minus_segment({{_Guid, _IsPersistent} = Pub, del, ack}, + {Pub, del, no_ack}, RelSeq, Out, PubsRemoved, AcksRemoved) -> {array:set(RelSeq, {no_pub, no_del, ack}, Out), PubsRemoved + 1, AcksRemoved}. 
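The rewrite in this patch is purely syntactic: in an Erlang clause head, Var = Pattern and Pattern = Var are the same alias pattern and bind exactly the same names; the series merely standardises on pattern-first. A two-line illustration (function names invented):

    old_style(Obj = {{Guid, _IsPersistent}, del, no_ack}) -> {Obj, Guid}.
    new_style({{Guid, _IsPersistent}, del, no_ack} = Obj) -> {Obj, Guid}.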
-- cgit v1.2.1 From c4601e0b48627c09a5200af4a338305acbd0360e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 17:26:38 +0100 Subject: fix typo --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index bcee4b1d..e0dba505 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -913,7 +913,7 @@ journal_minus_segment(JEntries, SegEntries) -> end, {array_new(), 0, 0}, JEntries). %% Here, the Out is a fresh journal that we're filling with valid -%% entries. PubsRemoved and AcksRemoved only get increased when the a +%% entries. PubsRemoved and AcksRemoved only get increased when a %% publish or ack is in both the journal and the segment. %% Both the same. Must be at least the publish -- cgit v1.2.1 From 0d5a3ac60eb3b0f0e81f06b9e55dbee67b9b7e37 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 18:03:07 +0100 Subject: refactor: more concise journal_minus_segment --- src/rabbit_queue_index.erl | 123 ++++++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 68 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e0dba505..c1e7fed7 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -862,7 +862,7 @@ journal_plus_segment(JEntries, SegEntries) -> array:sparse_foldl( fun (RelSeq, JObj, SegEntriesOut) -> SegEntry = case array:get(RelSeq, SegEntriesOut) of - undefined -> not_found; + undefined -> not_found; SObj = {_, _, _} -> SObj end, journal_plus_segment(JObj, SegEntry, RelSeq, SegEntriesOut) @@ -905,11 +905,17 @@ journal_minus_segment(JEntries, SegEntries) -> array:sparse_foldl( fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> SegEntry = case array:get(RelSeq, SegEntries) of - undefined -> not_found; + undefined -> not_found; SObj = {_, _, _} -> SObj end, - journal_minus_segment(JObj, SegEntry, RelSeq, JEntriesOut, - PubsRemoved, AcksRemoved) + {Obj, PubsRemovedDelta, AcksRemovedDelta} = + journal_minus_segment1(JObj, SegEntry), + {case Obj of + undefined -> JEntriesOut; + _ -> array:set(RelSeq, Obj, JEntriesOut) + end, + PubsRemoved + PubsRemovedDelta, + AcksRemoved + AcksRemovedDelta} end, {array_new(), 0, 0}, JEntries). %% Here, the Out is a fresh journal that we're filling with valid @@ -917,79 +923,60 @@ journal_minus_segment(JEntries, SegEntries) -> %% publish or ack is in both the journal and the segment. %% Both the same. 
Must be at least the publish -journal_minus_segment({{_Guid, _IsPersistent}, _Del, no_ack} = Obj, - Obj, - _RelSeq, Out, PubsRemoved, AcksRemoved) -> - {Out, PubsRemoved + 1, AcksRemoved}; -journal_minus_segment({{_Guid, _IsPersistent}, _Del, ack} = Obj, - Obj, - _RelSeq, Out, PubsRemoved, AcksRemoved) -> - {Out, PubsRemoved + 1, AcksRemoved + 1}; +journal_minus_segment1({{_Guid, _IsPersistent}, _Del, no_ack} = Obj, + Obj) -> + {undefined, 1, 0}; +journal_minus_segment1({{_Guid, _IsPersistent}, _Del, ack} = Obj, + Obj) -> + {undefined, 1, 1}; %% Just publish in journal -journal_minus_segment({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, - not_found, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; +journal_minus_segment1({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, + not_found) -> + {Obj, 0, 0}; %% Just deliver in journal -journal_minus_segment({no_pub, del, no_ack} = Obj, - {{_Guid, _IsPersistent}, no_del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, del, no_ack}, - {{_Guid, _IsPersistent}, del, no_ack}, - _RelSeq, Out, PubsRemoved, AcksRemoved) -> - {Out, PubsRemoved, AcksRemoved}; +journal_minus_segment1({no_pub, del, no_ack} = Obj, + {{_Guid, _IsPersistent}, no_del, no_ack}) -> + {Obj, 0, 0}; +journal_minus_segment1({no_pub, del, no_ack}, + {{_Guid, _IsPersistent}, del, no_ack}) -> + {undefined, 0, 0}; %% Just ack in journal -journal_minus_segment({no_pub, no_del, ack} = Obj, - {{_Guid, _IsPersistent}, del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, no_del, ack}, - {{_Guid, _IsPersistent}, del, ack}, - _RelSeq, Out, PubsRemoved, AcksRemoved) -> - {Out, PubsRemoved, AcksRemoved}; +journal_minus_segment1({no_pub, no_del, ack} = Obj, + {{_Guid, _IsPersistent}, del, no_ack}) -> + {Obj, 0, 0}; +journal_minus_segment1({no_pub, no_del, ack}, + {{_Guid, _IsPersistent}, del, ack}) -> + {undefined, 0, 0}; %% Publish and deliver in journal -journal_minus_segment({{_Guid, _IsPersistent}, del, no_ack} = Obj, - not_found, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; -journal_minus_segment({{_Guid, _IsPersistent} = Pub, del, no_ack}, - {Pub, no_del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, del, no_ack}, Out), - PubsRemoved + 1, AcksRemoved}; +journal_minus_segment1({{_Guid, _IsPersistent}, del, no_ack} = Obj, + not_found) -> + {Obj, 0, 0}; +journal_minus_segment1({{_Guid, _IsPersistent} = Pub, del, no_ack}, + {Pub, no_del, no_ack}) -> + {{no_pub, del, no_ack}, 1, 0}; %% Deliver and ack in journal -journal_minus_segment({no_pub, del, ack} = Obj, - {{_Guid, _IsPersistent}, no_del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, Obj, Out), PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, del, ack}, - {{_Guid, _IsPersistent}, del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, no_del, ack}, Out), - PubsRemoved, AcksRemoved}; -journal_minus_segment({no_pub, del, ack}, - {{_Guid, _IsPersistent}, del, ack}, - _RelSeq, Out, PubsRemoved, AcksRemoved) -> - {Out, PubsRemoved, AcksRemoved + 1}; +journal_minus_segment1({no_pub, del, ack} = Obj, + {{_Guid, _IsPersistent}, no_del, no_ack}) -> + {Obj, 0, 0}; +journal_minus_segment1({no_pub, del, ack}, + {{_Guid, _IsPersistent}, del, no_ack}) 
-> + {{no_pub, no_del, ack}, 0, 0}; +journal_minus_segment1({no_pub, del, ack}, + {{_Guid, _IsPersistent}, del, ack}) -> + {undefined, 0, 1}; %% Publish, deliver and ack in journal -journal_minus_segment({{_Guid, _IsPersistent}, del, ack}, - not_found, - _RelSeq, Out, PubsRemoved, AcksRemoved) -> - {Out, PubsRemoved, AcksRemoved}; -journal_minus_segment({{_Guid, _IsPersistent} = Pub, del, ack}, - {Pub, no_del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, del, ack}, Out), - PubsRemoved + 1, AcksRemoved}; -journal_minus_segment({{_Guid, _IsPersistent} = Pub, del, ack}, - {Pub, del, no_ack}, - RelSeq, Out, PubsRemoved, AcksRemoved) -> - {array:set(RelSeq, {no_pub, no_del, ack}, Out), - PubsRemoved + 1, AcksRemoved}. +journal_minus_segment1({{_Guid, _IsPersistent}, del, ack}, + not_found) -> + {undefined, 0, 0}; +journal_minus_segment1({{_Guid, _IsPersistent} = Pub, del, ack}, + {Pub, no_del, no_ack}) -> + {{no_pub, del, ack}, 1, 0}; +journal_minus_segment1({{_Guid, _IsPersistent} = Pub, del, ack}, + {Pub, del, no_ack}) -> + {{no_pub, no_del, ack}, 1, 0}. -- cgit v1.2.1 From d81e5822fd561df29f81289103a03d5bf2fe6239 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 18:23:36 +0100 Subject: refactor: more concise journal_plus_segment --- src/rabbit_queue_index.erl | 50 +++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index c1e7fed7..d793ffca 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -865,38 +865,34 @@ journal_plus_segment(JEntries, SegEntries) -> undefined -> not_found; SObj = {_, _, _} -> SObj end, - journal_plus_segment(JObj, SegEntry, RelSeq, SegEntriesOut) + case journal_plus_segment1(JObj, SegEntry) of + undefined -> array:reset(RelSeq, SegEntriesOut); + Obj -> array:set(RelSeq, Obj, SegEntriesOut) + end end, SegEntries, JEntries). %% Here, the Out is the Seg Array which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment). -journal_plus_segment({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, - not_found, - RelSeq, Out) -> - array:set(RelSeq, Obj, Out); -journal_plus_segment({{_Guid, _IsPersistent}, del, no_ack} = Obj, - not_found, - RelSeq, Out) -> - array:set(RelSeq, Obj, Out); -journal_plus_segment({{_Guid, _IsPersistent}, del, ack}, - not_found, - RelSeq, Out) -> - array:reset(RelSeq, Out); - -journal_plus_segment({no_pub, del, no_ack}, - {{_Guid, _IsPersistent} = Pub, no_del, no_ack}, - RelSeq, Out) -> - array:set(RelSeq, {Pub, del, no_ack}, Out); - -journal_plus_segment({no_pub, del, ack}, - {{_Guid, _IsPersistent}, no_del, no_ack}, - RelSeq, Out) -> - array:reset(RelSeq, Out); -journal_plus_segment({no_pub, no_del, ack}, - {{_Guid, _IsPersistent}, del, no_ack}, - RelSeq, Out) -> - array:reset(RelSeq, Out). +journal_plus_segment1({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, + not_found) -> + Obj; +journal_plus_segment1({{_Guid, _IsPersistent}, del, no_ack} = Obj, + not_found) -> + Obj; +journal_plus_segment1({{_Guid, _IsPersistent}, del, ack}, + not_found) -> + undefined; + +journal_plus_segment1({no_pub, del, no_ack}, + {{_Guid, _IsPersistent} = Pub, no_del, no_ack}) -> + {Pub, del, no_ack}; +journal_plus_segment1({no_pub, del, ack}, + {{_Guid, _IsPersistent}, no_del, no_ack}) -> + undefined; +journal_plus_segment1({no_pub, no_del, ack}, + {{_Guid, _IsPersistent}, del, no_ack}) -> + undefined. 
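With this refactor every per-entry decision becomes a pure function returning {NewEntry | undefined, PubsRemovedDelta, AcksRemovedDelta}, and all the array plumbing lives in the fold. A worked example of that contract, written as match assertions one could drop into a test in this module (the guid is hypothetical):

    Pub = {<<"guid-1">>, true},
    %% journal knows pub+del, segment already holds the pub: keep only
    %% the del, and count one duplicate publish
    {{no_pub, del, no_ack}, 1, 0} =
        journal_minus_segment1({Pub, del, no_ack}, {Pub, no_del, no_ack}),
    %% journal entry fully covered by the segment: drop it entirely
    {undefined, 1, 1} =
        journal_minus_segment1({Pub, del, ack}, {Pub, del, ack}).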
%% Remove from the journal entries for a segment, items that are %% duplicates of entries found in the segment itself. Used on start up -- cgit v1.2.1 From 257946f45fc803573e7f75a0c24309d1e3441619 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 18:27:38 +0100 Subject: convenient macro --- src/rabbit_queue_index.erl | 54 +++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d793ffca..5306a6ca 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -150,6 +150,10 @@ (?PUBLISH_RECORD_LENGTH_BYTES + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). +%% ---- misc ---- + +-define(PUB, {_Guid, _IsPersistent}). + %%---------------------------------------------------------------------------- -record(qistate, @@ -566,9 +570,9 @@ add_to_journal(RelSeq, Action, Segment1 = Segment #segment { journal_entries = add_to_journal(RelSeq, Action, JEntries) }, case Action of - del -> Segment1; - ack -> Segment1 #segment { acks = AckCount + 1 }; - {_Guid, _IsPersistent} -> Segment1 #segment { pubs = PubCount + 1 } + del -> Segment1; + ack -> Segment1 #segment { acks = AckCount + 1 }; + ?PUB -> Segment1 #segment { pubs = PubCount + 1 } end; %% This is a more relaxed version of deliver_or_ack_msg because we can @@ -763,7 +767,7 @@ segment_fetch_keys({Segments, CachedSegments}) -> segments_new() -> {dict:new(), []}. -write_entry_to_segment(_RelSeq, {{_Guid, _IsPersistent}, del, ack}, Hdl) -> +write_entry_to_segment(_RelSeq, {?PUB, del, ack}, Hdl) -> Hdl; write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> ok = case Pub of @@ -874,24 +878,24 @@ journal_plus_segment(JEntries, SegEntries) -> %% Here, the Out is the Seg Array which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment). -journal_plus_segment1({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, +journal_plus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> Obj; -journal_plus_segment1({{_Guid, _IsPersistent}, del, no_ack} = Obj, +journal_plus_segment1({?PUB, del, no_ack} = Obj, not_found) -> Obj; -journal_plus_segment1({{_Guid, _IsPersistent}, del, ack}, +journal_plus_segment1({?PUB, del, ack}, not_found) -> undefined; journal_plus_segment1({no_pub, del, no_ack}, - {{_Guid, _IsPersistent} = Pub, no_del, no_ack}) -> + {?PUB = Pub, no_del, no_ack}) -> {Pub, del, no_ack}; journal_plus_segment1({no_pub, del, ack}, - {{_Guid, _IsPersistent}, no_del, no_ack}) -> + {?PUB, no_del, no_ack}) -> undefined; journal_plus_segment1({no_pub, no_del, ack}, - {{_Guid, _IsPersistent}, del, no_ack}) -> + {?PUB, del, no_ack}) -> undefined. %% Remove from the journal entries for a segment, items that are @@ -919,60 +923,60 @@ journal_minus_segment(JEntries, SegEntries) -> %% publish or ack is in both the journal and the segment. %% Both the same. 
Must be at least the publish -journal_minus_segment1({{_Guid, _IsPersistent}, _Del, no_ack} = Obj, +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> {undefined, 1, 0}; -journal_minus_segment1({{_Guid, _IsPersistent}, _Del, ack} = Obj, +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> {undefined, 1, 1}; %% Just publish in journal -journal_minus_segment1({{_Guid, _IsPersistent}, no_del, no_ack} = Obj, +journal_minus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> {Obj, 0, 0}; %% Just deliver in journal journal_minus_segment1({no_pub, del, no_ack} = Obj, - {{_Guid, _IsPersistent}, no_del, no_ack}) -> + {?PUB, no_del, no_ack}) -> {Obj, 0, 0}; journal_minus_segment1({no_pub, del, no_ack}, - {{_Guid, _IsPersistent}, del, no_ack}) -> + {?PUB, del, no_ack}) -> {undefined, 0, 0}; %% Just ack in journal journal_minus_segment1({no_pub, no_del, ack} = Obj, - {{_Guid, _IsPersistent}, del, no_ack}) -> + {?PUB, del, no_ack}) -> {Obj, 0, 0}; journal_minus_segment1({no_pub, no_del, ack}, - {{_Guid, _IsPersistent}, del, ack}) -> + {?PUB, del, ack}) -> {undefined, 0, 0}; %% Publish and deliver in journal -journal_minus_segment1({{_Guid, _IsPersistent}, del, no_ack} = Obj, +journal_minus_segment1({?PUB, del, no_ack} = Obj, not_found) -> {Obj, 0, 0}; -journal_minus_segment1({{_Guid, _IsPersistent} = Pub, del, no_ack}, +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, no_ack}, 1, 0}; %% Deliver and ack in journal journal_minus_segment1({no_pub, del, ack} = Obj, - {{_Guid, _IsPersistent}, no_del, no_ack}) -> + {?PUB, no_del, no_ack}) -> {Obj, 0, 0}; journal_minus_segment1({no_pub, del, ack}, - {{_Guid, _IsPersistent}, del, no_ack}) -> + {?PUB, del, no_ack}) -> {{no_pub, no_del, ack}, 0, 0}; journal_minus_segment1({no_pub, del, ack}, - {{_Guid, _IsPersistent}, del, ack}) -> + {?PUB, del, ack}) -> {undefined, 0, 1}; %% Publish, deliver and ack in journal -journal_minus_segment1({{_Guid, _IsPersistent}, del, ack}, +journal_minus_segment1({?PUB, del, ack}, not_found) -> {undefined, 0, 0}; -journal_minus_segment1({{_Guid, _IsPersistent} = Pub, del, ack}, +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, ack}, 1, 0}; -journal_minus_segment1({{_Guid, _IsPersistent} = Pub, del, ack}, +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> {{no_pub, no_del, ack}, 1, 0}. -- cgit v1.2.1 From 54deaf9a2612dcfe09b32a46b45090cadd768047 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 18:34:52 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 63 ++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5306a6ca..9f740042 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -878,24 +878,18 @@ journal_plus_segment(JEntries, SegEntries) -> %% Here, the Out is the Seg Array which we may be adding to (for %% items only in the journal), modifying (bits in both), or erasing %% from (ack in journal, not segment). 
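The ?PUB definition works because Erlang macros are purely textual: the expansion {_Guid, _IsPersistent} is itself a legal pattern, so the macro can sit anywhere a pattern can, including clause heads and, as above, aliased via ?PUB = Pub. A tiny invented example:

    -define(PUB, {_Guid, _IsPersistent}).

    is_pub_entry({?PUB, _Del, _Ack}) -> true;
    is_pub_entry(_)                  -> false.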
-journal_plus_segment1({?PUB, no_del, no_ack} = Obj, - not_found) -> +journal_plus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> Obj; -journal_plus_segment1({?PUB, del, no_ack} = Obj, - not_found) -> +journal_plus_segment1({?PUB, del, no_ack} = Obj, not_found) -> Obj; -journal_plus_segment1({?PUB, del, ack}, - not_found) -> +journal_plus_segment1({?PUB, del, ack}, not_found) -> undefined; -journal_plus_segment1({no_pub, del, no_ack}, - {?PUB = Pub, no_del, no_ack}) -> +journal_plus_segment1({no_pub, del, no_ack}, {?PUB = Pub, no_del, no_ack}) -> {Pub, del, no_ack}; -journal_plus_segment1({no_pub, del, ack}, - {?PUB, no_del, no_ack}) -> +journal_plus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> undefined; -journal_plus_segment1({no_pub, no_del, ack}, - {?PUB, del, no_ack}) -> +journal_plus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> undefined. %% Remove from the journal entries for a segment, items that are @@ -923,60 +917,45 @@ journal_minus_segment(JEntries, SegEntries) -> %% publish or ack is in both the journal and the segment. %% Both the same. Must be at least the publish -journal_minus_segment1({?PUB, _Del, no_ack} = Obj, - Obj) -> +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> {undefined, 1, 0}; -journal_minus_segment1({?PUB, _Del, ack} = Obj, - Obj) -> +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> {undefined, 1, 1}; %% Just publish in journal -journal_minus_segment1({?PUB, no_del, no_ack} = Obj, - not_found) -> +journal_minus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> {Obj, 0, 0}; %% Just deliver in journal -journal_minus_segment1({no_pub, del, no_ack} = Obj, - {?PUB, no_del, no_ack}) -> +journal_minus_segment1({no_pub, del, no_ack} = Obj, {?PUB, no_del, no_ack}) -> {Obj, 0, 0}; -journal_minus_segment1({no_pub, del, no_ack}, - {?PUB, del, no_ack}) -> +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> {undefined, 0, 0}; %% Just ack in journal -journal_minus_segment1({no_pub, no_del, ack} = Obj, - {?PUB, del, no_ack}) -> +journal_minus_segment1({no_pub, no_del, ack} = Obj, {?PUB, del, no_ack}) -> {Obj, 0, 0}; -journal_minus_segment1({no_pub, no_del, ack}, - {?PUB, del, ack}) -> +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> {undefined, 0, 0}; %% Publish and deliver in journal -journal_minus_segment1({?PUB, del, no_ack} = Obj, - not_found) -> +journal_minus_segment1({?PUB, del, no_ack} = Obj, not_found) -> {Obj, 0, 0}; -journal_minus_segment1({?PUB = Pub, del, no_ack}, - {Pub, no_del, no_ack}) -> +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, no_ack}, 1, 0}; %% Deliver and ack in journal -journal_minus_segment1({no_pub, del, ack} = Obj, - {?PUB, no_del, no_ack}) -> +journal_minus_segment1({no_pub, del, ack} = Obj, {?PUB, no_del, no_ack}) -> {Obj, 0, 0}; -journal_minus_segment1({no_pub, del, ack}, - {?PUB, del, no_ack}) -> +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> {{no_pub, no_del, ack}, 0, 0}; -journal_minus_segment1({no_pub, del, ack}, - {?PUB, del, ack}) -> +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> {undefined, 0, 1}; %% Publish, deliver and ack in journal -journal_minus_segment1({?PUB, del, ack}, - not_found) -> +journal_minus_segment1({?PUB, del, ack}, not_found) -> {undefined, 0, 0}; -journal_minus_segment1({?PUB = Pub, del, ack}, - {Pub, no_del, no_ack}) -> +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, ack}, 1, 0}; 
-journal_minus_segment1({?PUB = Pub, del, ack}, - {Pub, del, no_ack}) -> +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> {{no_pub, no_del, ack}, 1, 0}. -- cgit v1.2.1 From 66700a5e8f10ff2a016311c6d2524f51f3347a8f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 18:51:52 +0100 Subject: minor tweak for consistency --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 21f15058..b799346c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1596,7 +1596,7 @@ combine_files(#file_summary { file = Source, case DestinationContiguousTop =:= DestinationValid of true -> ok = truncate_and_extend_file( - DestinationHdl, DestinationValid, ExpectedSize); + DestinationHdl, DestinationContiguousTop, ExpectedSize); false -> {DestinationWorkList, DestinationValid} = find_unremoved_messages_in_file(Destination, State), -- cgit v1.2.1 From f84513e3a0fd9e288cedf289a9e9b79d504b1478 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 19:22:47 +0100 Subject: fix up comments --- src/rabbit_queue_index.erl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 9f740042..6788debe 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -875,9 +875,10 @@ journal_plus_segment(JEntries, SegEntries) -> end end, SegEntries, JEntries). -%% Here, the Out is the Seg Array which we may be adding to (for -%% items only in the journal), modifying (bits in both), or erasing -%% from (ack in journal, not segment). +%% Here, the result is the item which we may be adding to (for items +%% only in the journal), modifying in (bits in both), or, when +%% returning 'undefined', erasing from (ack in journal, not segment) +%% the segment array. journal_plus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> Obj; journal_plus_segment1({?PUB, del, no_ack} = Obj, not_found) -> @@ -912,8 +913,11 @@ journal_minus_segment(JEntries, SegEntries) -> AcksRemoved + AcksRemovedDelta} end, {array_new(), 0, 0}, JEntries). -%% Here, the Out is a fresh journal that we're filling with valid -%% entries. PubsRemoved and AcksRemoved only get increased when a +%% Here, the result is a triple with the first element containing the +%% item we are adding to or modifying in the (initially fresh) journal +%% array. If the item is 'undefined' we leave the journal array +%% alone. The other two elements of the triple are the deltas for +%% PubsRemoved and AcksRemoved - these only get increased when a %% publish or ack is in both the journal and the segment. %% Both the same. Must be at least the publish -- cgit v1.2.1 From 2a5dc497e9ccaa96b07cfbe260a83df6c5000459 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 19:31:25 +0100 Subject: tweak --- src/rabbit_queue_index.erl | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6788debe..1207c951 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -865,10 +865,7 @@ bool_to_int(false) -> 0. 
journal_plus_segment(JEntries, SegEntries) -> array:sparse_foldl( fun (RelSeq, JObj, SegEntriesOut) -> - SegEntry = case array:get(RelSeq, SegEntriesOut) of - undefined -> not_found; - SObj = {_, _, _} -> SObj - end, + SegEntry = array:get(RelSeq, SegEntriesOut), case journal_plus_segment1(JObj, SegEntry) of undefined -> array:reset(RelSeq, SegEntriesOut); Obj -> array:set(RelSeq, Obj, SegEntriesOut) @@ -879,11 +876,11 @@ journal_plus_segment(JEntries, SegEntries) -> %% only in the journal), modifying in (bits in both), or, when %% returning 'undefined', erasing from (ack in journal, not segment) %% the segment array. -journal_plus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> +journal_plus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> Obj; -journal_plus_segment1({?PUB, del, no_ack} = Obj, not_found) -> +journal_plus_segment1({?PUB, del, no_ack} = Obj, undefined) -> Obj; -journal_plus_segment1({?PUB, del, ack}, not_found) -> +journal_plus_segment1({?PUB, del, ack}, undefined) -> undefined; journal_plus_segment1({no_pub, del, no_ack}, {?PUB = Pub, no_del, no_ack}) -> @@ -899,10 +896,7 @@ journal_plus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> journal_minus_segment(JEntries, SegEntries) -> array:sparse_foldl( fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) -> - SegEntry = case array:get(RelSeq, SegEntries) of - undefined -> not_found; - SObj = {_, _, _} -> SObj - end, + SegEntry = array:get(RelSeq, SegEntries), {Obj, PubsRemovedDelta, AcksRemovedDelta} = journal_minus_segment1(JObj, SegEntry), {case Obj of @@ -927,7 +921,7 @@ journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> {undefined, 1, 1}; %% Just publish in journal -journal_minus_segment1({?PUB, no_del, no_ack} = Obj, not_found) -> +journal_minus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> {Obj, 0, 0}; %% Just deliver in journal @@ -943,7 +937,7 @@ journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> {undefined, 0, 0}; %% Publish and deliver in journal -journal_minus_segment1({?PUB, del, no_ack} = Obj, not_found) -> +journal_minus_segment1({?PUB, del, no_ack} = Obj, undefined) -> {Obj, 0, 0}; journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, no_ack}, 1, 0}; @@ -957,7 +951,7 @@ journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> {undefined, 0, 1}; %% Publish, deliver and ack in journal -journal_minus_segment1({?PUB, del, ack}, not_found) -> +journal_minus_segment1({?PUB, del, ack}, undefined) -> {undefined, 0, 0}; journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, ack}, 1, 0}; -- cgit v1.2.1 From c38d8d5afb8764be25cba07765650140bf1ed532 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 15 May 2010 20:43:47 +0100 Subject: cosmetic: more sensible order of clauses --- src/rabbit_queue_index.erl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 1207c951..1e583384 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -924,6 +924,20 @@ journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> journal_minus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> {Obj, 0, 0}; +%% Publish and deliver in journal +journal_minus_segment1({?PUB, del, no_ack} = Obj, undefined) -> + {Obj, 0, 0}; +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, no_ack}, 1, 0}; + +%% Publish, deliver and ack in journal 
+journal_minus_segment1({?PUB, del, ack}, undefined) ->
+    {undefined, 0, 0};
+journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) ->
+    {{no_pub, del, ack}, 1, 0};
+journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) ->
+    {{no_pub, no_del, ack}, 1, 0};
+
 %% Just deliver in journal
 journal_minus_segment1({no_pub, del, no_ack} = Obj, {?PUB, no_del, no_ack}) ->
     {Obj, 0, 0};
@@ -936,24 +950,10 @@ journal_minus_segment1({no_pub, no_del, ack} = Obj, {?PUB, del, no_ack}) ->
 journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) ->
     {undefined, 0, 0};
 
-%% Publish and deliver in journal
-journal_minus_segment1({?PUB, del, no_ack} = Obj, undefined) ->
-    {Obj, 0, 0};
-journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) ->
-    {{no_pub, del, no_ack}, 1, 0};
-
 %% Deliver and ack in journal
 journal_minus_segment1({no_pub, del, ack} = Obj, {?PUB, no_del, no_ack}) ->
     {Obj, 0, 0};
 journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) ->
     {{no_pub, no_del, ack}, 0, 0};
 journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) ->
-    {undefined, 0, 1};
-
-%% Publish, deliver and ack in journal
-journal_minus_segment1({?PUB, del, ack}, undefined) ->
-    {undefined, 0, 0};
-journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) ->
-    {{no_pub, del, ack}, 1, 0};
-journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) ->
-    {{no_pub, no_del, ack}, 1, 0}.
+    {undefined, 0, 1}.
-- 
cgit v1.2.1


From 8fb72f9baeb457c2a1e861af85eeac4adf660cd2 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sat, 15 May 2010 21:06:00 +0100
Subject: Correct an accounting bug in queue recovery. By incorrectly counting
 duplicated acks, we remove too few acks when constructing a segment's
 combined ackcount, leading to an ackcount that is too high. On (dirty)
 recovery, this will cause the qi to report its length as being shorter than
 it really is, which will then cause the delta record in vq to be too short.
 Subsequent loading of segments (delta => beta/gamma in q3) will then crash
 as more publishes will be found than expected. Note that to trigger this
 bug, Rabbit must crash/be-killed in the first place, during flushing of the
 qi journal in a durable queue with persistent messages.

---
 src/rabbit_queue_index.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 1e583384..41f25a8a 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -948,7 +948,7 @@ journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) ->
 journal_minus_segment1({no_pub, no_del, ack} = Obj, {?PUB, del, no_ack}) ->
     {Obj, 0, 0};
 journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) ->
-    {undefined, 0, 0};
+    {undefined, 0, 1};
 
 %% Deliver and ack in journal
 journal_minus_segment1({no_pub, del, ack} = Obj, {?PUB, no_del, no_ack}) ->
-- 
cgit v1.2.1


From d254f6d9507f6dc4a2a3dcf60092120d62256f37 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Sun, 16 May 2010 11:24:06 +0100
Subject: cosmetic

---
 src/rabbit_queue_index.erl | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 41f25a8a..6a279925 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -156,21 +156,9 @@
 
 %%----------------------------------------------------------------------------
 
--record(qistate,
- --record(segment, - { pubs, - acks, - handle, - journal_entries, - path, - num - }). +-record(qistate, { dir, segments, journal_handle, dirty_count }). + +-record(segment, { pubs, acks, handle, journal_entries, path, num }). -include("rabbit.hrl"). @@ -346,7 +334,8 @@ flush_journal(State = #qistate { dirty_count = 0 }) -> flush_journal(State = #qistate { segments = Segments }) -> Segments1 = segment_fold( - fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, + fun (_Seg, #segment { journal_entries = JEntries, + pubs = PubCount, acks = AckCount } = Segment, SegmentsN) -> case PubCount > 0 andalso PubCount == AckCount of true -> ok = delete_segment(Segment), @@ -480,7 +469,8 @@ terminate(StoreShutdown, Terms, State = _ -> file_handle_cache:close(JournalHdl) end, SegTerms = segment_fold( - fun (Seg, #segment { handle = Hdl, pubs = PubCount, + fun (Seg, #segment { handle = Hdl, + pubs = PubCount, acks = AckCount }, SegTermsAcc) -> ok = case Hdl of undefined -> ok; @@ -566,7 +556,8 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, add_to_journal(RelSeq, Action, Segment = #segment { journal_entries = JEntries, - pubs = PubCount, acks = AckCount }) -> + pubs = PubCount, + acks = AckCount }) -> Segment1 = Segment #segment { journal_entries = add_to_journal(RelSeq, Action, JEntries) }, case Action of -- cgit v1.2.1 From 6b8a86c5d5e0f5ebbd9e9f6a740764c4690cfbd5 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 16 May 2010 11:26:22 +0100 Subject: refactoring: extract segment recovery also rename maybe_add_to_journal to recover_message, which better describes its purpose. --- src/rabbit_queue_index.erl | 57 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6a279925..e4783338 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -239,23 +239,13 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> false -> lists:foldl( fun (Seg, {Segments2, CountAcc}) -> - Segment = segment_find_or_new(Seg, Dir, Segments2), - {SegEntries, PubCount, AckCount, Segment1} = - load_segment(false, Segment), - Segment2 = - #segment { pubs = PubCount1, acks = AckCount1 } = - array:sparse_foldl( - fun (RelSeq, {{Guid, _IsPersistent}, Del, - no_ack}, - Segment3) -> - maybe_add_to_journal( - ContainsCheckFun(Guid), - CleanShutdown, Del, RelSeq, Segment3) - end, Segment1 #segment { pubs = PubCount, - acks = AckCount }, - SegEntries), - {segment_store(Segment2, Segments2), - CountAcc + PubCount1 - AckCount1} + Segment = #segment { pubs = PubCount, + acks = AckCount } = + recover_segment( + ContainsCheckFun, CleanShutdown, + segment_find_or_new(Seg, Dir, Segments2)), + {segment_store(Segment, Segments2), + CountAcc + PubCount - AckCount} end, {Segments, 0}, all_segment_nums(State2)); true -> %% At this stage, we will only know about files that @@ -484,6 +474,28 @@ terminate(StoreShutdown, Terms, State = end, State #qistate { journal_handle = undefined, segments = undefined }. +recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> + {SegEntries, PubCount, AckCount, Segment1} = + load_segment(false, Segment), + array:sparse_foldl( + fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment3) -> + recover_message(ContainsCheckFun(Guid), CleanShutdown, + Del, RelSeq, Segment3) + end, + Segment1 #segment { pubs = PubCount, acks = AckCount }, + SegEntries). 
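Returning to the ack-accounting fix a couple of patches up: the corrected clause answers {undefined, 0, 1} when the journal holds a bare ack and the segment already records both del and ack. Since load_journal computes AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, the duplicate ack has to show up in AcksRemoved. A worked instance, with a hypothetical guid:

    Pub = {<<"g">>, true},
    {undefined, 0, 1} =
        journal_minus_segment1({no_pub, no_del, ack}, {Pub, del, ack}),
    %% giving AckCount1 = 1 + 1 - 1 = 1, matching the single publish;
    %% the old {undefined, 0, 0} yielded AckCount1 = 2, so the queue
    %% appeared shorter than it really was after a dirty recovery.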
+
+recover_message( true,  true, _Del, _RelSeq, Segment) ->
+    Segment;
+recover_message( true, false,    del, _RelSeq, Segment) ->
+    Segment;
+recover_message( true, false, no_del,  RelSeq, Segment) ->
+    add_to_journal(RelSeq, del, Segment);
+recover_message(false,     _,    del,  RelSeq, Segment) ->
+    add_to_journal(RelSeq, ack, Segment);
+recover_message(false,     _, no_del,  RelSeq, Segment) ->
+    add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)).
+
 queue_name_to_dir_name(Name = #resource { kind = queue }) ->
     Bin = term_to_binary(Name),
     Size = 8*size(Bin),
     <<Num:Size>> = Bin,
     lists:flatten(io_lib:format("~.36B", [Num])).
@@ -534,17 +546,6 @@ queue_index_walker_reader(QueueName, Gatherer) ->
 %% journal manipulation
 %%----------------------------------------------------------------------------
 
-maybe_add_to_journal( true,  true, _Del, _RelSeq, Segment) ->
-    Segment;
-maybe_add_to_journal( true, false,   del, _RelSeq, Segment) ->
-    Segment;
-maybe_add_to_journal( true, false,  _Del,  RelSeq, Segment) ->
-    add_to_journal(RelSeq, del, Segment);
-maybe_add_to_journal(false,     _,   del,  RelSeq, Segment) ->
-    add_to_journal(RelSeq, ack, Segment);
-maybe_add_to_journal(false,     _,  _Del,  RelSeq, Segment) ->
-    add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)).
-
 add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount,
                                                  segments = Segments,
                                                  dir = Dir }) ->
-- 
cgit v1.2.1


From f5f071d42fbd191d34917bcf658e721887c5cf5e Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Mon, 17 May 2010 14:51:58 +0100
Subject: commenting on the non-obvious

---
 src/rabbit_queue_index.erl | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index e4783338..44df5976 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -222,7 +222,8 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) ->
     %% segment and the journal.
     State1 = load_journal(State),
     %% 2. Flush the journal. This makes life easier for everyone, as
-    %% it means there won't be any publishes in the journal alone.
+    %% it means there won't be any publishes in the journal
+    %% alone. The dirty recovery code below relies on this.
     State2 = #qistate { dir = Dir, segments = Segments } =
         flush_journal(State1),
     %% 3. Load each segment in turn and filter out messages that are
@@ -249,7 +250,7 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) ->
                   end, {Segments, 0}, all_segment_nums(State2));
          true -> %% At this stage, we will only know about files that
-                 %% were loaded during flushing. They *will* have
+                 %% were loaded during journal loading. They *will* have
                  %% correct ack and pub counts, but for all remaining
                  %% segments, if they're not in the Segments store then
                  %% we need to add them and populate with saved data.
@@ -269,9 +270,15 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) ->
                            SegmentsN
                        end
                end, Segments, all_segment_nums(State2)),
+             %% the counts above include transient messages, which
+             %% would be the wrong thing to return
              undefined}
         end,
-    %% artificially set the dirty_count non zero and call flush again
+    %% flush again so we eagerly remove any segments that have become
+    %% empty due to either ContainsCheckFun returning false in the
+    %% non-clean recovery case or PubCount==AckCount in the clean
+    %% recovery case. Since the latter doesn't go through the journal
+    %% logic we artificially set the dirty_count non zero.
     State3 = flush_journal(State2 #qistate { segments = Segments1,
                                              dirty_count = 1 }),
     {Count, Terms, State3}.
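recover_message/5 above packs the whole dirty-recovery policy into five clauses. Summarised as a decision table - the wording in the last column is editorial, not from the source:

    %% InStore  CleanShutdown  Del     journal action taken
    %% -------  -------------  ------  --------------------------------
    %% true     true           any     none: segment is authoritative
    %% true     false          del     none: delivery already recorded
    %% true     false          no_del  add del (message is redelivered)
    %% false    any            del     add ack (complete the life-cycle)
    %% false    any            no_del  add del + ack (erase the entry)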
-- 
cgit v1.2.1


From 7d0f076a7d1e2b5411f0b319df4456fee8cda03e Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 17 May 2010 17:10:13 +0100
Subject: Improved incorrect or outdated documentation in msg_store

---
 src/rabbit_msg_store.erl | 57 +++++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
index b799346c..af0f8de5 100644
--- a/src/rabbit_msg_store.erl
+++ b/src/rabbit_msg_store.erl
@@ -149,11 +149,12 @@
 %% The components:
 %%
-%% MsgLocation: this is a mapping from Guid to #msg_location{}:
-%%              {Guid, RefCount, File, Offset, TotalSize}
-%%              By default, it's in ets, but it's also pluggable.
-%% FileSummary: this is an ets table which contains:
-%%              {File, ValidTotalSize, ContiguousTop, Left, Right}
+%% Index: this is a mapping from Guid to #msg_location{}:
+%%        {Guid, RefCount, File, Offset, TotalSize}
+%%        By default, it's in ets, but it's also pluggable.
+%% FileSummary: this is an ets table which maps File to #file_summary{}:
+%%              {File, ValidTotalSize, ContiguousTop, Left, Right,
+%%               FileSize, Locked, Readers}
 %%
 %% The basic idea is that messages are appended to the current file up
 %% until that file becomes too big (> file_size_limit). At that point,
@@ -163,9 +164,9 @@
 %% eldest file.
 %%
 %% We need to keep track of which messages are in which files (this is
-%% the MsgLocation mapping); how much useful data is in each file and
-%% which files are on the left and right of each other. This is the
-%% purpose of the FileSummary table.
+%% the Index); how much useful data is in each file and which files
+%% are on the left and right of each other. This is the purpose of the
+%% FileSummary ets table.
 %%
 %% As messages are removed from files, holes appear in these
 %% files. The field ValidTotalSize contains the total amount of useful
@@ -190,14 +191,14 @@
 %% file, then read back in to form a contiguous chunk of good data at
 %% the start of the left file. Thus the left file is garbage collected
 %% and compacted. Then the good data from the right file is copied
-%% onto the end of the left file. MsgLocation and FileSummary tables
-%% are updated.
+%% onto the end of the left file. Index and FileSummary tables are
+%% updated.
 %%
-%% On startup, we scan the files we discover, dealing with the
-%% possibilites of a crash have occured during a compaction (this
-%% consists of tidyup - the compaction is deliberately designed such
-%% that data is duplicated on disk rather than risking it being lost),
-%% and rebuild the FileSummary ets table and MsgLocation mapping.
+%% On non-clean startup, we scan the files we discover, dealing with
+%% the possibilities of a crash having occurred during a compaction
+%% (this consists of tidyup - the compaction is deliberately designed
+%% such that data is duplicated on disk rather than risking it being
+%% lost), and rebuild the FileSummary ets table and Index.
 %%
 %% So, with this design, messages move to the left. Eventually, they
 %% should end up in a contiguous block on the left and are then never
@@ -255,14 +256,14 @@
 %% alternating full files and files with only one tiny message in
 %% them).
 %%
-%% Messages are reference-counted. When a message with the same id is
-%% written several times we only store it once, and only remove it
-%% from the store when it has been removed the same number of
-%% times.
+%% Messages are reference-counted. When a message with the same guid
+%% is written several times we only store it once, and only remove it
+%% from the store when it has been removed the same number of times.
 %%
 %% The reference counts do not persist. Therefore the initialisation
 %% function must be provided with a generator that produces ref count
-%% deltas for all recovered messages.
+%% deltas for all recovered messages. This is only used on startup
+%% when the shutdown was non-clean.
 %%
 %% Read messages with a reference count greater than one are entered
 %% into a message cache. The purpose of the cache is not especially
@@ -284,12 +285,12 @@
 %% not overtake removes.
 %%
 %% The current file to which messages are being written has a
-%% write-back cache. This is written to immediately by the client and
-%% can be read from by the client too. This means that there are only
-%% ever writes made to the current file, thus eliminating delays due
-%% to flushing write buffers in order to be able to safely read from
-%% the current file. The one exception to this is that on start up,
-%% the cache is not populated with msgs found in the current file, and
+%% write-back cache. This is written to immediately by clients and can
+%% be read from by clients too. This means that there are only ever
+%% writes made to the current file, thus eliminating delays due to
+%% flushing write buffers in order to be able to safely read from the
+%% current file. The one exception to this is that on start up, the
+%% cache is not populated with msgs found in the current file, and
 %% thus in this case only, reads may have to come from the file
 %% itself. The effect of this is that even if the msg_store process is
 %% heavily overloaded, clients can still write and read messages with
@@ -1609,10 +1610,6 @@ combine_files(#file_summary { file = Source,
                               %% was then DestinationContiguousTop would
                               %% have been extended by TotalSize
                               Offset < DestinationContiguousTop
-                              %% Given expected access patterns, I suspect
-                              %% that the list should be naturally sorted
-                              %% as we require, however, we need to
-                              %% enforce it anyway
                       end, DestinationWorkList),
     Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP,
     {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE),
-- 
cgit v1.2.1


From f772b7ee48750199a477592a9c6222921e720981 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 17 May 2010 18:34:38 +0100
Subject: Change how we handle a gc finding concurrent readers on the same
 file. Previously we were just waiting and trying again. Now we get the
 readers to signal to the GC when they're done. This itself introduces a race
 and so has to carefully deal with such notifications arriving after the GC
 has completed, but it removes a magic number. Also fixed a bug which meant
 readers were reading from locked files. Whoops.
--- src/rabbit_msg_store.erl | 38 ++++++++++++++++++++++++++------------ src/rabbit_msg_store_gc.erl | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index af0f8de5..ddb53a24 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -89,6 +89,7 @@ index_state, index_module, dir, + gc_pid, file_handles_ets, file_summary_ets, dedup_cache_ets, @@ -109,6 +110,7 @@ index_state :: any(), index_module :: atom(), dir :: file_path(), + gc_pid :: pid(), file_handles_ets :: tid(), file_summary_ets :: tid(), dedup_cache_ets :: tid(), @@ -137,7 +139,8 @@ -spec(successfully_recovered_state/1 :: (server()) -> boolean()). -spec(gc/3 :: (non_neg_integer(), non_neg_integer(), - {tid(), file_path(), atom(), any()}) -> non_neg_integer()). + {tid(), file_path(), atom(), any()}) -> + 'concurrent_readers' | non_neg_integer()). -endif. @@ -351,13 +354,14 @@ set_maximum_since_use(Server, Age) -> gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}). client_init(Server, Ref) -> - {IState, IModule, Dir, + {IState, IModule, Dir, GCPid, FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} = gen_server2:call(Server, {new_client_state, Ref}, infinity), #client_msstate { file_handle_cache = dict:new(), index_state = IState, index_module = IModule, dir = Dir, + gc_pid = GCPid, file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts, @@ -423,10 +427,20 @@ client_read2(Server, false, _Right, client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, CState = #client_msstate { file_handles_ets = FileHandlesEts, file_summary_ets = FileSummaryEts, - dedup_cache_ets = DedupCacheEts }) -> - Release = fun() -> ets:update_counter(FileSummaryEts, File, - {#file_summary.readers, -1}) - end, + dedup_cache_ets = DedupCacheEts, + gc_pid = GCPid }) -> + Release = + fun() -> ok = case ets:update_counter(FileSummaryEts, File, + {#file_summary.readers, -1}) of + 0 -> case ets:lookup(FileSummaryEts, File) of + [#file_summary { locked = true }] -> + rabbit_msg_store_gc:no_readers( + GCPid, File); + _ -> ok + end; + _ -> ok + end + end, %% If a GC involving the file hasn't already started, it won't %% start now. Need to check again to see if we've been locked in %% the meantime, between lookup and update_counter (thus GC @@ -435,7 +449,7 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, case ets:lookup(FileSummaryEts, File) of [] -> %% GC has deleted our file, just go round again. read(Server, Guid, CState); - [{#file_summary { locked = true }}] -> + [#file_summary { locked = true }] -> %% If we get a badarg here, then the GC has finished and %% deleted our file. Try going around again. Otherwise, %% just defer. 
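The Release fun in client_read3 above leans on ets:update_counter/3 returning the post-update value, so "we were the last reader" is observed atomically with the decrement itself. A small sketch, with an invented table and key:

    T = ets:new(readers_demo, [set, public]),
    true = ets:insert(T, {"0.rdq", 0}),
    1 = ets:update_counter(T, "0.rdq", {2, 1}),   %% a reader arrives
    0 = ets:update_counter(T, "0.rdq", {2, -1}).  %% the last reader leaves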
@@ -447,7 +461,7 @@ client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, Defer() catch error:badarg -> read(Server, Guid, CState) end; - _ -> + [#file_summary { locked = false }] -> %% Ok, we're definitely safe to continue - a GC involving %% the file cannot start up now, and isn't running, so %% nothing will tell us from now on to close the handle if @@ -569,8 +583,9 @@ handle_call({new_client_state, CRef}, _From, file_summary_ets = FileSummaryEts, dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts, - client_refs = ClientRefs }) -> - reply({IndexState, IndexModule, Dir, + client_refs = ClientRefs, + gc_pid = GCPid }) -> + reply({IndexState, IndexModule, Dir, GCPid, FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts}, State #msstate { client_refs = sets:add_element(CRef, ClientRefs) }); @@ -1569,8 +1584,7 @@ gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> {#file_summary.contiguous_top, TotalValidData}, {#file_summary.file_size, TotalValidData}]), SrcFileSize + DstFileSize - TotalValidData; - false -> timer:sleep(100), - gc(SrcFile, DstFile, State) + false -> concurrent_readers end. combine_files(#file_summary { file = Source, diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 038d51c4..96280e10 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/4, gc/3, stop/1]). +-export([start_link/4, gc/3, no_readers/2, stop/1]). -export([set_maximum_since_use/2]). @@ -45,7 +45,8 @@ index_state, index_module, parent, - file_summary_ets + file_summary_ets, + scheduled }). -include("rabbit.hrl"). @@ -60,6 +61,9 @@ start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> gc(Server, Source, Destination) -> gen_server2:cast(Server, {gc, Source, Destination}). +no_readers(Server, File) -> + gen_server2:cast(Server, {no_readers, File}). + stop(Server) -> gen_server2:call(Server, stop, infinity). @@ -75,7 +79,8 @@ init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> index_state = IndexState, index_module = IndexModule, parent = Parent, - file_summary_ets = FileSummaryEts }, + file_summary_ets = FileSummaryEts, + scheduled = undefined }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -83,14 +88,16 @@ handle_call(stop, _From, State) -> {stop, normal, ok, State}. handle_cast({gc, Source, Destination}, - State = #gcstate { dir = Dir, - index_state = IndexState, - index_module = Index, - parent = Parent, - file_summary_ets = FileSummaryEts }) -> - Reclaimed = rabbit_msg_store:gc(Source, Destination, - {FileSummaryEts, Dir, Index, IndexState}), - ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, Destination), + State = #gcstate { scheduled = undefined }) -> + {noreply, attempt_gc(State #gcstate { scheduled = {Source, Destination} }), + hibernate}; + +handle_cast({no_readers, File}, + State = #gcstate { scheduled = {Source, Destination} }) + when File =:= Source orelse File =:= Destination -> + {noreply, attempt_gc(State), hibernate}; + +handle_cast({no_readers, _File}, State) -> {noreply, State, hibernate}; handle_cast({set_maximum_since_use, Age}, State) -> @@ -105,3 +112,19 @@ terminate(_Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. 
+ +attempt_gc(State = #gcstate { dir = Dir, + index_state = IndexState, + index_module = Index, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = {Source, Destination} }) -> + case rabbit_msg_store:gc(Source, Destination, + {FileSummaryEts, Dir, Index, IndexState}) of + concurrent_readers -> + State; + Reclaimed -> + ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, + Destination), + State #gcstate { scheduled = undefined } + end. -- cgit v1.2.1 From 6f860f4d671c015395dc4127e70aabe9f3b7245d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 17 May 2010 21:50:10 +0100 Subject: fix typo --- src/rabbit_msg_store.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index ddb53a24..5f93e4e8 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -155,7 +155,7 @@ %% Index: this is a mapping from Guid to #msg_location{}: %% {Guid, RefCount, File, Offset, TotalSize} %% By default, it's in ets, but it's also pluggable. -%% FileSummary: this is an ets table which maps File to #file_summary(): +%% FileSummary: this is an ets table which maps File to #file_summary{}: %% {File, ValidTotalSize, ContiguousTop, Left, Right, %% FileSize, Locked, Readers} %% @@ -169,7 +169,7 @@ %% We need to keep track of which messages are in which files (this is %% the Index); how much useful data is in each file and which files %% are on the left and right of each other. This is the purpose of the -%% FileSummary table. +%% FileSummary ets table. %% %% As messages are removed from files, holes appear in these %% files. The field ValidTotalSize contains the total amount of useful -- cgit v1.2.1 From b6ba85900f867ebdec5a080959534b00ae7473bf Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 17 May 2010 22:11:02 +0100 Subject: cosmetic --- src/rabbit_msg_store_gc.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 96280e10..2b6bf9b2 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -121,10 +121,8 @@ attempt_gc(State = #gcstate { dir = Dir, scheduled = {Source, Destination} }) -> case rabbit_msg_store:gc(Source, Destination, {FileSummaryEts, Dir, Index, IndexState}) of - concurrent_readers -> - State; - Reclaimed -> - ok = rabbit_msg_store:gc_done(Parent, Reclaimed, Source, - Destination), - State #gcstate { scheduled = undefined } + concurrent_readers -> State; + Reclaimed -> ok = rabbit_msg_store:gc_done( + Parent, Reclaimed, Source, Destination), + State #gcstate { scheduled = undefined } end. -- cgit v1.2.1 From d2c601042d79e731286045b49686c5645b7daa4a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 18 May 2010 07:04:52 +0100 Subject: rename some qi funs --- src/rabbit_queue_index.erl | 89 ++++++++++++++--------------- src/rabbit_tests.erl | 30 +++++----- src/rabbit_variable_queue.erl | 128 +++++++++++++++++++----------------------- 3 files changed, 117 insertions(+), 130 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 44df5976..241766b8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,8 +31,8 @@ -module(rabbit_queue_index). 
--export([init/3, terminate/2, terminate_and_erase/1, write_published/4, - write_delivered/2, write_acks/2, sync_seq_ids/2, flush_journal/1, +-export([init/3, terminate/2, terminate_and_erase/1, publish/4, + deliver/2, ack/2, sync/2, flush/1, read_segment_entries/2, next_segment_boundary/1, segment_size/0, find_lowest_seq_id_seg_and_next_seq_id/1, recover/1]). @@ -189,12 +189,11 @@ {'undefined' | non_neg_integer(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(terminate_and_erase/1 :: (qistate()) -> qistate()). --spec(write_published/4 :: (guid(), seq_id(), boolean(), qistate()) - -> qistate()). --spec(write_delivered/2 :: (seq_id(), qistate()) -> qistate()). --spec(write_acks/2 :: ([seq_id()], qistate()) -> qistate()). --spec(sync_seq_ids/2 :: ([seq_id()], qistate()) -> qistate()). --spec(flush_journal/1 :: (qistate()) -> qistate()). +-spec(publish/4 :: (guid(), seq_id(), boolean(), qistate()) -> qistate()). +-spec(deliver/2 :: (seq_id(), qistate()) -> qistate()). +-spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(flush/1 :: (qistate()) -> qistate()). -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> {[{guid(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). @@ -291,7 +290,7 @@ terminate_and_erase(State) -> ok = rabbit_misc:recursive_delete([State1 #qistate.dir]), State1. -write_published(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> +publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> ?GUID_BYTES = size(Guid), {JournalHdl, State1} = get_journal_handle(State), ok = file_handle_cache:append( @@ -301,15 +300,15 @@ write_published(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]), maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). -write_delivered(SeqId, State) -> +deliver(SeqId, State) -> {JournalHdl, State1} = get_journal_handle(State), ok = file_handle_cache:append( JournalHdl, <>), maybe_flush_journal(add_to_journal(SeqId, del, State1)). -write_acks([], State) -> +ack([], State) -> State; -write_acks(SeqIds, State) -> +ack(SeqIds, State) -> {JournalHdl, State1} = get_journal_handle(State), ok = file_handle_cache:append( JournalHdl, [<> || @@ -318,43 +317,15 @@ write_acks(SeqIds, State) -> add_to_journal(SeqId, ack, StateN) end, State1, SeqIds)). -sync_seq_ids([], State) -> +sync([], State) -> State; -sync_seq_ids(_SeqIds, State = #qistate { journal_handle = undefined }) -> +sync(_SeqIds, State = #qistate { journal_handle = undefined }) -> State; -sync_seq_ids(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> +sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> ok = file_handle_cache:sync(JournalHdl), State. -flush_journal(State = #qistate { dirty_count = 0 }) -> - State; -flush_journal(State = #qistate { segments = Segments }) -> - Segments1 = - segment_fold( - fun (_Seg, #segment { journal_entries = JEntries, - pubs = PubCount, - acks = AckCount } = Segment, SegmentsN) -> - case PubCount > 0 andalso PubCount == AckCount of - true -> ok = delete_segment(Segment), - SegmentsN; - false -> segment_store( - append_journal_to_segment(Segment, JEntries), - SegmentsN) - end - end, segments_new(), Segments), - {JournalHdl, State1} = - get_journal_handle(State #qistate { segments = Segments1 }), - ok = file_handle_cache:clear(JournalHdl), - State1 #qistate { dirty_count = 0 }. 
- -append_journal_to_segment(Segment, JEntries) -> - case array:sparse_size(JEntries) of - 0 -> Segment; - _ -> {Hdl, Segment1} = get_segment_handle(Segment), - array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), - ok = file_handle_cache:sync(Hdl), - Segment1 #segment { journal_entries = array_new() } - end. +flush(State) -> flush_journal(State). read_segment_entries(InitSeqId, State = #qistate { segments = Segments, dir = Dir }) -> @@ -599,6 +570,36 @@ maybe_flush_journal(State = #qistate { dirty_count = DCount }) maybe_flush_journal(State) -> State. +flush_journal(State = #qistate { dirty_count = 0 }) -> + State; +flush_journal(State = #qistate { segments = Segments }) -> + Segments1 = + segment_fold( + fun (_Seg, #segment { journal_entries = JEntries, + pubs = PubCount, + acks = AckCount } = Segment, SegmentsN) -> + case PubCount > 0 andalso PubCount == AckCount of + true -> ok = delete_segment(Segment), + SegmentsN; + false -> segment_store( + append_journal_to_segment(Segment, JEntries), + SegmentsN) + end + end, segments_new(), Segments), + {JournalHdl, State1} = + get_journal_handle(State #qistate { segments = Segments1 }), + ok = file_handle_cache:clear(JournalHdl), + State1 #qistate { dirty_count = 0 }. + +append_journal_to_segment(Segment, JEntries) -> + case array:sparse_size(JEntries) of + 0 -> Segment; + _ -> {Hdl, Segment1} = get_segment_handle(Segment), + array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), + ok = file_handle_cache:sync(Hdl), + Segment1 #segment { journal_entries = array_new() } + end. + get_journal_handle(State = #qistate { journal_handle = undefined, dir = Dir }) -> Path = filename:join(Dir, ?JOURNAL_FILENAME), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b9f6dfd6..97d74fc9 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1414,8 +1414,8 @@ queue_index_publish(SeqIds, Persistent, Qi) -> lists:foldl( fun (SeqId, {QiN, SeqIdsGuidsAcc, MSCStateN}) -> Guid = rabbit_guid:guid(), - QiM = rabbit_queue_index:write_published(Guid, SeqId, Persistent, - QiN), + QiM = rabbit_queue_index:publish( + Guid, SeqId, Persistent, QiN), {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, Guid, MSCStateN), {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} @@ -1425,13 +1425,11 @@ queue_index_publish(SeqIds, Persistent, Qi) -> {A, B}. queue_index_deliver(SeqIds, Qi) -> - lists:foldl( - fun (SeqId, QiN) -> - rabbit_queue_index:write_delivered(SeqId, QiN) - end, Qi, SeqIds). + lists:foldl(fun (SeqId, QiN) -> rabbit_queue_index:deliver(SeqId, QiN) end, + Qi, SeqIds). -queue_index_flush_journal(Qi) -> - rabbit_queue_index:flush_journal(Qi). +queue_index_flush(Qi) -> + rabbit_queue_index:flush(Qi). 
verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; @@ -1491,8 +1489,8 @@ test_queue_index() -> {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsGuidsB)), - Qi16 = rabbit_queue_index:write_acks(SeqIdsB, Qi15), - Qi17 = queue_index_flush_journal(Qi16), + Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), + Qi17 = queue_index_flush(Qi16), %% Everything will have gone now because #pubs == #acks {0, 0, Qi18} = rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), @@ -1512,8 +1510,8 @@ test_queue_index() -> {0, _Terms4, Qi22} = test_queue_init(), {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = queue_index_deliver(SeqIdsC, Qi23), - Qi25 = rabbit_queue_index:write_acks(SeqIdsC, Qi24), - Qi26 = queue_index_flush_journal(Qi25), + Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), + Qi26 = queue_index_flush(Qi25), {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), _Qi28 = rabbit_queue_index:terminate_and_erase(Qi27), ok = stop_msg_store(), @@ -1524,8 +1522,8 @@ test_queue_index() -> {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = queue_index_deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), - Qi33 = rabbit_queue_index:write_acks(SeqIdsC, Qi32), - Qi34 = queue_index_flush_journal(Qi33), + Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), + Qi34 = queue_index_flush(Qi33), _Qi35 = rabbit_queue_index:terminate_and_erase(Qi34), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1535,8 +1533,8 @@ test_queue_index() -> {0, _Terms6, Qi36} = test_queue_init(), {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = queue_index_deliver(SeqIdsD, Qi37), - Qi39 = rabbit_queue_index:write_acks(SeqIdsD, Qi38), - Qi40 = queue_index_flush_journal(Qi39), + Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), + Qi40 = queue_index_flush(Qi39), _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40), ok = stop_msg_store(), ok = rabbit_variable_queue:start([]), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 39ef3ec4..079c14eb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -360,7 +360,7 @@ delete_and_terminate(State) -> remove_pending_ack(false, State1), %% flushing here is good because it deletes all full segments, %% leaving only partial segments around. - IndexState1 = rabbit_queue_index:flush_journal(IndexState), + IndexState1 = rabbit_queue_index:flush(IndexState), IndexState2 = case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( IndexState1) of @@ -456,7 +456,7 @@ fetch(AckRequired, State = %% 1. 
Mark it delivered if necessary IndexState1 = case IndexOnDisk andalso not IsDelivered of - true -> rabbit_queue_index:write_delivered( + true -> rabbit_queue_index:deliver( SeqId, IndexState); false -> IndexState end, @@ -465,29 +465,25 @@ fetch(AckRequired, State = MsgStore = find_msg_store(IsPersistent, PersistentStore), IndexState2 = case MsgOnDisk andalso not AckRequired of - true -> %% Remove from disk now - ok = case MsgOnDisk of - true -> - rabbit_msg_store:remove(MsgStore, [Guid]); - false -> - ok - end, - case IndexOnDisk of - true -> - rabbit_queue_index:write_acks([SeqId], - IndexState1); - false -> - IndexState1 - end; - false -> - IndexState1 + %% Remove from disk now + true -> ok = case MsgOnDisk of + true -> rabbit_msg_store:remove( + MsgStore, [Guid]); + false -> ok + end, + case IndexOnDisk of + true -> rabbit_queue_index:ack( + [SeqId], IndexState1); + false -> IndexState1 + end; + false -> IndexState1 end, %% 3. If it's on disk, not persistent and an ack's %% required then remove it from the queue index only. IndexState3 = case IndexOnDisk andalso AckRequired andalso not IsPersistent of - true -> rabbit_queue_index:write_acks([SeqId], IndexState2); + true -> rabbit_queue_index:ack([SeqId], IndexState2); false -> IndexState2 end, @@ -539,7 +535,7 @@ ack(AckTags, State = #vqstate { index_state = IndexState, [SeqId | SeqIds], PAN1} end end, {dict:new(), [], PA}, AckTags), - IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), + IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), @@ -635,7 +631,7 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) -> StateN3} end end, {[], dict:new(), State}, AckTags), - IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), + IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:release(MsgStore, Guids) end, ok, GuidsByStore), @@ -707,8 +703,7 @@ needs_sync(_) -> true. sync(State) -> tx_commit_index(State). handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> - State #vqstate { index_state = - rabbit_queue_index:flush_journal(IndexState) }. + State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. 
status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, len = Len, on_sync = {_, _, From}, @@ -759,21 +754,19 @@ remove_pending_ack(KeepPersistent, {SeqIdsAcc, Dict, dict:erase(SeqId, PAN)} end, {[], dict:new(), PA}, PA), case KeepPersistent of - true -> - State1 = State #vqstate { pending_ack = PA1 }, - case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of - error -> State1; - {ok, Guids} -> ok = rabbit_msg_store:remove( - ?TRANSIENT_MSG_STORE, Guids), - State1 - end; - false -> - IndexState1 = rabbit_queue_index:write_acks(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:remove(MsgStore, Guids) - end, ok, GuidsByStore), - State #vqstate { pending_ack = dict:new(), - index_state = IndexState1 } + true -> State1 = State #vqstate { pending_ack = PA1 }, + case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + error -> State1; + {ok, Guids} -> ok = rabbit_msg_store:remove( + ?TRANSIENT_MSG_STORE, Guids), + State1 + end; + false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = dict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + State #vqstate { pending_ack = dict:new(), + index_state = IndexState1 } end. lookup_tx(Txn) -> @@ -812,30 +805,27 @@ betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> fun ({Guid, SeqId, IsPersistent, IsDelivered}, {FilteredAcc, IndexStateAcc}) -> case SeqId < TransientThreshold andalso not IsPersistent of - true -> - IndexStateAcc1 = - case IsDelivered of - false -> rabbit_queue_index:write_delivered( - SeqId, IndexStateAcc); - true -> IndexStateAcc - end, - {FilteredAcc, rabbit_queue_index:write_acks( - [SeqId], IndexStateAcc1)}; - false -> - case SeqId < SeqIdLimit of - true -> - {[#msg_status { msg = undefined, - guid = Guid, - seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = true, - index_on_disk = true - } | FilteredAcc], - IndexStateAcc}; - false -> - {FilteredAcc, IndexStateAcc} - end + true -> IndexStateAcc1 = + case IsDelivered of + false -> rabbit_queue_index:deliver( + SeqId, IndexStateAcc); + true -> IndexStateAcc + end, + {FilteredAcc, rabbit_queue_index:ack( + [SeqId], IndexStateAcc1)}; + false -> case SeqId < SeqIdLimit of + true -> {[#msg_status { + msg = undefined, + guid = Guid, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + } | FilteredAcc], + IndexStateAcc}; + false -> {FilteredAcc, IndexStateAcc} + end end end, {[], IndexState}, List), {bpqueue:from_list([{true, Filtered}]), IndexState1}. @@ -966,8 +956,7 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, false -> SeqIdsAcc end, StateN1} end, {Acks, State1}, Pubs), - IndexState1 = - rabbit_queue_index:sync_seq_ids(SeqIds, IndexState), + IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), [ Fun() || Fun <- lists:reverse(SFuns) ], State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. @@ -1024,7 +1013,7 @@ remove_queue_entries(PersistentStore, Fold, Q, IndexState) -> IndexState2 = case SeqIds of [] -> IndexState1; - _ -> rabbit_queue_index:write_acks(SeqIds, IndexState1) + _ -> rabbit_queue_index:ack(SeqIds, IndexState1) end, {Count, IndexState2}. 
@@ -1047,8 +1036,7 @@ remove_queue_entries1( false -> SeqIdsAcc end, IndexStateN1 = case IndexOnDisk andalso not IsDelivered of - true -> rabbit_queue_index:write_delivered( - SeqId, IndexStateN); + true -> rabbit_queue_index:deliver(SeqId, IndexStateN); false -> IndexStateN end, {PersistentStore, CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. @@ -1298,11 +1286,11 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { is_delivered = IsDelivered }, IndexState) when Force orelse IsPersistent -> true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION - IndexState1 = rabbit_queue_index:write_published( - Guid, SeqId, IsPersistent, IndexState), + IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent, + IndexState), {MsgStatus #msg_status { index_on_disk = true }, case IsDelivered of - true -> rabbit_queue_index:write_delivered(SeqId, IndexState1); + true -> rabbit_queue_index:deliver(SeqId, IndexState1); false -> IndexState1 end}; maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> -- cgit v1.2.1 From aaed798e8921e1906a5346018ddc975546668834 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 11:08:40 +0100 Subject: Renaming some very long function name to a much shorter name --- src/rabbit_queue_index.erl | 13 ++++++------- src/rabbit_tests.erl | 18 ++++++------------ src/rabbit_variable_queue.erl | 8 +++----- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 241766b8..36dd5c6d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,9 +32,8 @@ -module(rabbit_queue_index). -export([init/3, terminate/2, terminate_and_erase/1, publish/4, - deliver/2, ack/2, sync/2, flush/1, - read_segment_entries/2, next_segment_boundary/1, segment_size/0, - find_lowest_seq_id_seg_and_next_seq_id/1, recover/1]). + deliver/2, ack/2, sync/2, flush/1, read_segment_entries/2, + next_segment_boundary/1, segment_size/0, bounds/1, recover/1]). -define(CLEAN_FILENAME, "clean.dot"). @@ -198,7 +197,7 @@ {[{guid(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(segment_size/0 :: () -> non_neg_integer()). --spec(find_lowest_seq_id_seg_and_next_seq_id/1 :: (qistate()) -> +-spec(bounds/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). -spec(recover/1 :: ([queue_name()]) -> {[[any()]], startup_fun_state()}). @@ -347,7 +346,7 @@ next_segment_boundary(SeqId) -> segment_size() -> ?SEGMENT_ENTRY_COUNT. -find_lowest_seq_id_seg_and_next_seq_id(State) -> +bounds(State) -> SegNums = all_segment_nums(State), %% We don't want the lowest seq_id, merely the seq_id of the start %% of the lowest segment. That seq_id may not actually exist, but @@ -358,13 +357,13 @@ find_lowest_seq_id_seg_and_next_seq_id(State) -> %% next segment: it makes life much easier. %% SegNums is sorted, ascending. - {LowSeqIdSeg, NextSeqId} = + {LowSeqId, NextSeqId} = case SegNums of [] -> {0, 0}; [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), reconstruct_seq_id(1 + lists:last(SegNums), 0)} end, - {LowSeqIdSeg, NextSeqId, State}. + {LowSeqId, NextSeqId, State}. 
recover(DurableQueues) -> DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 97d74fc9..f69a9dc5 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1455,11 +1455,9 @@ test_queue_index() -> SeqIdsA = lists:seq(0,9999), SeqIdsB = lists:seq(10000,19999), {0, _Terms, Qi0} = test_queue_init(), - {0, 0, Qi1} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi0), + {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), - {0, SegmentSize, Qi3} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi2), + {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3), ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsGuidsA)), @@ -1469,11 +1467,9 @@ test_queue_index() -> ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0, as all the msgs were transient {0, _Terms1, Qi6} = test_queue_init(), - {0, 0, Qi7} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi6), + {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), - {0, TwoSegs, Qi9} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi8), + {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsGuidsB)), @@ -1483,8 +1479,7 @@ test_queue_index() -> %% should get length back as 10000 LenB = length(SeqIdsB), {LenB, _Terms2, Qi12} = test_queue_init(), - {0, TwoSegs, Qi13} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi12), + {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14), ok = verify_read_with_published(true, true, ReadC, @@ -1492,8 +1487,7 @@ test_queue_index() -> Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), Qi17 = queue_index_flush(Qi16), %% Everything will have gone now because #pubs == #acks - {0, 0, Qi18} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(Qi17), + {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), _Qi19 = rabbit_queue_index:terminate([], Qi18), ok = stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 079c14eb..eb0f45c1 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -283,8 +283,7 @@ init(QueueName, IsDurable, _Recover) -> end, {DeltaCount, Terms, IndexState} = rabbit_queue_index:init(QueueName, MsgStoreRecovered, ContainsCheckFun), - {DeltaSeqId, NextSeqId, IndexState1} = - rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id(IndexState), + {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), {PRef, TRef, Terms1} = case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of @@ -296,7 +295,7 @@ init(QueueName, IsDurable, _Recover) -> DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount), Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of true -> ?BLANK_DELTA; - false -> #delta { start_seq_id = DeltaSeqId, + false -> #delta { start_seq_id = LowSeqId, count = DeltaCount1, end_seq_id = NextSeqId } end, @@ -362,8 +361,7 @@ delete_and_terminate(State) -> %% leaving only partial segments around. 
IndexState1 = rabbit_queue_index:flush(IndexState), IndexState2 = - case rabbit_queue_index:find_lowest_seq_id_seg_and_next_seq_id( - IndexState1) of + case rabbit_queue_index:bounds(IndexState1) of {N, N, IndexState3} -> IndexState3; {DeltaSeqId, NextSeqId, IndexState3} -> -- cgit v1.2.1 From bc8c96c7b122687cb0a1d7d5274fa34a5e34d640 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 11:25:43 +0100 Subject: Drop segment_size and instead have a current_segment as well as a next_segment function --- src/rabbit_queue_index.erl | 10 ++++++---- src/rabbit_tests.erl | 6 +++--- src/rabbit_variable_queue.erl | 16 +++------------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 36dd5c6d..764c7950 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -33,7 +33,8 @@ -export([init/3, terminate/2, terminate_and_erase/1, publish/4, deliver/2, ack/2, sync/2, flush/1, read_segment_entries/2, - next_segment_boundary/1, segment_size/0, bounds/1, recover/1]). + next_segment_boundary/1, current_segment_boundary/1, bounds/1, + recover/1]). -define(CLEAN_FILENAME, "clean.dot"). @@ -196,7 +197,7 @@ -spec(read_segment_entries/2 :: (seq_id(), qistate()) -> {[{guid(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). --spec(segment_size/0 :: () -> non_neg_integer()). +-spec(current_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(bounds/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). -spec(recover/1 :: ([queue_name()]) -> {[[any()]], startup_fun_state()}). @@ -343,8 +344,9 @@ next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), reconstruct_seq_id(Seg + 1, 0). -segment_size() -> - ?SEGMENT_ENTRY_COUNT. +current_segment_boundary(SeqId) -> + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg, 0). bounds(State) -> SegNums = all_segment_nums(State), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f69a9dc5..358f857b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1448,7 +1448,7 @@ test_queue_init() -> end). test_queue_index() -> - SegmentSize = rabbit_queue_index:segment_size(), + SegmentSize = rabbit_queue_index:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, stop_msg_store(), ok = empty_test_queue(), @@ -1579,7 +1579,7 @@ test_variable_queue() -> passed. test_variable_queue_dynamic_duration_change() -> - SegmentSize = rabbit_queue_index:segment_size(), + SegmentSize = rabbit_queue_index:next_segment_boundary(0), VQ0 = fresh_variable_queue(), %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, @@ -1630,7 +1630,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> end. test_variable_queue_partial_segments_delta_thing() -> - SegmentSize = rabbit_queue_index:segment_size(), + SegmentSize = rabbit_queue_index:next_segment_boundary(0), HalfSegment = SegmentSize div 2, VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index eb0f45c1..856b1f0c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -829,7 +829,7 @@ betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> {bpqueue:from_list([{true, Filtered}]), IndexState1}. 
read_index_segment(SeqId, IndexState) -> - SeqId1 = SeqId + rabbit_queue_index:segment_size(), + SeqId1 = rabbit_queue_index:next_segment_boundary(SeqId), case rabbit_queue_index:read_segment_entries(SeqId, IndexState) of {[], IndexState1} -> read_index_segment(SeqId1, IndexState1); {List, IndexState1} -> {List, IndexState1, SeqId1} @@ -963,7 +963,7 @@ delete1(_PersistentStore, _TransientThreshold, NextSeqId, Count, DeltaSeqId, {Count, IndexState}; delete1(PersistentStore, TransientThreshold, NextSeqId, Count, DeltaSeqId, IndexState) -> - Delta1SeqId = DeltaSeqId + rabbit_queue_index:segment_size(), + Delta1SeqId = rabbit_queue_index:next_segment_boundary(DeltaSeqId), case rabbit_queue_index:read_segment_entries(DeltaSeqId, IndexState) of {[], IndexState1} -> delete1(PersistentStore, TransientThreshold, NextSeqId, Count, @@ -1203,8 +1203,7 @@ publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = %% delta may be empty, seq_id > next_segment_boundary from q3 %% head, so we need to find where the segment boundary is before %% or equal to seq_id - DeltaSeqId = rabbit_queue_index:next_segment_boundary(SeqId) - - rabbit_queue_index:segment_size(), + DeltaSeqId = rabbit_queue_index:current_segment_boundary(SeqId), Delta1 = #delta { start_seq_id = DeltaSeqId, count = 1, end_seq_id = SeqId + 1 }, State #vqstate { index_state = IndexState1, @@ -1491,15 +1490,6 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, true -> %% already only holding LTE one segment indices in q3 State1; false -> - %% ASSERTION - %% This says that if Delta1SeqId /= undefined then - %% the gap from Limit to Delta1SeqId is an integer - %% multiple of segment_size - 0 = case Delta1SeqId of - undefined -> 0; - _ -> (Delta1SeqId - Limit) rem - rabbit_queue_index:segment_size() - end, %% SeqIdMax is low in the sense that it must be %% lower than the seq_id in delta1, in fact either %% delta1 has undefined as its seq_id or there -- cgit v1.2.1 From e3dc5cd9e211bf22b6f657440f50e6672071b588 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 11:40:45 +0100 Subject: Corrections to documentation in queue_index --- src/rabbit_queue_index.erl | 55 +++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 764c7950..a6753910 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -42,22 +42,22 @@ %% The queue index is responsible for recording the order of messages %% within a queue on disk. - +%% %% Because of the fact that the queue can decide at any point to send %% a queue entry to disk, you can not rely on publishes appearing in %% order. The only thing you can rely on is a message being published, %% then delivered, then ack'd. - +%% %% In order to be able to clean up ack'd messages, we write to segment %% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT %% publishes, delivers and acknowledgements. They are numbered, and so %% it is known that the 0th segment contains messages 0 -> -%% ?SEGMENT_ENTRY_COUNT, the 1st segment contains messages -%% ?SEGMENT_ENTRY_COUNT +1 -> 2*?SEGMENT_ENTRY_COUNT and so on. As +%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages +%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As %% such, in the segment files, we only refer to message sequence ids %% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a %% fixed size. 
- +%% %% However, transient messages which are not sent to disk at any point %% will cause gaps to appear in segment files. Therefore, we delete a %% segment file whenever the number of publishes == number of acks @@ -66,7 +66,7 @@ %% also implies == number of delivers). In practise, this does not %% cause disk churn in the pathological case because of the journal %% and caching (see below). - +%% %% Because of the fact that publishes, delivers and acks can occur all %% over, we wish to avoid lots of seeking. Therefore we have a fixed %% sized journal to which all actions are appended. When the number of @@ -75,30 +75,27 @@ %% journal is truncated to zero size. Note that entries in the journal %% must carry the full sequence id, thus the format of entries in the %% journal is different to that in the segments. - +%% %% The journal is also kept fully in memory, pre-segmented: the state -%% contains a dict from segment numbers to state-per-segment. Actions -%% are stored directly in this state. Thus at the point of flushing -%% the journal, firstly no reading from disk is necessary, but -%% secondly if the known number of acks and publishes are equal, given -%% the known state of the segment file, combined with the journal, no -%% writing needs to be done to the segment file either (in fact it is -%% deleted if it exists at all). This is safe given that the set of -%% acks is a subset of the set of publishes. When it's necessary to -%% sync messages because of transactions, it's only necessary to fsync -%% on the journal: when entries are distributed from the journal to +%% contains a mapping from segment numbers to state-per-segment (this +%% state is held for all segments which have been "seen": thus a +%% segment which has been read but has no pending entries in the +%% journal is still held in this mapping). Actions are stored directly +%% in this state. Thus at the point of flushing the journal, firstly +%% no reading from disk is necessary, but secondly if the known number +%% of acks and publishes in a segment qare equal, given the known +%% state of the segment file combined with the journal, no writing +%% needs to be done to the segment file either (in fact it is deleted +%% if it exists at all). This is safe given that the set of acks is a +%% subset of the set of publishes. When it's necessary to sync +%% messages because of transactions, it's only necessary to fsync on +%% the journal: when entries are distributed from the journal to %% segment files, those segments appended to are fsync'd prior to the %% journal being truncated. - -%% It is very common to need to access two particular segments very -%% frequently: one for publishes, and one for deliveries and acks. As -%% such, and the poor performance of the erlang dict module, we cache -%% the per-segment-state for the two most recently used segments in -%% the state, this provides a substantial performance improvement. - +%% %% This module is also responsible for scanning the queue index files %% and seeding the message store on start up. - +%% %% Note that in general, the representation of a message's state as %% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'), %% ('ack'|'no_ack')} is richer than strictly necessary for most @@ -322,6 +319,14 @@ sync([], State) -> sync(_SeqIds, State = #qistate { journal_handle = undefined }) -> State; sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> + %% The SeqIds here contains the SeqId of every publish and ack in + %% the transaction. 
Ideally we should go through these seqids and + %% only sync the journal if the pubs or acks appear in the + %% journal. However, this would be complex to do, and given that + %% the variable queue publishes and acks to the qi, and then + %% syncs, all in one operation, there is no possibility of the + %% seqids not being in the journal, provided the transaction isn't + %% emptied (handled above anyway). ok = file_handle_cache:sync(JournalHdl), State. -- cgit v1.2.1 From f38808b01eafa514585d613bb4b59d1a4fd218f8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 12:53:54 +0100 Subject: Reworked reading from the queue to still be limited to a maximum of one segment, but with a more natural start+end interface --- src/rabbit_queue_index.erl | 122 ++++++++++++++++++++++++++---------------- src/rabbit_tests.erl | 6 +-- src/rabbit_variable_queue.erl | 83 ++++++++++++++-------------- 3 files changed, 120 insertions(+), 91 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a6753910..7cf36193 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -32,8 +32,8 @@ -module(rabbit_queue_index). -export([init/3, terminate/2, terminate_and_erase/1, publish/4, - deliver/2, ack/2, sync/2, flush/1, read_segment_entries/2, - next_segment_boundary/1, current_segment_boundary/1, bounds/1, + deliver/2, ack/2, sync/2, flush/1, read/3, + current_segment_boundary/1, next_segment_boundary/1, bounds/1, recover/1]). -define(CLEAN_FILENAME, "clean.dot"). @@ -83,15 +83,15 @@ %% journal is still held in this mapping). Actions are stored directly %% in this state. Thus at the point of flushing the journal, firstly %% no reading from disk is necessary, but secondly if the known number -%% of acks and publishes in a segment qare equal, given the known -%% state of the segment file combined with the journal, no writing -%% needs to be done to the segment file either (in fact it is deleted -%% if it exists at all). This is safe given that the set of acks is a -%% subset of the set of publishes. When it's necessary to sync -%% messages because of transactions, it's only necessary to fsync on -%% the journal: when entries are distributed from the journal to -%% segment files, those segments appended to are fsync'd prior to the -%% journal being truncated. +%% of acks and publishes in a segment are equal, given the known state +%% of the segment file combined with the journal, no writing needs to +%% be done to the segment file either (in fact it is deleted if it +%% exists at all). This is safe given that the set of acks is a subset +%% of the set of publishes. When it's necessary to sync messages +%% because of transactions, it's only necessary to fsync on the +%% journal: when entries are distributed from the journal to segment +%% files, those segments appended to are fsync'd prior to the journal +%% being truncated. %% %% This module is also responsible for scanning the queue index files %% and seeding the message store on start up. @@ -191,10 +191,11 @@ -spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). -spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush/1 :: (qistate()) -> qistate()). --spec(read_segment_entries/2 :: (seq_id(), qistate()) -> - {[{guid(), seq_id(), boolean(), boolean()}], qistate()}). --spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). +-spec(read/3 :: (seq_id(), seq_id(), qistate()) -> + {[{guid(), seq_id(), boolean(), boolean()}], + seq_id() | 'undefined', qistate()}). 
-spec(current_segment_boundary/1 :: (seq_id()) -> seq_id()). +-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(bounds/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). -spec(recover/1 :: ([queue_name()]) -> {[[any()]], startup_fun_state()}). @@ -332,33 +333,50 @@ sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> flush(State) -> flush_journal(State). -read_segment_entries(InitSeqId, State = #qistate { segments = Segments, - dir = Dir }) -> - {Seg, 0} = seq_id_to_seg_and_rel_seq_id(InitSeqId), - Segment = segment_find_or_new(Seg, Dir, Segments), - {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), +read(StartEnd, StartEnd, State) -> + {[], undefined, State}; +read(Start, End, State = #qistate { segments = Segments, + dir = Dir }) when Start =< End -> + %% Start is inclusive, End is exclusive. + {StartSeg, StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), + {EndSeg, EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End), + Start1 = reconstruct_seq_id(StartSeg + 1, 0), + Again = case End =< Start1 of + true -> undefined; + false -> Start1 + end, + MaxRelSeq = case StartSeg =:= EndSeg of + true -> EndRelSeq; + false -> ?SEGMENT_ENTRY_COUNT + end, + Segment = segment_find_or_new(StartSeg, Dir, Segments), + {SegEntries, _PubCount, _AckCount, Segment1} = + load_segment(false, StartRelSeq, MaxRelSeq, Segment), #segment { journal_entries = JEntries } = Segment1, {array:sparse_foldr( fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) -> - [ {Guid, reconstruct_seq_id(Seg, RelSeq), + [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), IsPersistent, IsDelivered == del} | Acc ] - end, [], journal_plus_segment(JEntries, SegEntries)), + end, [], + journal_plus_segment(JEntries, SegEntries, StartRelSeq, MaxRelSeq)), + Again, State #qistate { segments = segment_store(Segment1, Segments) }}. -next_segment_boundary(SeqId) -> - {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - reconstruct_seq_id(Seg + 1, 0). - current_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), reconstruct_seq_id(Seg, 0). +next_segment_boundary(SeqId) -> + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg + 1, 0). + bounds(State) -> SegNums = all_segment_nums(State), - %% We don't want the lowest seq_id, merely the seq_id of the start - %% of the lowest segment. That seq_id may not actually exist, but - %% that's fine. The important thing is that the segment exists and - %% the seq_id reported is on a segment boundary. + %% Don't bother trying to figure out the lowest seq_id, merely the + %% seq_id of the start of the lowest segment. That seq_id may not + %% actually exist, but that's fine. The important thing is that + %% the segment exists and the seq_id reported is on a segment + %% boundary. %% We also don't really care about the max seq_id. Just start the %% next segment: it makes life much easier. 
@@ -460,7 +478,7 @@ terminate(StoreShutdown, Terms, State = recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> {SegEntries, PubCount, AckCount, Segment1} = - load_segment(false, Segment), + load_segment(false, 0, ?SEGMENT_ENTRY_COUNT, Segment), array:sparse_foldl( fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment3) -> recover_message(ContainsCheckFun(Guid), CleanShutdown, @@ -518,7 +536,8 @@ queue_index_walker_reader(QueueName, Gatherer) -> State1 = lists:foldl( fun (Seg, State2) -> SeqId = reconstruct_seq_id(Seg, 0), - {Messages, State3} = read_segment_entries(SeqId, State2), + {Messages, undefined, State3} = + read(SeqId, next_segment_boundary(SeqId), State2), [ok = gatherer:in(Gatherer, {Guid, 1}) || {Guid, _SeqId, true, _IsDelivered} <- Messages], State3 @@ -633,7 +652,7 @@ load_journal(State) -> %% them if duplicates are in the journal. The counts %% here are purely from the segment itself. {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} = - load_segment(true, Segment), + load_segment(true, 0, ?SEGMENT_ENTRY_COUNT, Segment), %% Removed counts here are the number of pubs and %% acks that are duplicates - i.e. found in both the %% segment and journal. @@ -806,8 +825,9 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> %% number of unacked msgs is PubCount - AckCount. If KeepAcks is %% false, then array:sparse_size(SegEntries) == PubCount - %% AckCount. If KeepAcks is true, then array:sparse_size(SegEntries) -%% == PubCount. -load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) -> +%% == PubCount. StartRelSeq is inclusive, EndRelSeq is exclusive. +load_segment(KeepAcks, StartRelSeq, EndRelSeq, + Segment = #segment { path = Path, handle = SegHdl }) -> SegmentExists = case SegHdl of undefined -> filelib:is_file(Path); _ -> true @@ -817,20 +837,24 @@ load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) -> true -> {Hdl, Segment1} = get_segment_handle(Segment), {ok, 0} = file_handle_cache:position(Hdl, bof), {SegEntries, PubCount, AckCount} = - load_segment_entries(KeepAcks, Hdl, array_new(), 0, 0), + load_segment_entries(KeepAcks, StartRelSeq, EndRelSeq, Hdl, + array_new(), 0, 0), {SegEntries, PubCount, AckCount, Segment1} end. -load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> +load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries, PubCount, + AckCount) -> case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of {ok, <>} -> + RelSeq:?REL_SEQ_BITS>>} + when StartRel =< RelSeq andalso RelSeq < EndRel -> {AckCount1, SegEntries1} = deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries), - load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount, - AckCount1); + load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries1, + PubCount, AckCount1); {ok, <>} -> + IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} + when StartRel =< RelSeq andalso RelSeq < EndRel -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. 
{ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES), @@ -838,8 +862,11 @@ load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) -> array:set(RelSeq, {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, SegEntries), - load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount + 1, - AckCount); + load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries1, + PubCount + 1, AckCount); + {ok, _SomeBinary} -> + load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries, + PubCount, AckCount); _ErrOrEoF -> {SegEntries, PubCount, AckCount} end. @@ -867,15 +894,18 @@ bool_to_int(false) -> 0. %% Combine what we have just read from a segment file with what we're %% holding for that segment in memory. There must be no %% duplicates. Used when providing segment entries to the variable -%% queue. -journal_plus_segment(JEntries, SegEntries) -> +%% queue. RelStart is inclusive, RelEnd is exclusive. +journal_plus_segment(JEntries, SegEntries, RelStart, RelEnd) -> array:sparse_foldl( - fun (RelSeq, JObj, SegEntriesOut) -> + fun (RelSeq, JObj, SegEntriesOut) + when RelStart =< RelSeq andalso RelSeq < RelEnd -> SegEntry = array:get(RelSeq, SegEntriesOut), case journal_plus_segment1(JObj, SegEntry) of undefined -> array:reset(RelSeq, SegEntriesOut); Obj -> array:set(RelSeq, Obj, SegEntriesOut) - end + end; + (_RelSeq, _JObj, SegEntriesOut) -> + SegEntriesOut end, SegEntries, JEntries). %% Here, the result is the item which we may be adding to (for items diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 358f857b..9821367d 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1458,7 +1458,7 @@ test_queue_index() -> {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), - {ReadA, Qi4} = rabbit_queue_index:read_segment_entries(0, Qi3), + {ReadA, undefined, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsGuidsA)), %% call terminate twice to prove it's idempotent @@ -1470,7 +1470,7 @@ test_queue_index() -> {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), - {ReadB, Qi10} = rabbit_queue_index:read_segment_entries(0, Qi9), + {ReadB, undefined, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsGuidsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), @@ -1481,7 +1481,7 @@ test_queue_index() -> {LenB, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), - {ReadC, Qi15} = rabbit_queue_index:read_segment_entries(0, Qi14), + {ReadC, undefined, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsGuidsB)), Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 856b1f0c..992cf19a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -175,9 +175,9 @@ }). -record(delta, - { start_seq_id, + { start_seq_id, %% start_seq_id is inclusive count, - end_seq_id %% note the end_seq_id is always >, not >= + end_seq_id %% end_seq_id is exclusive }). -record(tx, { pending_messages, pending_acks }). 
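The inclusive-start, exclusive-end convention recorded in the delta record above is the same one read/3 uses to window a request onto at most one segment per call. A minimal sketch of just that windowing arithmetic (invented name, assumed segment size of 4):

    -define(SEG_SIZE, 4). %% stand-in for ?SEGMENT_ENTRY_COUNT

    %% Clip [Start, End) at the next segment boundary after Start. The
    %% third element is the Again continuation: undefined when the
    %% request fits in one segment, otherwise where to resume.
    read_window(Start, End) when Start =< End ->
        Boundary = ((Start div ?SEG_SIZE) + 1) * ?SEG_SIZE,
        case End =< Boundary of
            true  -> {Start, End, undefined};
            false -> {Start, Boundary, Boundary}
        end.

For example read_window(2, 11) gives {2, 4, 4} -- entries 2 and 3 now, resume from 4 -- while read_window(2, 3) gives {2, 3, undefined}.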
@@ -797,7 +797,7 @@ persistent_guids(Pubs) -> [Guid || Obj = #basic_message { guid = Guid } <- Pubs, Obj #basic_message.is_persistent]. -betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> +betas_from_segment_entries(List, TransientThreshold, IndexState) -> {Filtered, IndexState1} = lists:foldr( fun ({Guid, SeqId, IsPersistent, IsDelivered}, @@ -811,28 +811,27 @@ betas_from_segment_entries(List, SeqIdLimit, TransientThreshold, IndexState) -> end, {FilteredAcc, rabbit_queue_index:ack( [SeqId], IndexStateAcc1)}; - false -> case SeqId < SeqIdLimit of - true -> {[#msg_status { - msg = undefined, - guid = Guid, - seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = true, - index_on_disk = true - } | FilteredAcc], - IndexStateAcc}; - false -> {FilteredAcc, IndexStateAcc} - end + false -> {[#msg_status { msg = undefined, + guid = Guid, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + } | FilteredAcc], + IndexStateAcc} end end, {[], IndexState}, List), {bpqueue:from_list([{true, Filtered}]), IndexState1}. -read_index_segment(SeqId, IndexState) -> - SeqId1 = rabbit_queue_index:next_segment_boundary(SeqId), - case rabbit_queue_index:read_segment_entries(SeqId, IndexState) of - {[], IndexState1} -> read_index_segment(SeqId1, IndexState1); - {List, IndexState1} -> {List, IndexState1, SeqId1} +read_one_index_segment(StartSeqId, EndSeqId, IndexState) + when StartSeqId =< EndSeqId -> + case rabbit_queue_index:read(StartSeqId, EndSeqId, IndexState) of + {List, Again, IndexState1} when List /= [] orelse Again =:= undefined -> + {List, IndexState1, + rabbit_queue_index:next_segment_boundary(StartSeqId)}; + {[], StartSeqId1, IndexState1} -> + read_one_index_segment(StartSeqId1, EndSeqId, IndexState1) end. ensure_binary_properties(Msg = #basic_message { content = Content }) -> @@ -959,26 +958,27 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. delete1(_PersistentStore, _TransientThreshold, NextSeqId, Count, DeltaSeqId, - IndexState) when DeltaSeqId >= NextSeqId -> + IndexState) when DeltaSeqId =:= undefined + orelse DeltaSeqId >= NextSeqId -> {Count, IndexState}; delete1(PersistentStore, TransientThreshold, NextSeqId, Count, DeltaSeqId, IndexState) -> - Delta1SeqId = rabbit_queue_index:next_segment_boundary(DeltaSeqId), - case rabbit_queue_index:read_segment_entries(DeltaSeqId, IndexState) of - {[], IndexState1} -> - delete1(PersistentStore, TransientThreshold, NextSeqId, Count, - Delta1SeqId, IndexState1); - {List, IndexState1} -> - {Q, IndexState2} = - betas_from_segment_entries( - List, Delta1SeqId, TransientThreshold, IndexState1), - {QCount, IndexState3} = - remove_queue_entries( - PersistentStore, fun beta_fold_no_index_on_disk/3, - Q, IndexState2), - delete1(PersistentStore, TransientThreshold, NextSeqId, - Count + QCount, Delta1SeqId, IndexState3) - end. + {List, Again, IndexState1} = + rabbit_queue_index:read(DeltaSeqId, NextSeqId, IndexState), + {IndexState2, Count1} = + case List of + [] -> {IndexState1, Count}; + _ -> {Q, IndexState3} = + betas_from_segment_entries( + List, TransientThreshold, IndexState1), + {Count2, IndexState4} = + remove_queue_entries( + PersistentStore, fun beta_fold_no_index_on_disk/3, + Q, IndexState3), + {IndexState4, Count2 + Count} + end, + delete1(PersistentStore, TransientThreshold, NextSeqId, Count1, Again, + IndexState2). 
purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, persistent_store = PersistentStore }) -> @@ -1370,13 +1370,12 @@ maybe_deltas_to_betas( %% segment, or TargetRamMsgCount > 0, meaning we should %% really be holding all the betas in memory. {List, IndexState1, Delta1SeqId} = - read_index_segment(DeltaSeqId, IndexState), + read_one_index_segment(DeltaSeqId, DeltaSeqIdEnd, IndexState), %% length(List) may be < segment_size because of acks. It %% could be [] if we ignored every message in the segment %% due to it being transient and below the threshold - {Q3a, IndexState2} = - betas_from_segment_entries( - List, DeltaSeqIdEnd, TransientThreshold, IndexState1), + {Q3a, IndexState2} = betas_from_segment_entries( + List, TransientThreshold, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, case bpqueue:len(Q3a) of 0 -> -- cgit v1.2.1 From 611b22f53330de9f7b2d12610fd534732931f651 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 13:28:01 +0100 Subject: Apparently it's wrong to do the least amount of work possible. --- src/rabbit_queue_index.erl | 55 +++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 7cf36193..a4bd4cd7 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -351,14 +351,16 @@ read(Start, End, State = #qistate { segments = Segments, end, Segment = segment_find_or_new(StartSeg, Dir, Segments), {SegEntries, _PubCount, _AckCount, Segment1} = - load_segment(false, StartRelSeq, MaxRelSeq, Segment), + load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, {array:sparse_foldr( - fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) -> + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) + when StartRelSeq =< RelSeq andalso RelSeq < MaxRelSeq -> [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), - IsPersistent, IsDelivered == del} | Acc ] - end, [], - journal_plus_segment(JEntries, SegEntries, StartRelSeq, MaxRelSeq)), + IsPersistent, IsDelivered == del} | Acc ]; + (_RelSeq, _Value, Acc) -> + Acc + end, [], journal_plus_segment(JEntries, SegEntries)), Again, State #qistate { segments = segment_store(Segment1, Segments) }}. @@ -478,7 +480,7 @@ terminate(StoreShutdown, Terms, State = recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> {SegEntries, PubCount, AckCount, Segment1} = - load_segment(false, 0, ?SEGMENT_ENTRY_COUNT, Segment), + load_segment(false, Segment), array:sparse_foldl( fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment3) -> recover_message(ContainsCheckFun(Guid), CleanShutdown, @@ -652,7 +654,7 @@ load_journal(State) -> %% them if duplicates are in the journal. The counts %% here are purely from the segment itself. {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} = - load_segment(true, 0, ?SEGMENT_ENTRY_COUNT, Segment), + load_segment(true, Segment), %% Removed counts here are the number of pubs and %% acks that are duplicates - i.e. found in both the %% segment and journal. @@ -825,9 +827,8 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> %% number of unacked msgs is PubCount - AckCount. If KeepAcks is %% false, then array:sparse_size(SegEntries) == PubCount - %% AckCount. If KeepAcks is true, then array:sparse_size(SegEntries) -%% == PubCount. StartRelSeq is inclusive, EndRelSeq is exclusive. 
-load_segment(KeepAcks, StartRelSeq, EndRelSeq,
-             Segment = #segment { path = Path, handle = SegHdl }) ->
+%% == PubCount.
+load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) ->
     SegmentExists = case SegHdl of
                         undefined -> filelib:is_file(Path);
                         _ -> true
@@ -837,24 +838,20 @@ load_segment(KeepAcks, StartRelSeq, EndRelSeq,
         true  -> {Hdl, Segment1} = get_segment_handle(Segment),
                  {ok, 0} = file_handle_cache:position(Hdl, bof),
                  {SegEntries, PubCount, AckCount} =
-                     load_segment_entries(KeepAcks, StartRelSeq, EndRelSeq, Hdl,
-                                          array_new(), 0, 0),
+                     load_segment_entries(KeepAcks, Hdl, array_new(), 0, 0),
                  {SegEntries, PubCount, AckCount, Segment1}
     end.

-load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries, PubCount,
-                     AckCount) ->
+load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) ->
     case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
         {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-               RelSeq:?REL_SEQ_BITS>>}
-          when StartRel =< RelSeq andalso RelSeq < EndRel ->
+               RelSeq:?REL_SEQ_BITS>>} ->
            {AckCount1, SegEntries1} =
                deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries),
-           load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries1,
-                                PubCount, AckCount1);
+           load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount,
+                                AckCount1);
        {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
-               IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>}
-          when StartRel =< RelSeq andalso RelSeq < EndRel ->
+               IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
            %% because we specify /binary, and binaries are complete
            %% bytes, the size spec is in bytes, not bits.
            {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES),
@@ -862,11 +859,8 @@ load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries, PubCount,
                array:set(RelSeq,
                          {{Guid, 1 == IsPersistentNum}, no_del, no_ack},
                          SegEntries),
-           load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries1,
-                                PubCount + 1, AckCount);
-       {ok, _SomeBinary} ->
-           load_segment_entries(KeepAcks, StartRel, EndRel, Hdl, SegEntries,
-                                PubCount, AckCount);
+           load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount + 1,
+                                AckCount);
        _ErrOrEoF -> {SegEntries, PubCount, AckCount}
    end.
@@ -894,18 +888,15 @@ bool_to_int(false) -> 0.

 %% Combine what we have just read from a segment file with what we're
 %% holding for that segment in memory. There must be no
 %% duplicates. Used when providing segment entries to the variable
-%% queue. RelStart is inclusive, RelEnd is exclusive.
-journal_plus_segment(JEntries, SegEntries, RelStart, RelEnd) ->
+%% queue.
+journal_plus_segment(JEntries, SegEntries) ->
     array:sparse_foldl(
-      fun (RelSeq, JObj, SegEntriesOut)
-          when RelStart =< RelSeq andalso RelSeq < RelEnd ->
+      fun (RelSeq, JObj, SegEntriesOut) ->
             SegEntry = array:get(RelSeq, SegEntriesOut),
             case journal_plus_segment1(JObj, SegEntry) of
                 undefined -> array:reset(RelSeq, SegEntriesOut);
                 Obj -> array:set(RelSeq, Obj, SegEntriesOut)
-            end;
-          (_RelSeq, _JObj, SegEntriesOut) ->
-            SegEntriesOut
+            end
      end, SegEntries, JEntries).

%% Here, the result is the item which we may be adding to (for items
-- cgit v1.2.1


From ae07f9978ad49eda22795ba3a1c209a1433284d5 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Tue, 18 May 2010 14:02:18 +0100
Subject: cosmetic

---
 src/rabbit_queue_index.erl | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index a4bd4cd7..6d39d990 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -339,7 +339,7 @@ read(Start, End, State = #qistate { segments = Segments,
                                     dir = Dir }) when Start =< End ->
     %% Start is inclusive, End is exclusive.
{StartSeg, StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), - {EndSeg, EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End), + {EndSeg, EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End), Start1 = reconstruct_seq_id(StartSeg + 1, 0), Again = case End =< Start1 of true -> undefined; @@ -350,8 +350,7 @@ read(Start, End, State = #qistate { segments = Segments, false -> ?SEGMENT_ENTRY_COUNT end, Segment = segment_find_or_new(StartSeg, Dir, Segments), - {SegEntries, _PubCount, _AckCount, Segment1} = - load_segment(false, Segment), + {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, {array:sparse_foldr( fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) @@ -397,12 +396,11 @@ recover(DurableQueues) -> Queue <- DurableQueues ]), QueuesDir = queues_dir(), Directories = case file:list_dir(QueuesDir) of - {ok, Entries} -> - [ Entry || Entry <- Entries, - filelib:is_dir( - filename:join(QueuesDir, Entry)) ]; - {error, enoent} -> - [] + {ok, Entries} -> [ Entry || Entry <- Entries, + filelib:is_dir( + filename:join( + QueuesDir, Entry)) ]; + {error, enoent} -> [] end, DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), {DurableQueueNames, TransientDirs, DurableTerms} = @@ -479,8 +477,7 @@ terminate(StoreShutdown, Terms, State = State #qistate { journal_handle = undefined, segments = undefined }. recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> - {SegEntries, PubCount, AckCount, Segment1} = - load_segment(false, Segment), + {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment), array:sparse_foldl( fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment3) -> recover_message(ContainsCheckFun(Guid), CleanShutdown, -- cgit v1.2.1 From 5e474033fddc141972c04fa80fd0a2e96ef33c3e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 14:22:10 +0100 Subject: Cosmetics and documentation --- src/rabbit_queue_index.erl | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 6d39d990..9cd816b2 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -80,18 +80,20 @@ %% contains a mapping from segment numbers to state-per-segment (this %% state is held for all segments which have been "seen": thus a %% segment which has been read but has no pending entries in the -%% journal is still held in this mapping). Actions are stored directly -%% in this state. Thus at the point of flushing the journal, firstly -%% no reading from disk is necessary, but secondly if the known number -%% of acks and publishes in a segment are equal, given the known state -%% of the segment file combined with the journal, no writing needs to -%% be done to the segment file either (in fact it is deleted if it -%% exists at all). This is safe given that the set of acks is a subset -%% of the set of publishes. When it's necessary to sync messages -%% because of transactions, it's only necessary to fsync on the -%% journal: when entries are distributed from the journal to segment -%% files, those segments appended to are fsync'd prior to the journal -%% being truncated. +%% journal is still held in this mapping. Also note that a dict is +%% used for this mapping, not an array because with an array, you will +%% always have entries from 0). Actions are stored directly in this +%% state. 
Thus at the point of flushing the journal, firstly no
+%% reading from disk is necessary, but secondly if the known number of
+%% acks and publishes in a segment are equal, given the known state of
+%% the segment file combined with the journal, no writing needs to be
+%% done to the segment file either (in fact it is deleted if it exists
+%% at all). This is safe given that the set of acks is a subset of the
+%% set of publishes. When it's necessary to sync messages because of
+%% transactions, it's only necessary to fsync on the journal: when
+%% entries are distributed from the journal to segment files, those
+%% segments appended to are fsync'd prior to the journal being
+%% truncated.
 %%
 %% This module is also responsible for scanning the queue index files
 %% and seeding the message store on start up.
@@ -734,13 +736,6 @@ get_segment_handle(Segment = #segment { handle = undefined, path = Path }) ->
 get_segment_handle(Segment = #segment { handle = Hdl }) ->
     {Hdl, Segment}.

-segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) ->
-    {ok, Segment}; %% 1 or (2, matches head)
-segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) ->
-    {ok, Segment}; %% 2, matches tail
-segment_find(Seg, {Segments, _}) -> %% no match
-    dict:find(Seg, Segments).
-
 segment_new(Seg, Dir) ->
     #segment { pubs = 0,
                acks = 0,
@@ -756,6 +751,13 @@ segment_find_or_new(Seg, Dir, Segments) ->
         {ok, Segment} -> Segment
     end.

+segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) ->
+    {ok, Segment}; %% 1 or (2, matches head)
+segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) ->
+    {ok, Segment}; %% 2, matches tail
+segment_find(Seg, {Segments, _}) -> %% no match
+    dict:find(Seg, Segments).
+
 segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head)
               {Segments, [#segment { num = Seg } | Tail]}) ->
     {Segments, [Segment | Tail]};
-- cgit v1.2.1


From 3a16391b305fdf3fd286eb348f366ad245b334ec Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 18 May 2010 15:02:25 +0100
Subject: Because of the change to qi:read, we now don't need delta to start
 on a segment boundary. This removes the need for current_segment_boundary

---
 src/rabbit_queue_index.erl    | 8 +-------
 src/rabbit_variable_queue.erl | 6 +-----
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 9cd816b2..1f3ce0a8 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -33,8 +33,7 @@
 -export([init/3, terminate/2, terminate_and_erase/1, publish/4,
          deliver/2, ack/2, sync/2, flush/1, read/3,
-         current_segment_boundary/1, next_segment_boundary/1, bounds/1,
-         recover/1]).
+         next_segment_boundary/1, bounds/1, recover/1]).

 -define(CLEAN_FILENAME, "clean.dot").

@@ -196,7 +195,6 @@
 -spec(read/3 :: (seq_id(), seq_id(), qistate()) ->
              {[{guid(), seq_id(), boolean(), boolean()}],
               seq_id() | 'undefined', qistate()}).
--spec(current_segment_boundary/1 :: (seq_id()) -> seq_id()).
 -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()).
 -spec(bounds/1 :: (qistate()) ->
              {non_neg_integer(), non_neg_integer(), qistate()}).
@@ -365,10 +363,6 @@ read(Start, End, State = #qistate { segments = Segments,
      Again,
      State #qistate { segments = segment_store(Segment1, Segments) }}.

-current_segment_boundary(SeqId) ->
-    {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
-    reconstruct_seq_id(Seg, 0).
-
 next_segment_boundary(SeqId) ->
     {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
     reconstruct_seq_id(Seg + 1, 0).
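%% An arithmetic aside on the boundary helpers above: seq ids map to
%% {Segment, RelSeq} pairs by simple div/rem. Assuming the module's
%% ?SEGMENT_ENTRY_COUNT is 16384 (that define is not shown in these
%% patches, so treat the constant as an assumption), the helpers
%% reduce to:
%%
%%   seq_id_to_seg_and_rel_seq_id(SeqId) ->
%%       { SeqId div 16384, SeqId rem 16384 }.
%%
%%   reconstruct_seq_id(Seg, RelSeq) ->
%%       (Seg * 16384) + RelSeq.
%%
%% e.g. seq id 20000 lives in segment 1 at relative position 3616, so
%% next_segment_boundary(20000) =:= reconstruct_seq_id(2, 0) =:= 32768.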
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 992cf19a..d3fa33dc 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1200,11 +1200,7 @@ publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = {#msg_status { index_on_disk = true }, IndexState1} = maybe_write_index_to_disk(true, MsgStatus1, IndexState), true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION - %% delta may be empty, seq_id > next_segment_boundary from q3 - %% head, so we need to find where the segment boundary is before - %% or equal to seq_id - DeltaSeqId = rabbit_queue_index:current_segment_boundary(SeqId), - Delta1 = #delta { start_seq_id = DeltaSeqId, count = 1, + Delta1 = #delta { start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1 }, State #vqstate { index_state = IndexState1, delta = combine_deltas(Delta, Delta1), -- cgit v1.2.1 From dd6cbd4c2c6b2d3d2b976d2910f197af645cb2f2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 15:18:39 +0100 Subject: vq:delete1 does not need to return nor pass around any count --- src/rabbit_variable_queue.erl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d3fa33dc..41cdb312 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -365,10 +365,7 @@ delete_and_terminate(State) -> {N, N, IndexState3} -> IndexState3; {DeltaSeqId, NextSeqId, IndexState3} -> - {_DeleteCount, IndexState4} = - delete1(PersistentStore, TransientThreshold, NextSeqId, 0, - DeltaSeqId, IndexState3), - IndexState4 + delete1(PersistentStore, TransientThreshold, NextSeqId, DeltaSeqId, IndexState3) end, IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2), rabbit_msg_store:delete_client(PersistentStore, PRef), @@ -957,27 +954,26 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, [ Fun() || Fun <- lists:reverse(SFuns) ], State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. -delete1(_PersistentStore, _TransientThreshold, NextSeqId, Count, DeltaSeqId, +delete1(_PersistentStore, _TransientThreshold, NextSeqId, DeltaSeqId, IndexState) when DeltaSeqId =:= undefined orelse DeltaSeqId >= NextSeqId -> - {Count, IndexState}; -delete1(PersistentStore, TransientThreshold, NextSeqId, Count, DeltaSeqId, + IndexState; +delete1(PersistentStore, TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> {List, Again, IndexState1} = rabbit_queue_index:read(DeltaSeqId, NextSeqId, IndexState), - {IndexState2, Count1} = + IndexState2 = case List of - [] -> {IndexState1, Count}; - _ -> {Q, IndexState3} = - betas_from_segment_entries( - List, TransientThreshold, IndexState1), - {Count2, IndexState4} = + [] -> IndexState1; + _ -> {Q, IndexState3} = betas_from_segment_entries( + List, TransientThreshold, IndexState1), + {_Count, IndexState4} = remove_queue_entries( PersistentStore, fun beta_fold_no_index_on_disk/3, Q, IndexState3), - {IndexState4, Count2 + Count} + IndexState4 end, - delete1(PersistentStore, TransientThreshold, NextSeqId, Count1, Again, + delete1(PersistentStore, TransientThreshold, NextSeqId, Again, IndexState2). 
purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, -- cgit v1.2.1 From c1d9bf75ed36b0edc61ce62fca91ba1c81660d59 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 18 May 2010 15:40:24 +0100 Subject: take the journal into consideration in recover_segment which removes the need for an initial flush --- src/rabbit_queue_index.erl | 95 +++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 1f3ce0a8..93fd3759 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -213,28 +213,21 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> {error, _} -> []; {ok, Terms1} -> Terms1 end, - %% 1. Load the journal completely. This will also load segments - %% which have entries in the journal and remove duplicates. - %% The counts will correctly reflect the combination of the - %% segment and the journal. - State1 = load_journal(State), - %% 2. Flush the journal. This makes life easier for everyone, as - %% it means there won't be any publishes in the journal - %% alone. The dirty recovery code below relies on this. - State2 = #qistate { dir = Dir, segments = Segments } = - flush_journal(State1), - %% 3. Load each segment in turn and filter out messages that are - %% not in the msg_store, by adding acks to the journal. These - %% acks only go to the RAM journal as it doesn't matter if we - %% lose them. Also mark delivered if not clean shutdown. Also - %% find the number of unacked messages. + %% Load the journal completely. This will also load segments which + %% have entries in the journal and remove duplicates. The counts + %% will correctly reflect the combination of the segment and the + %% journal. + State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), CleanShutdown = detect_clean_shutdown(Dir), - %% We know the journal is empty here, so we don't need to combine - %% with the journal, and we don't need to worry about messages - %% that have been acked. {Segments1, Count} = case CleanShutdown andalso MsgStoreRecovered of false -> + %% Load each segment in turn and filter out messages + %% that are not in the msg_store, by adding acks to + %% the journal. These acks only go to the RAM journal + %% as it doesn't matter if we lose them. Also mark + %% delivered if not clean shutdown. Also find the + %% number of unacked messages. lists:foldl( fun (Seg, {Segments2, CountAcc}) -> Segment = #segment { pubs = PubCount, @@ -244,7 +237,7 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> segment_find_or_new(Seg, Dir, Segments2)), {segment_store(Segment, Segments2), CountAcc + PubCount - AckCount} - end, {Segments, 0}, all_segment_nums(State2)); + end, {Segments, 0}, all_segment_nums(State1)); true -> %% At this stage, we will only know about files that %% were loaded during journal loading, They *will* have @@ -266,19 +259,23 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> _ -> SegmentsN end - end, Segments, all_segment_nums(State2)), + end, Segments, all_segment_nums(State1)), %% the counts above include transient messages, which %% would be the wrong thing to return undefined} end, - %% flush again so we eagerly remove any segments that have become - %% empty due to either ContainsCheckFun returning false in the - %% non-clean recovery case or PubCount==AckCount in the clean - %% recovery case. Since the latter doesn't go through the journal - %% logic we we artificially set the dirty_count non zero. 
-    State3 = flush_journal(State2 #qistate { segments = Segments1,
+    %% Flush so we eagerly remove any segments that have become empty
+    %% due to
+    %%   a) processing the journal,
+    %%   b) ContainsCheckFun returning false in the non-clean
+    %%      recovery case, or
+    %%   c) recovering a segment with PubCount==AckCount in the clean
+    %%      recovery case
+    %% Since the latter doesn't go through the journal logic we
+    %% artificially set the dirty_count non zero.
+    State2 = flush_journal(State1 #qistate { segments = Segments1,
                                             dirty_count = 1 }),
-    {Count, Terms, State3}.
+    {Count, Terms, State2}.

 terminate(Terms, State) ->
     terminate(true, Terms, State).
@@ -352,6 +349,8 @@ read(Start, End, State = #qistate { segments = Segments,
     Segment = segment_find_or_new(StartSeg, Dir, Segments),
     {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment),
     #segment { journal_entries = JEntries } = Segment1,
+    {SegEntries1, _PubCountDelta, _AckCountDelta} =
+        journal_plus_segment(JEntries, SegEntries),
     {array:sparse_foldr(
        fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc)
            when StartRelSeq =< RelSeq andalso RelSeq < MaxRelSeq ->
@@ -359,7 +358,7 @@ read(Start, End, State = #qistate { segments = Segments,
                IsPersistent, IsDelivered == del} | Acc ];
            (_RelSeq, _Value, Acc) ->
                Acc
-        end, [], journal_plus_segment(JEntries, SegEntries)),
+        end, [], SegEntries1),
      Again,
      State #qistate { segments = segment_store(Segment1, Segments) }}.

@@ -474,13 +473,17 @@ terminate(StoreShutdown, Terms, State =

 recover_segment(ContainsCheckFun, CleanShutdown, Segment) ->
     {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment),
+    #segment { journal_entries = JEntries } = Segment1,
+    {SegEntries1, PubCountDelta, AckCountDelta} =
+        journal_plus_segment(JEntries, SegEntries),
     array:sparse_foldl(
-      fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment3) ->
+      fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment2) ->
             recover_message(ContainsCheckFun(Guid), CleanShutdown,
-                            Del, RelSeq, Segment3)
+                            Del, RelSeq, Segment2)
      end,
-      Segment1 #segment { pubs = PubCount, acks = AckCount },
-      SegEntries).
+      Segment1 #segment { pubs = PubCount + PubCountDelta,
+                          acks = AckCount + AckCountDelta},
+      SegEntries1).

 recover_message( true,  true, _Del, _RelSeq, Segment) ->
     Segment;
@@ -884,31 +887,35 @@ bool_to_int(false) -> 0.
 %% queue.
 journal_plus_segment(JEntries, SegEntries) ->
     array:sparse_foldl(
-      fun (RelSeq, JObj, SegEntriesOut) ->
+      fun (RelSeq, JObj, {SegEntriesOut, PubsAdded, AcksAdded}) ->
             SegEntry = array:get(RelSeq, SegEntriesOut),
-            case journal_plus_segment1(JObj, SegEntry) of
-                undefined -> array:reset(RelSeq, SegEntriesOut);
-                Obj -> array:set(RelSeq, Obj, SegEntriesOut)
-            end
-      end, SegEntries, JEntries).
+            {Obj, PubsAddedDelta, AcksAddedDelta} =
+                journal_plus_segment1(JObj, SegEntry),
+            {case Obj of
+                 undefined -> array:reset(RelSeq, SegEntriesOut);
+                 Obj -> array:set(RelSeq, Obj, SegEntriesOut)
+             end,
+             PubsAdded + PubsAddedDelta,
+             AcksAdded + AcksAddedDelta}
+      end, {SegEntries, 0, 0}, JEntries).

 %% Here, the result is the item which we may be adding to (for items
 %% only in the journal), modifying in (bits in both), or, when
 %% returning 'undefined', erasing from (ack in journal, not segment)
 %% the segment array.
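%% As a worked example of that triple (it simply restates the final
%% clause below): an ack in the journal against a delivered publish in
%% the segment erases the entry and accounts for one extra ack:
%%
%%   journal_plus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack})
%%       =:= {undefined, 0, 1}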
journal_plus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> - Obj; + {Obj, 1, 0}; journal_plus_segment1({?PUB, del, no_ack} = Obj, undefined) -> - Obj; + {Obj, 1, 0}; journal_plus_segment1({?PUB, del, ack}, undefined) -> - undefined; + {undefined, 1, 1}; journal_plus_segment1({no_pub, del, no_ack}, {?PUB = Pub, no_del, no_ack}) -> - {Pub, del, no_ack}; + {{Pub, del, no_ack}, 0, 0}; journal_plus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> - undefined; + {undefined, 0, 1}; journal_plus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> - undefined. + {undefined, 0, 1}. %% Remove from the journal entries for a segment, items that are %% duplicates of entries found in the segment itself. Used on start up -- cgit v1.2.1 From d6e637eba443e7f46d081842e96d5e8d29a6e98f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 15:55:11 +0100 Subject: Refactored bpqueue tests --- src/rabbit_tests.erl | 84 ++++++++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 9821367d..4075ddf7 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -297,31 +297,15 @@ test_bpqueue() -> {Q12, 0} = F2(Q), [] = bpqueue:to_list(Q12), - FF1 = fun (Prefixes) -> - fun (P) -> lists:member(P, Prefixes) end - end, - FF2 = fun (Prefix, Stoppers) -> - fun (Val, Num) -> - case lists:member(Val, Stoppers) of - true -> stop; - false -> {Prefix, -Val, 1 + Num} - end - end - end, - Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, - BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], BPQ = bpqueue:from_list(BPQL), %% no effect - {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_l( - FF1([none]), FF2(none, []), 0, BPQ)), - {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_l( - FF1([foo,bar]), FF2(none, [1]), 0, BPQ)), - {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_l( - FF1([bar]), FF2(none, [3]), 0, BPQ)), - {BPQL, 0} = Queue_to_list(bpqueue:map_fold_filter_r( - FF1([bar]), FF2(foo, [5]), 0, BPQ)), + {BPQL, 0} = bpqueue_mffl([none], {none, []}, BPQ), + {BPQL, 0} = bpqueue_mffl([foo,bar], {none, [1]}, BPQ), + {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ), + {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ), + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, {[], 0} = Queue_to_list(bpqueue:map_fold_filter_l( fun(_P)-> throw(explosion) end, fun(_V, _N) -> throw(explosion) end, 0, Q)), @@ -331,48 +315,58 @@ test_bpqueue() -> %% process 1 item {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([foo, bar]), FF2(foo, [2]), 0, BPQ)), + bpqueue_mffl([foo,bar], {foo, [2]}, BPQ), {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([bar]), FF2(bar, [4]), 0, BPQ)), + bpqueue_mffl([bar], {bar, [4]}, BPQ), {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} = - Queue_to_list(bpqueue:map_fold_filter_r( - FF1([foo, bar]), FF2(foo, [6]), 0, BPQ)), + bpqueue_mffr([foo,bar], {foo, [6]}, BPQ), {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, {foo,[5,6,7]}], 1} = - Queue_to_list(bpqueue:map_fold_filter_r( - FF1([bar]), FF2(baz, [4]), 0, BPQ)), + bpqueue_mffr([bar], {baz, [4]}, BPQ), %% change prefix {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([foo, bar]), FF2(bar, []), 0, BPQ)), + bpqueue_mffl([foo,bar], {bar, []}, BPQ), {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} = - Queue_to_list(bpqueue:map_fold_filter_l( - 
FF1([foo]), FF2(bar, [5]), 0, BPQ)), + bpqueue_mffl([foo], {bar, [5]}, BPQ), {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([foo]), FF2(bar, [7]), 0, BPQ)), + bpqueue_mffl([foo], {bar, [7]}, BPQ), {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([bar]), FF2(foo, [5]), 0, BPQ)), + bpqueue_mffl([bar], {foo, [5]}, BPQ), {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([foo]), FF2(bar, []), 0, BPQ)), + bpqueue_mffl([foo], {bar, []}, BPQ), {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([bar]), FF2(foo, []), 0, BPQ)), + bpqueue_mffl([bar], {foo, []}, BPQ), %% edge cases {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = - Queue_to_list(bpqueue:map_fold_filter_l( - FF1([foo]), FF2(foo, [5]), 0, BPQ)), + bpqueue_mffl([foo], {foo, [5]}, BPQ), {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} = - Queue_to_list(bpqueue:map_fold_filter_r( - FF1([foo]), FF2(foo, [2]), 0, BPQ)), + bpqueue_mffr([foo], {foo, [2]}, BPQ), passed. +bpqueue_mffl(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ). + +bpqueue_mffr(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_r/4, FF1A, FF2A, BPQ). + +bpqueue_mff(Fold, FF1A, FF2A, BPQ) -> + FF1 = fun (Prefixes) -> + fun (P) -> lists:member(P, Prefixes) end + end, + FF2 = fun ({Prefix, Stoppers}) -> + fun (Val, Num) -> + case lists:member(Val, Stoppers) of + true -> stop; + false -> {Prefix, -Val, 1 + Num} + end + end + end, + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, + + Queue_to_list(Fold(FF1(FF1A), FF2(FF2A), 0, BPQ)). + test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), -- cgit v1.2.1 From 3264e93e6d2592d4a9fa7d6165f441ddc12f9674 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 18 May 2010 15:56:57 +0100 Subject: correct comment --- src/rabbit_queue_index.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 93fd3759..39cbf1b3 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -882,9 +882,7 @@ bool_to_int(false) -> 0. %%---------------------------------------------------------------------------- %% Combine what we have just read from a segment file with what we're -%% holding for that segment in memory. There must be no -%% duplicates. Used when providing segment entries to the variable -%% queue. +%% holding for that segment in memory. There must be no duplicates. journal_plus_segment(JEntries, SegEntries) -> array:sparse_foldl( fun (RelSeq, JObj, {SegEntriesOut, PubsAdded, AcksAdded}) -> @@ -899,10 +897,13 @@ journal_plus_segment(JEntries, SegEntries) -> AcksAdded + AcksAddedDelta} end, {SegEntries, 0, 0}, JEntries). -%% Here, the result is the item which we may be adding to (for items -%% only in the journal), modifying in (bits in both), or, when -%% returning 'undefined', erasing from (ack in journal, not segment) -%% the segment array. +%% Here, the result is a triple with the first element containing the +%% item which we may be adding to (for items only in the journal), +%% modifying in (bits in both), or, when returning 'undefined', +%% erasing from (ack in journal, not segment) the segment array. 
The +%% other two elements of the triple are the deltas for PubsAdded and +%% AcksAdded - these get increased when a publish or ack is found in +%% the journal. journal_plus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> {Obj, 1, 0}; journal_plus_segment1({?PUB, del, no_ack} = Obj, undefined) -> -- cgit v1.2.1 From e290bce16e7ba42d40b7e687e332cb3382798d0c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 16:21:50 +0100 Subject: Removing erroneous conjunctive clause in test --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 39cbf1b3..40fa0be0 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -601,7 +601,7 @@ flush_journal(State = #qistate { segments = Segments }) -> fun (_Seg, #segment { journal_entries = JEntries, pubs = PubCount, acks = AckCount } = Segment, SegmentsN) -> - case PubCount > 0 andalso PubCount == AckCount of + case PubCount =:= AckCount of true -> ok = delete_segment(Segment), SegmentsN; false -> segment_store( -- cgit v1.2.1 From 35b853754388f96c43eda868a32541304be30bd1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 16:29:12 +0100 Subject: No need to build up a list of transient queue indices to delete --- src/rabbit_queue_index.erl | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 40fa0be0..53dbf311 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -398,9 +398,9 @@ recover(DurableQueues) -> {error, enoent} -> [] end, DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), - {DurableQueueNames, TransientDirs, DurableTerms} = + {DurableQueueNames, DurableTerms} = lists:foldl( - fun (QueueDir, {DurableAcc, TransientAcc, TermsAcc}) -> + fun (QueueDir, {DurableAcc, TermsAcc}) -> case sets:is_element(QueueDir, DurableDirectories) of true -> TermsAcc1 = @@ -410,15 +410,13 @@ recover(DurableQueues) -> {ok, Terms} -> [Terms | TermsAcc] end, {[dict:fetch(QueueDir, DurableDict) | DurableAcc], - TransientAcc, TermsAcc1}; + TermsAcc1}; false -> - {DurableAcc, [QueueDir | TransientAcc], TermsAcc} + Dir = filename:join(queues_dir(), QueueDir), + ok = rabbit_misc:recursive_delete([Dir]), + {DurableAcc, TermsAcc} end - end, {[], [], []}, Directories), - lists:foreach(fun (DirName) -> - Dir = filename:join(queues_dir(), DirName), - ok = rabbit_misc:recursive_delete([Dir]) - end, TransientDirs), + end, {[], []}, Directories), {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. %%---------------------------------------------------------------------------- -- cgit v1.2.1 From ac79a77fd1838bd4f490d066f6d5ecfb31c2608b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 16:34:39 +0100 Subject: Added specs for msg_store_gc --- src/rabbit_msg_store_gc.erl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 2b6bf9b2..4b80d088 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -53,6 +53,19 @@ %%---------------------------------------------------------------------------- +-ifdef(use_specs). + +-spec(start_link/4 :: (file_path(), any(), atom(), tid()) -> + {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). +-spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). 
+-spec(stop/1 :: (pid()) -> 'ok'). +-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> gen_server2:start_link( ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], -- cgit v1.2.1 From 85bd55a602594ecb9cb5098949b5f5fc5bdf2217 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 May 2010 16:55:44 +0100 Subject: Added documentation regarding the clean shutdown and startup optimisations --- src/rabbit_msg_store.erl | 3 +++ src/rabbit_queue_index.erl | 5 ++++- src/rabbit_variable_queue.erl | 51 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 5f93e4e8..51ad2926 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -298,6 +298,9 @@ %% itself. The effect of this is that even if the msg_store process is %% heavily overloaded, clients can still write and read messages with %% very low latency and not block at all. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. %%---------------------------------------------------------------------------- %% public API diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 53dbf311..df28bb2d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -104,7 +104,10 @@ %% correct combination of journal entries with entries read from the %% segment on disk, this richer representation vastly simplifies and %% clarifies the code. - +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. +%% %%---------------------------------------------------------------------------- %% ---- Journal details ---- diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 41cdb312..cc876b5e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -58,11 +58,11 @@ %% Note that for persistent messages, the message and its position %% within the queue are always held on disk, *in addition* to being in %% one of the above classifications. - +%% %% Also note that within this code, the term gamma never %% appears. Instead, gammas are defined by betas who have had their %% queue position recorded on disk. - +%% %% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though %% many of these steps are frequently skipped. q1 and q4 only hold %% alphas, q2 and q3 hold both betas and gammas (as queues of queues, @@ -70,18 +70,18 @@ %% they're betas or gammas). When a message arrives, its %% classification is determined. It is then added to the rightmost %% appropriate queue. - +%% %% If a new message is determined to be a beta or gamma, q1 is %% empty. If a new message is determined to be a delta, q1 and q2 are %% empty (and actually q4 too). - +%% %% When removing messages from a queue, if q4 is empty then q3 is read %% directly. If q3 becomes empty then the next segment's worth of %% messages from delta are read into q3, reducing the size of %% delta. If the queue is non empty, either q4 or q3 contain %% entries. It is never permitted for delta to hold all the messages %% in the queue. - +%% %% The duration indicated to us by the memory_monitor is used to %% calculate, given our current ingress and egress rates, how many %% messages we should hold in RAM. 
When we need to push alphas to @@ -90,13 +90,13 @@ %% as the messages closer to the tail of the queue stay in the queue %% for longer, thus do not need to be replaced as quickly by sending %% other messages to disk. - +%% %% Whilst messages are pushed to disk and forgotten from RAM as soon %% as requested by a new setting of the queue RAM duration, the %% inverse is not true: we only load messages back into RAM as %% demanded as the queue is read from. Thus only publishes to the %% queue will take up available spare capacity. - +%% %% If a queue is full of transient messages, then the transition from %% betas to deltas will be potentially very expensive as millions of %% entries must be written to disk by the queue_index module. This can @@ -107,7 +107,7 @@ %% point at which betas and gammas must be converted to deltas, there %% should be very few betas remaining, thus the transition is fast (no %% work needs to be done for the gamma -> delta transition). - +%% %% The conversion of betas to gammas is done on publish, in batches of %% exactly ?RAM_INDEX_BATCH_SIZE. This value should not be too small, %% otherwise the frequent operations on the queues of q2 and q3 will @@ -123,14 +123,45 @@ %% transition doesn't matter, and in the former case the queue's %% shrinking length makes it unlikely (though not impossible) that the %% duration will become 0. - +%% %% In the queue we only keep track of messages that are pending %% delivery. This is fine for queue purging, but can be expensive for %% queue deletion: for queue deletion we must scan all the way through %% all remaining segments in the queue index (we start by doing a %% purge) and delete messages from the msg_store that we find in the %% queue index. - +%% +%% Notes on Clean Shutdown +%% (This documents behaviour in variable_queue, queue_index and +%% msg_store.) +%% +%% In order to try to achieve as fast a start-up as possible, if a +%% clean shutdown occurs, we try to save out state to disk to reduce +%% work on startup. In the msg_store this takes the form of the +%% index_module's state, plus the file_summary ets table, and client +%% refs. In the VQ, this takes the form of the count of persistent +%% messages in the queue and references into the msg_stores. The +%% queue_index adds to these terms the details of its segments and +%% stores the terms in the queue directory. +%% +%% The references to the msg_stores are there so that the msg_store +%% knows to only trust its saved state if all of the queues it was +%% previously talking to come up cleanly. Likewise, the queues +%% themselves (esp queue_index) skips work in init if all the queues +%% and msg_store were shutdown cleanly. This gives both good speed +%% improvements and also robustness so that if anything possibly went +%% wrong in shutdown (or there was subsequent manual tampering), all +%% messages and queues that can be recovered are recovered, safely. +%% +%% To delete transient messages lazily, the variable_queue, on +%% startup, stores the next_seq_id reported by the queue_index as the +%% transient_threshold. From that point on, whenever it's reading a +%% message off disk via the queue_index, if the seq_id is below this +%% threshold and the message is transient then it drops the +%% message. This avoids the expensive operation of scanning the entire +%% queue on startup in order to delete transient messages that were +%% only pushed to disk to save memory. 
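+%%
+%% A sketch of that threshold check (illustrative only; the function
+%% name is hypothetical and the real filtering lives in
+%% betas_from_segment_entries in this module):
+%%
+%%   must_discard(SeqId, IsPersistent, TransientThreshold) ->
+%%       SeqId < TransientThreshold andalso not IsPersistent.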
+%% %%---------------------------------------------------------------------------- -behaviour(rabbit_backing_queue). -- cgit v1.2.1 From b9843ef9a7cea580ceb8830e36183da9396bf8f2 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 18 May 2010 19:27:26 +0100 Subject: minor refactor --- src/rabbit_queue_index.erl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index df28bb2d..f2451725 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -571,22 +571,22 @@ add_to_journal(RelSeq, Action, %% This is a more relaxed version of deliver_or_ack_msg because we can %% have dels or acks in the journal without the corresponding -%% pub. Also, always want to keep acks. Things must occur in the right -%% order though. -add_to_journal(RelSeq, Action, SegJArray) -> - case array:get(RelSeq, SegJArray) of - undefined -> - array:set(RelSeq, - case Action of - {_Msg, _IsPersistent} -> {Action, no_del, no_ack}; - del -> {no_pub, del, no_ack}; - ack -> {no_pub, no_del, ack} - end, SegJArray); - ({Pub, no_del, no_ack}) when Action == del -> - array:set(RelSeq, {Pub, del, no_ack}, SegJArray); - ({Pub, Del, no_ack}) when Action == ack -> - array:set(RelSeq, {Pub, Del, ack}, SegJArray) - end. +%% pub. Also, always want to keep ack'd entries. Things must occur in +%% the right order though. +add_to_journal(RelSeq, Action, JEntries) -> + Val = case array:get(RelSeq, JEntries) of + undefined -> + case Action of + ?PUB -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end; + ({Pub, no_del, no_ack}) when Action == del -> + {Pub, del, no_ack}; + ({Pub, Del, no_ack}) when Action == ack -> + {Pub, Del, ack} + end, + array:set(RelSeq, Val, JEntries). maybe_flush_journal(State = #qistate { dirty_count = DCount }) when DCount > ?MAX_JOURNAL_ENTRY_COUNT -> -- cgit v1.2.1 From ea69795cc6d6297f74bc70445759ec9b5f654c2e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 18 May 2010 19:27:52 +0100 Subject: KeepAck -> KeepAcked --- src/rabbit_queue_index.erl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f2451725..d061947a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -647,9 +647,9 @@ load_journal(State) -> fun (_Seg, Segment = #segment { journal_entries = JEntries, pubs = PubCountInJournal, acks = AckCountInJournal }) -> - %% We want to keep acks in so that we can remove - %% them if duplicates are in the journal. The counts - %% here are purely from the segment itself. + %% We want to keep ack'd entries in so that we can + %% remove them if duplicates are in the journal. The + %% counts here are purely from the segment itself. {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} = load_segment(true, Segment), %% Removed counts here are the number of pubs and @@ -821,11 +821,11 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> %% %% Does not do any combining with the journal at all. The PubCount %% that comes back is the number of publishes in the segment. The -%% number of unacked msgs is PubCount - AckCount. If KeepAcks is +%% number of unacked msgs is PubCount - AckCount. If KeepAcked is %% false, then array:sparse_size(SegEntries) == PubCount - -%% AckCount. If KeepAcks is true, then array:sparse_size(SegEntries) +%% AckCount. 
If KeepAcked is true, then array:sparse_size(SegEntries)
+%% == PubCount.
-load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) ->
+load_segment(KeepAcked, Segment = #segment { path = Path, handle = SegHdl }) ->
     SegmentExists = case SegHdl of
                         undefined -> filelib:is_file(Path);
                         _ -> true
@@ -835,18 +835,12 @@ load_segment(KeepAcks, Segment = #segment { path = Path, handle = SegHdl }) ->
         true  -> {Hdl, Segment1} = get_segment_handle(Segment),
                  {ok, 0} = file_handle_cache:position(Hdl, bof),
                  {SegEntries, PubCount, AckCount} =
-                     load_segment_entries(KeepAcks, Hdl, array_new(), 0, 0),
+                     load_segment_entries(KeepAcked, Hdl, array_new(), 0, 0),
                  {SegEntries, PubCount, AckCount, Segment1}
     end.

-load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) ->
+load_segment_entries(KeepAcked, Hdl, SegEntries, PubCount, AckCount) ->
     case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
-        {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
-               RelSeq:?REL_SEQ_BITS>>} ->
-            {AckCount1, SegEntries1} =
-                deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries),
-            load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount,
-                                 AckCount1);
         {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
                IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
             %% because we specify /binary, and binaries are complete
@@ -856,17 +850,23 @@ load_segment_entries(KeepAcks, Hdl, SegEntries, PubCount, AckCount) ->
             %% bytes, the size spec is in bytes, not bits.
             {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES),
             SegEntries1 =
                 array:set(RelSeq,
                           {{Guid, 1 == IsPersistentNum}, no_del, no_ack},
                           SegEntries),
-            load_segment_entries(KeepAcks, Hdl, SegEntries1, PubCount + 1,
+            load_segment_entries(KeepAcked, Hdl, SegEntries1, PubCount + 1,
                                  AckCount);
+        {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+               RelSeq:?REL_SEQ_BITS>>} ->
+            {AckCount1, SegEntries1} =
+                deliver_or_ack_msg(KeepAcked, RelSeq, AckCount, SegEntries),
+            load_segment_entries(KeepAcked, Hdl, SegEntries1, PubCount,
+                                 AckCount1);
         _ErrOrEoF -> {SegEntries, PubCount, AckCount}
     end.

-deliver_or_ack_msg(KeepAcks, RelSeq, AckCount, SegEntries) ->
+deliver_or_ack_msg(KeepAcked, RelSeq, AckCount, SegEntries) ->
     case array:get(RelSeq, SegEntries) of
         {Pub, no_del, no_ack} ->
             {AckCount, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
-        {Pub, del, no_ack} when KeepAcks ->
+        {Pub, del, no_ack} when KeepAcked ->
             {AckCount + 1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
         {_Pub, del, no_ack} ->
             {AckCount + 1, array:reset(RelSeq, SegEntries)}
-- cgit v1.2.1


From 7865f9e0662c2c661721bfee4229e0e333336f5f Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 19 May 2010 08:00:33 +0100
Subject: journal_plus_segment -> segment_plus_journal

This better reflects what we are actually doing, i.e. we take the
segment and add the journal.
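As a worked example under the new argument order (this restates one of
the clauses in the diff below): a publish that already reached the
segment file, with a del and an ack still pending in the journal, nets
out to nothing on read:

    segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack})
        =:= {undefined, 0, 1}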
--- src/rabbit_queue_index.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d061947a..5e53e909 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -353,7 +353,7 @@ read(Start, End, State = #qistate { segments = Segments, {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, {SegEntries1, _PubCountDelta, _AckCountDelta} = - journal_plus_segment(JEntries, SegEntries), + segment_plus_journal(SegEntries, JEntries), {array:sparse_foldr( fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) when StartRelSeq =< RelSeq andalso RelSeq < MaxRelSeq -> @@ -476,7 +476,7 @@ recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, {SegEntries1, PubCountDelta, AckCountDelta} = - journal_plus_segment(JEntries, SegEntries), + segment_plus_journal(SegEntries, JEntries), array:sparse_foldl( fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment2) -> recover_message(ContainsCheckFun(Guid), CleanShutdown, @@ -884,12 +884,12 @@ bool_to_int(false) -> 0. %% Combine what we have just read from a segment file with what we're %% holding for that segment in memory. There must be no duplicates. -journal_plus_segment(JEntries, SegEntries) -> +segment_plus_journal(SegEntries, JEntries) -> array:sparse_foldl( fun (RelSeq, JObj, {SegEntriesOut, PubsAdded, AcksAdded}) -> SegEntry = array:get(RelSeq, SegEntriesOut), {Obj, PubsAddedDelta, AcksAddedDelta} = - journal_plus_segment1(JObj, SegEntry), + segment_plus_journal1(SegEntry, JObj), {case Obj of undefined -> array:reset(RelSeq, SegEntriesOut); Obj -> array:set(RelSeq, Obj, SegEntriesOut) @@ -905,18 +905,18 @@ journal_plus_segment(JEntries, SegEntries) -> %% other two elements of the triple are the deltas for PubsAdded and %% AcksAdded - these get increased when a publish or ack is found in %% the journal. -journal_plus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> +segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) -> {Obj, 1, 0}; -journal_plus_segment1({?PUB, del, no_ack} = Obj, undefined) -> +segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) -> {Obj, 1, 0}; -journal_plus_segment1({?PUB, del, ack}, undefined) -> +segment_plus_journal1(undefined, {?PUB, del, ack}) -> {undefined, 1, 1}; -journal_plus_segment1({no_pub, del, no_ack}, {?PUB = Pub, no_del, no_ack}) -> +segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) -> {{Pub, del, no_ack}, 0, 0}; -journal_plus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> +segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) -> {undefined, 0, 1}; -journal_plus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> +segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) -> {undefined, 0, 1}. %% Remove from the journal entries for a segment, items that are -- cgit v1.2.1 From 186bcbaa88716e454aa8cc87b2b6ba1a345ea11b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 08:56:25 +0100 Subject: seed journal_minus_segment with the original journal ...and modify/remove things in/from it; rather than building a new journal from scratch. 
The result is the same, but this way of doing things a) better reflects what this function is about, and b) should be more efficient in the common case where the journal does not contain stale entries --- src/rabbit_queue_index.erl | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5e53e909..ff2d3360 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -929,12 +929,13 @@ journal_minus_segment(JEntries, SegEntries) -> {Obj, PubsRemovedDelta, AcksRemovedDelta} = journal_minus_segment1(JObj, SegEntry), {case Obj of - undefined -> JEntriesOut; + keep -> JEntriesOut; + undefined -> array:reset(RelSeq, JEntriesOut); _ -> array:set(RelSeq, Obj, JEntriesOut) end, PubsRemoved + PubsRemovedDelta, AcksRemoved + AcksRemovedDelta} - end, {array_new(), 0, 0}, JEntries). + end, {JEntries, 0, 0}, JEntries). %% Here, the result is a triple with the first element containing the %% item we are adding to or modifying in the (initially fresh) journal @@ -944,45 +945,45 @@ journal_minus_segment(JEntries, SegEntries) -> %% publish or ack is in both the journal and the segment. %% Both the same. Must be at least the publish -journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> {undefined, 1, 0}; -journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> {undefined, 1, 1}; %% Just publish in journal -journal_minus_segment1({?PUB, no_del, no_ack} = Obj, undefined) -> - {Obj, 0, 0}; +journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> + {keep, 0, 0}; %% Publish and deliver in journal -journal_minus_segment1({?PUB, del, no_ack} = Obj, undefined) -> - {Obj, 0, 0}; -journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> +journal_minus_segment1({?PUB, del, no_ack}, undefined) -> + {keep, 0, 0}; +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, no_ack}, 1, 0}; %% Publish, deliver and ack in journal -journal_minus_segment1({?PUB, del, ack}, undefined) -> +journal_minus_segment1({?PUB, del, ack}, undefined) -> {undefined, 0, 0}; -journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, ack}, 1, 0}; -journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> {{no_pub, no_del, ack}, 1, 0}; %% Just deliver in journal -journal_minus_segment1({no_pub, del, no_ack} = Obj, {?PUB, no_del, no_ack}) -> - {Obj, 0, 0}; -journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> + {keep, 0, 0}; +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> {undefined, 0, 0}; %% Just ack in journal -journal_minus_segment1({no_pub, no_del, ack} = Obj, {?PUB, del, no_ack}) -> - {Obj, 0, 0}; -journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> + {keep, 0, 0}; +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> {undefined, 0, 1}; %% Deliver and ack in journal -journal_minus_segment1({no_pub, del, ack} = Obj, {?PUB, no_del, no_ack}) -> - {Obj, 0, 0}; -journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> 
+journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> + {keep, 0, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> {{no_pub, no_del, ack}, 0, 0}; -journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> {undefined, 0, 1}. -- cgit v1.2.1 From d32975897cefd138089ef7d2ad238e25180ae562 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 09:05:03 +0100 Subject: keep complete pub/del/ack entries in journal on recovery IF there is no entry for that message in the segment This is - more efficient, since there could be quite a few of these and we are avoiding an array update for them - more consistent with the other 'keep' cases - the same as would happen if the journal had been built up by queue operations rather than through recovery The entries are filtered out eventually when flushing the journal. --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index ff2d3360..508177ec 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -962,7 +962,7 @@ journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> %% Publish, deliver and ack in journal journal_minus_segment1({?PUB, del, ack}, undefined) -> - {undefined, 0, 0}; + {keep, 0, 0}; journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> {{no_pub, del, ack}, 1, 0}; journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> -- cgit v1.2.1 From 1b9a649af358ac41479334548c6a2fff7638c126 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 09:57:35 +0100 Subject: avoid unnecessary match --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 508177ec..641cb00c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -892,7 +892,7 @@ segment_plus_journal(SegEntries, JEntries) -> segment_plus_journal1(SegEntry, JObj), {case Obj of undefined -> array:reset(RelSeq, SegEntriesOut); - Obj -> array:set(RelSeq, Obj, SegEntriesOut) + _ -> array:set(RelSeq, Obj, SegEntriesOut) end, PubsAdded + PubsAddedDelta, AcksAdded + AcksAddedDelta} -- cgit v1.2.1 From 07cc83d0f93c1671ac5e1b9eaa0ee241d333ee5a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 11:35:47 +0100 Subject: some inlining and removal of some comments that weren't particularly useful --- src/rabbit_queue_index.erl | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 641cb00c..956b3797 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -569,10 +569,6 @@ add_to_journal(RelSeq, Action, ?PUB -> Segment1 #segment { pubs = PubCount + 1 } end; -%% This is a more relaxed version of deliver_or_ack_msg because we can -%% have dels or acks in the journal without the corresponding -%% pub. Also, always want to keep ack'd entries. Things must occur in -%% the right order though. add_to_journal(RelSeq, Action, JEntries) -> Val = case array:get(RelSeq, JEntries) of undefined -> @@ -821,10 +817,7 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> %% %% Does not do any combining with the journal at all. The PubCount %% that comes back is the number of publishes in the segment. 
The -%% number of unacked msgs is PubCount - AckCount. If KeepAcked is -%% false, then array:sparse_size(SegEntries) == PubCount - -%% AckCount. If KeepAcked is true, then array:sparse_size(SegEntries) -%% == PubCount. +%% number of unacked msgs is PubCount - AckCount. load_segment(KeepAcked, Segment = #segment { path = Path, handle = SegHdl }) -> SegmentExists = case SegHdl of undefined -> filelib:is_file(Path); @@ -846,32 +839,27 @@ load_segment_entries(KeepAcked, Hdl, SegEntries, PubCount, AckCount) -> %% because we specify /binary, and binaries are complete %% bytes, the size spec is in bytes, not bits. {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES), - SegEntries1 = - array:set(RelSeq, - {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, - SegEntries), - load_segment_entries(KeepAcked, Hdl, SegEntries1, PubCount + 1, - AckCount); + Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, + SegEntries1 = array:set(RelSeq, Obj, SegEntries), + load_segment_entries(KeepAcked, Hdl, SegEntries1, + PubCount + 1, AckCount); {ok, <>} -> - {AckCount1, SegEntries1} = - deliver_or_ack_msg(KeepAcked, RelSeq, AckCount, SegEntries), - load_segment_entries(KeepAcked, Hdl, SegEntries1, PubCount, - AckCount1); + {AckCountDelta, SegEntries1} = + case array:get(RelSeq, SegEntries) of + {Pub, no_del, no_ack} -> + {0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; + {Pub, del, no_ack} when KeepAcked -> + {1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; + {_Pub, del, no_ack} -> + {1, array:reset(RelSeq, SegEntries)} + end, + load_segment_entries(KeepAcked, Hdl, SegEntries1, + PubCount, AckCount + AckCountDelta); _ErrOrEoF -> {SegEntries, PubCount, AckCount} end. -deliver_or_ack_msg(KeepAcked, RelSeq, AckCount, SegEntries) -> - case array:get(RelSeq, SegEntries) of - {Pub, no_del, no_ack} -> - {AckCount, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; - {Pub, del, no_ack} when KeepAcked -> - {AckCount + 1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; - {_Pub, del, no_ack} -> - {AckCount + 1, array:reset(RelSeq, SegEntries)} - end. - array_new() -> array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). -- cgit v1.2.1 From 16632aaa618cebc271ff0219010973ac5cc2a993 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 May 2010 12:40:00 +0100 Subject: Drop PubCount and AckCount in favour of UnackedCount --- src/rabbit_queue_index.erl | 169 +++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 91 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 956b3797..67bf9f52 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -159,7 +159,7 @@ -record(qistate, { dir, segments, journal_handle, dirty_count }). --record(segment, { pubs, acks, handle, journal_entries, path, num }). +-record(segment, { unacked, handle, journal_entries, path, num }). -include("rabbit.hrl"). @@ -169,8 +169,7 @@ -type(hdl() :: ('undefined' | any())). -type(segment() :: ('undefined' | - #segment { pubs :: non_neg_integer(), - acks :: non_neg_integer(), + #segment { unacked :: non_neg_integer(), handle :: hdl(), journal_entries :: array(), path :: file_path(), @@ -233,13 +232,12 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> %% number of unacked messages. 
lists:foldl( fun (Seg, {Segments2, CountAcc}) -> - Segment = #segment { pubs = PubCount, - acks = AckCount } = + Segment = #segment { unacked = UnackedCount } = recover_segment( ContainsCheckFun, CleanShutdown, segment_find_or_new(Seg, Dir, Segments2)), {segment_store(Segment, Segments2), - CountAcc + PubCount - AckCount} + CountAcc + UnackedCount} end, {Segments, 0}, all_segment_nums(State1)); true -> %% At this stage, we will only know about files that @@ -253,11 +251,11 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> fun (Seg, SegmentsN) -> case {segment_find(Seg, SegmentsN), dict:find(Seg, SegmentDictTerms)} of - {error, {ok, {PubCount, AckCount}}} -> + {error, {ok, UnackedCount}} -> Segment = segment_new(Seg, Dir), segment_store( - Segment #segment { pubs = PubCount, - acks = AckCount }, + Segment #segment { + unacked = UnackedCount }, SegmentsN); _ -> SegmentsN @@ -350,9 +348,9 @@ read(Start, End, State = #qistate { segments = Segments, false -> ?SEGMENT_ENTRY_COUNT end, Segment = segment_find_or_new(StartSeg, Dir, Segments), - {SegEntries, _PubCount, _AckCount, Segment1} = load_segment(false, Segment), + {SegEntries, _UnackedCount, Segment1} = load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, - {SegEntries1, _PubCountDelta, _AckCountDelta} = + {SegEntries1, _UnackedCountDelta} = segment_plus_journal(SegEntries, JEntries), {array:sparse_foldr( fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) @@ -458,13 +456,12 @@ terminate(StoreShutdown, Terms, State = end, SegTerms = segment_fold( fun (Seg, #segment { handle = Hdl, - pubs = PubCount, - acks = AckCount }, SegTermsAcc) -> + unacked = UnackedCount }, SegTermsAcc) -> ok = case Hdl of undefined -> ok; _ -> file_handle_cache:close(Hdl) end, - [{Seg, {PubCount, AckCount}} | SegTermsAcc] + [{Seg, UnackedCount} | SegTermsAcc] end, [], Segments), case StoreShutdown of true -> store_clean_shutdown([{segments, SegTerms} | Terms], Dir); @@ -473,17 +470,16 @@ terminate(StoreShutdown, Terms, State = State #qistate { journal_handle = undefined, segments = undefined }. recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> - {SegEntries, PubCount, AckCount, Segment1} = load_segment(false, Segment), + {SegEntries, UnackedCount, Segment1} = load_segment(false, Segment), #segment { journal_entries = JEntries } = Segment1, - {SegEntries1, PubCountDelta, AckCountDelta} = + {SegEntries1, UnackedCountDelta} = segment_plus_journal(SegEntries, JEntries), array:sparse_foldl( fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment2) -> recover_message(ContainsCheckFun(Guid), CleanShutdown, Del, RelSeq, Segment2) end, - Segment1 #segment { pubs = PubCount + PubCountDelta, - acks = AckCount + AckCountDelta}, + Segment1 #segment { unacked = UnackedCount + UnackedCountDelta }, SegEntries1). 
recover_message( true, true, _Del, _RelSeq, Segment) -> @@ -559,14 +555,13 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, add_to_journal(RelSeq, Action, Segment = #segment { journal_entries = JEntries, - pubs = PubCount, - acks = AckCount }) -> + unacked = UnackedCount }) -> Segment1 = Segment #segment { journal_entries = add_to_journal(RelSeq, Action, JEntries) }, case Action of del -> Segment1; - ack -> Segment1 #segment { acks = AckCount + 1 }; - ?PUB -> Segment1 #segment { pubs = PubCount + 1 } + ack -> Segment1 #segment { unacked = UnackedCount - 1 }; + ?PUB -> Segment1 #segment { unacked = UnackedCount + 1 } end; add_to_journal(RelSeq, Action, JEntries) -> @@ -596,14 +591,14 @@ flush_journal(State = #qistate { segments = Segments }) -> Segments1 = segment_fold( fun (_Seg, #segment { journal_entries = JEntries, - pubs = PubCount, - acks = AckCount } = Segment, SegmentsN) -> - case PubCount =:= AckCount of - true -> ok = delete_segment(Segment), - SegmentsN; - false -> segment_store( - append_journal_to_segment(Segment, JEntries), - SegmentsN) + unacked = UnackedCount } = Segment, + SegmentsN) -> + case UnackedCount of + 0 -> ok = delete_segment(Segment), + SegmentsN; + _ -> segment_store( + append_journal_to_segment(Segment, JEntries), + SegmentsN) end end, segments_new(), Segments), {JournalHdl, State1} = @@ -641,23 +636,21 @@ load_journal(State) -> Segments1 = segment_map( fun (_Seg, Segment = #segment { journal_entries = JEntries, - pubs = PubCountInJournal, - acks = AckCountInJournal }) -> + unacked = UnackedCountInJournal }) -> %% We want to keep ack'd entries in so that we can %% remove them if duplicates are in the journal. The %% counts here are purely from the segment itself. - {SegEntries, PubCountInSeg, AckCountInSeg, Segment1} = + {SegEntries, UnackedCountInSeg, Segment1} = load_segment(true, Segment), %% Removed counts here are the number of pubs and %% acks that are duplicates - i.e. found in both the %% segment and journal. - {JEntries1, PubsRemoved, AcksRemoved} = + {JEntries1, UnackedCountDuplicates} = journal_minus_segment(JEntries, SegEntries), - PubCount1 = PubCountInSeg + PubCountInJournal - PubsRemoved, - AckCount1 = AckCountInSeg + AckCountInJournal - AcksRemoved, Segment1 #segment { journal_entries = JEntries1, - pubs = PubCount1, - acks = AckCount1 } + unacked = UnackedCountInJournal + + UnackedCountInSeg - + UnackedCountDuplicates } end, Segments), State2 #qistate { segments = Segments1 }. @@ -731,8 +724,7 @@ get_segment_handle(Segment = #segment { handle = Hdl }) -> {Hdl, Segment}. segment_new(Seg, Dir) -> - #segment { pubs = 0, - acks = 0, + #segment { unacked = 0, handle = undefined, journal_entries = array_new(), path = seg_num_to_path(Dir, Seg), @@ -824,15 +816,15 @@ load_segment(KeepAcked, Segment = #segment { path = Path, handle = SegHdl }) -> _ -> true end, case SegmentExists of - false -> {array_new(), 0, 0, Segment}; + false -> {array_new(), 0, Segment}; true -> {Hdl, Segment1} = get_segment_handle(Segment), {ok, 0} = file_handle_cache:position(Hdl, bof), - {SegEntries, PubCount, AckCount} = - load_segment_entries(KeepAcked, Hdl, array_new(), 0, 0), - {SegEntries, PubCount, AckCount, Segment1} + {SegEntries, UnackedCount} = + load_segment_entries(KeepAcked, Hdl, array_new(), 0), + {SegEntries, UnackedCount, Segment1} end. 
-load_segment_entries(KeepAcked, Hdl, SegEntries, PubCount, AckCount) ->
+load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) ->
     case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
         {ok, <<?PUB_PREFIX:?PUB_PREFIX_BITS,
                IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
@@ -842,22 +834,22 @@ load_segment_entries(KeepAcked, Hdl, SegEntries, PubCount, AckCount) ->
             Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack},
             SegEntries1 = array:set(RelSeq, Obj, SegEntries),
             load_segment_entries(KeepAcked, Hdl, SegEntries1,
-                                 PubCount + 1, AckCount);
+                                 UnackedCount + 1);
         {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                RelSeq:?REL_SEQ_BITS>>} ->
-            {AckCountDelta, SegEntries1} =
+            {UnackedCountDelta, SegEntries1} =
                 case array:get(RelSeq, SegEntries) of
                     {Pub, no_del, no_ack} ->
-                        {0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
+                        { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
                     {Pub, del, no_ack} when KeepAcked ->
-                        {1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
+                        {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
                     {_Pub, del, no_ack} ->
-                        {1, array:reset(RelSeq, SegEntries)}
+                        {-1, array:reset(RelSeq, SegEntries)}
                 end,
             load_segment_entries(KeepAcked, Hdl, SegEntries1,
-                                 PubCount, AckCount + AckCountDelta);
+                                 UnackedCount + UnackedCountDelta);
         _ErrOrEoF ->
-            {SegEntries, PubCount, AckCount}
+            {SegEntries, UnackedCount}
     end.
 
 array_new() ->
@@ -874,104 +866,99 @@ bool_to_int(false) -> 0.
 %% holding for that segment in memory. There must be no duplicates.
 segment_plus_journal(SegEntries, JEntries) ->
     array:sparse_foldl(
-      fun (RelSeq, JObj, {SegEntriesOut, PubsAdded, AcksAdded}) ->
+      fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) ->
              SegEntry = array:get(RelSeq, SegEntriesOut),
-            {Obj, PubsAddedDelta, AcksAddedDelta} =
+            {Obj, AdditionalUnackedDelta} =
                 segment_plus_journal1(SegEntry, JObj),
             {case Obj of
                  undefined -> array:reset(RelSeq, SegEntriesOut);
                  _         -> array:set(RelSeq, Obj, SegEntriesOut)
             end,
-             PubsAdded + PubsAddedDelta,
-             AcksAdded + AcksAddedDelta}
-      end, {SegEntries, 0, 0}, JEntries).
+             AdditionalUnacked + AdditionalUnackedDelta}
+      end, {SegEntries, 0}, JEntries).
 
-%% Here, the result is a triple with the first element containing the
+%% Here, the result is a tuple with the first element containing the
 %% item which we may be adding to (for items only in the journal),
 %% modifying in (bits in both), or, when returning 'undefined',
 %% erasing from (ack in journal, not segment) the segment array. The
-%% other two elements of the triple are the deltas for PubsAdded and
-%% AcksAdded - these get increased when a publish or ack is found in
-%% the journal.
+%% other element of the tuple is the delta for AdditionalUnacked.
 
 segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) ->
-    {Obj, 1, 0};
+    {Obj, 1};
 segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) ->
-    {Obj, 1, 0};
+    {Obj, 1};
 segment_plus_journal1(undefined, {?PUB, del, ack}) ->
-    {undefined, 1, 1};
+    {undefined, 0};
 
 segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) ->
-    {{Pub, del, no_ack}, 0, 0};
+    {{Pub, del, no_ack}, 0};
 segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) ->
-    {undefined, 0, 1};
+    {undefined, -1};
 
 segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) ->
-    {undefined, 0, 1}.
+    {undefined, -1}.
 
 %% Remove from the journal entries for a segment, items that are
 %% duplicates of entries found in the segment itself. Used on start up
 %% to clean up the journal.
 journal_minus_segment(JEntries, SegEntries) ->
     array:sparse_foldl(
-      fun (RelSeq, JObj, {JEntriesOut, PubsRemoved, AcksRemoved}) ->
+      fun (RelSeq, JObj, {JEntriesOut, UnackedRemoved}) ->
              SegEntry = array:get(RelSeq, SegEntries),
-            {Obj, PubsRemovedDelta, AcksRemovedDelta} =
+            {Obj, UnackedRemovedDelta} =
                 journal_minus_segment1(JObj, SegEntry),
              {case Obj of
                   keep      -> JEntriesOut;
                   undefined -> array:reset(RelSeq, JEntriesOut);
                   _         -> array:set(RelSeq, Obj, JEntriesOut)
              end,
-             PubsRemoved + PubsRemovedDelta,
-             AcksRemoved + AcksRemovedDelta}
-      end, {JEntries, 0, 0}, JEntries).
+             UnackedRemoved + UnackedRemovedDelta}
+      end, {JEntries, 0}, JEntries).
 
-%% Here, the result is a triple with the first element containing the
+%% Here, the result is a tuple with the first element containing the
 %% item we are adding to or modifying in the (initially fresh) journal
 %% array. If the item is 'undefined' we leave the journal array
-%% alone. The other two elements of the triple are the deltas for
-%% PubsRemoved and AcksRemoved - these only get increased when a
-%% publish or ack is in both the journal and the segment.
+%% alone. The other element of the tuple is the delta for
+%% UnackedRemoved.
 
 %% Both the same. Must be at least the publish
 journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) ->
-    {undefined, 1, 0};
+    {undefined, 1};
 journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) ->
-    {undefined, 1, 1};
+    {undefined, 0};
 
 %% Just publish in journal
 journal_minus_segment1({?PUB, no_del, no_ack}, undefined) ->
-    {keep, 0, 0};
+    {keep, 0};
 
 %% Publish and deliver in journal
 journal_minus_segment1({?PUB, del, no_ack}, undefined) ->
-    {keep, 0, 0};
+    {keep, 0};
 journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) ->
-    {{no_pub, del, no_ack}, 1, 0};
+    {{no_pub, del, no_ack}, 1};
 
 %% Publish, deliver and ack in journal
 journal_minus_segment1({?PUB, del, ack}, undefined) ->
-    {keep, 0, 0};
+    {keep, 0};
 journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) ->
-    {{no_pub, del, ack}, 1, 0};
+    {{no_pub, del, ack}, 1};
 journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) ->
-    {{no_pub, no_del, ack}, 1, 0};
+    {{no_pub, no_del, ack}, 1};
 
 %% Just deliver in journal
 journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) ->
-    {keep, 0, 0};
+    {keep, 0};
 journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) ->
-    {undefined, 0, 0};
+    {undefined, 0};
 
 %% Just ack in journal
 journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) ->
-    {keep, 0, 0};
+    {keep, 0};
 journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) ->
-    {undefined, 0, 1};
+    {undefined, -1};
 
 %% Deliver and ack in journal
 journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) ->
-    {keep, 0, 0};
+    {keep, 0};
 journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) ->
-    {{no_pub, no_del, ack}, 0, 0};
+    {{no_pub, no_del, ack}, 0};
 journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) ->
-    {undefined, 0, 1}.
+    {undefined, -1}.
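The delta convention above can be checked in isolation: every clause returns the rewritten entry together with the signed change to the unacked-message count, so the sparse_foldl keeps the count consistent by construction. What follows is a minimal, self-contained Erlang sketch mirroring a few of the segment_plus_journal1 clauses; the module name and the concrete {guid, true} publish tuple are invented for illustration and are not part of the patch.

-module(unacked_delta_sketch).
-export([demo/0]).

%% Stand-in for a publish entry; in the real code ?PUB matches a
%% {Guid, IsPersistent} pair.
-define(PUB, {guid, true}).

%% Mirrors segment_plus_journal1/2: segment entry + journal entry ->
%% {new segment entry, signed delta to the unacked-message count}.
plus(undefined, {?PUB, no_del, no_ack} = Obj) -> {Obj, 1};
plus(undefined, {?PUB, del,    no_ack} = Obj) -> {Obj, 1};
plus(undefined, {?PUB, del,    ack})          -> {undefined, 0};
plus({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) ->
    {{Pub, del, no_ack}, 0};
plus({?PUB, no_del, no_ack}, {no_pub, del,    ack}) -> {undefined, -1};
plus({?PUB, del,    no_ack}, {no_pub, no_del, ack}) -> {undefined, -1}.

demo() ->
    %% a publish seen only in the journal adds one unacked message,
    {_, 1} = plus(undefined, {?PUB, no_del, no_ack}),
    %% a pub+del+ack seen only in the journal nets out to zero,
    {undefined, 0} = plus(undefined, {?PUB, del, ack}),
    %% and a journal ack against a segment publish removes one.
    {undefined, -1} = plus({?PUB, del, no_ack}, {no_pub, no_del, ack}),
    ok.

Pasting this module into an Erlang shell and running unacked_delta_sketch:demo() should return ok under those assumptions.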
-- cgit v1.2.1 From ff988fb6b6cb00832b5650e5604dd52d9d087d66 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 13:33:04 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 67bf9f52..32ada569 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -241,8 +241,8 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> end, {Segments, 0}, all_segment_nums(State1)); true -> %% At this stage, we will only know about files that - %% were loaded during journal loading, They *will* have - %% correct ack and pub counts, but for all remaining + %% were loaded during journal loading, They *will* + %% have correct unacked counts, but for all remaining %% segments, if they're not in the Segments store then %% we need to add them and populate with saved data. SegmentDictTerms = @@ -642,15 +642,12 @@ load_journal(State) -> %% counts here are purely from the segment itself. {SegEntries, UnackedCountInSeg, Segment1} = load_segment(true, Segment), - %% Removed counts here are the number of pubs and - %% acks that are duplicates - i.e. found in both the - %% segment and journal. {JEntries1, UnackedCountDuplicates} = journal_minus_segment(JEntries, SegEntries), Segment1 #segment { journal_entries = JEntries1, - unacked = UnackedCountInJournal + - UnackedCountInSeg - - UnackedCountDuplicates } + unacked = (UnackedCountInJournal + + UnackedCountInSeg - + UnackedCountDuplicates) } end, Segments), State2 #qistate { segments = Segments1 }. @@ -724,12 +721,11 @@ get_segment_handle(Segment = #segment { handle = Hdl }) -> {Hdl, Segment}. segment_new(Seg, Dir) -> - #segment { unacked = 0, - handle = undefined, + #segment { unacked = 0, + handle = undefined, journal_entries = array_new(), - path = seg_num_to_path(Dir, Seg), - num = Seg - }. + path = seg_num_to_path(Dir, Seg), + num = Seg }. segment_find_or_new(Seg, Dir, Segments) -> case segment_find(Seg, Segments) of -- cgit v1.2.1 From 762cb123c7ebf780a0a3534e7f2bc89ccbecd58f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 13:36:13 +0100 Subject: refactor --- src/rabbit_queue_index.erl | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 32ada569..acd13a06 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -590,23 +590,18 @@ flush_journal(State = #qistate { dirty_count = 0 }) -> flush_journal(State = #qistate { segments = Segments }) -> Segments1 = segment_fold( - fun (_Seg, #segment { journal_entries = JEntries, - unacked = UnackedCount } = Segment, - SegmentsN) -> - case UnackedCount of - 0 -> ok = delete_segment(Segment), - SegmentsN; - _ -> segment_store( - append_journal_to_segment(Segment, JEntries), - SegmentsN) - end + fun (_Seg, #segment { unacked = 0 } = Segment, SegmentsN) -> + ok = delete_segment(Segment), + SegmentsN; + (_Seg, #segment {} = Segment, SegmentsN) -> + segment_store(append_journal_to_segment(Segment), SegmentsN) end, segments_new(), Segments), {JournalHdl, State1} = get_journal_handle(State #qistate { segments = Segments1 }), ok = file_handle_cache:clear(JournalHdl), State1 #qistate { dirty_count = 0 }. 
-append_journal_to_segment(Segment, JEntries) -> +append_journal_to_segment(#segment { journal_entries = JEntries } = Segment) -> case array:sparse_size(JEntries) of 0 -> Segment; _ -> {Hdl, Segment1} = get_segment_handle(Segment), -- cgit v1.2.1 From 10c47431f4023b5a193a82fe2913765e2aa237bb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 May 2010 13:53:27 +0100 Subject: Full code coverage of segment_plus_journal --- src/rabbit_tests.erl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4075ddf7..56dc5c4c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1524,6 +1524,27 @@ test_queue_index() -> Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), Qi40 = queue_index_flush(Qi39), _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40), + ok = stop_msg_store(), + ok = empty_test_queue(), + + %% d) get messages in all states to a segment, then flush, then do + %% the same again, don't flush and read. This will hit all + %% possibilities in combining the segment with the journal. + {0, _Terms7, Qi42} = test_queue_init(), + {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], false, Qi42), + Qi44 = queue_index_deliver([0,1,4], Qi43), + Qi45 = rabbit_queue_index:ack([0], Qi44), + Qi46 = queue_index_flush(Qi45), + {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), + Qi48 = queue_index_deliver([2,3,5,6], Qi47), + Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), + {[], undefined, Qi50} = rabbit_queue_index:read(0, 4, Qi49), + {ReadD, undefined, Qi51} = rabbit_queue_index:read(4, 7, Qi50), + ok = verify_read_with_published(true, false, ReadD, [Four, Five, Six]), + {ReadE, undefined, Qi52} = rabbit_queue_index:read(7, 9, Qi51), + ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), + _Qi53 = rabbit_queue_index:terminate_and_erase(Qi52), + ok = stop_msg_store(), ok = rabbit_variable_queue:start([]), ok = stop_msg_store(), -- cgit v1.2.1 From d8bab157f8924bce92e493fe37ebfbb047de4f21 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 May 2010 15:08:35 +0100 Subject: Added test which hits all cases in journal_minus_segment which do not require duplicates between segment and journal --- src/rabbit_tests.erl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 56dc5c4c..49ebb32b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1544,6 +1544,27 @@ test_queue_index() -> {ReadE, undefined, Qi52} = rabbit_queue_index:read(7, 9, Qi51), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), _Qi53 = rabbit_queue_index:terminate_and_erase(Qi52), + ok = stop_msg_store(), + ok = empty_test_queue(), + + %% e) as for (d), but use terminate instead of read, which will + %% exercise journal_minus_segment, not segment_plus_journal. 
+ {0, _Terms8, Qi54} = test_queue_init(), + {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], true, Qi54), + Qi56 = queue_index_deliver([0,1,4], Qi55), + Qi57 = rabbit_queue_index:ack([0], Qi56), + _Qi58 = rabbit_queue_index:terminate([], Qi57), + ok = stop_msg_store(), + ok = rabbit_variable_queue:start([test_queue()]), + {5, _Terms9, Qi59} = test_queue_init(), + {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), + Qi61 = queue_index_deliver([2,3,5,6], Qi60), + Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), + _Qi63 = rabbit_queue_index:terminate([], Qi62), + ok = stop_msg_store(), + ok = rabbit_variable_queue:start([test_queue()]), + {5, _Terms10, Qi64} = test_queue_init(), + _Qi65 = rabbit_queue_index:terminate_and_erase(Qi64), ok = stop_msg_store(), ok = rabbit_variable_queue:start([]), -- cgit v1.2.1 From d8f453b409217afa8c90bdad612c194ce290fa2c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 15:33:27 +0100 Subject: do the minimum amount of work necessary on clean queue recovery Only read from the journal. No segment reading. No writing. --- src/rabbit_queue_index.erl | 127 +++++++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 67 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index acd13a06..e02483ef 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -210,73 +210,19 @@ %%---------------------------------------------------------------------------- init(Name, MsgStoreRecovered, ContainsCheckFun) -> - State = blank_state(Name), - Terms = case read_shutdown_terms(State #qistate.dir) of + State = #qistate { dir = Dir } = blank_state(Name), + Terms = case read_shutdown_terms(Dir) of {error, _} -> []; {ok, Terms1} -> Terms1 end, - %% Load the journal completely. This will also load segments which - %% have entries in the journal and remove duplicates. The counts - %% will correctly reflect the combination of the segment and the - %% journal. - State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), CleanShutdown = detect_clean_shutdown(Dir), - {Segments1, Count} = + {Count, State1} = case CleanShutdown andalso MsgStoreRecovered of - false -> - %% Load each segment in turn and filter out messages - %% that are not in the msg_store, by adding acks to - %% the journal. These acks only go to the RAM journal - %% as it doesn't matter if we lose them. Also mark - %% delivered if not clean shutdown. Also find the - %% number of unacked messages. - lists:foldl( - fun (Seg, {Segments2, CountAcc}) -> - Segment = #segment { unacked = UnackedCount } = - recover_segment( - ContainsCheckFun, CleanShutdown, - segment_find_or_new(Seg, Dir, Segments2)), - {segment_store(Segment, Segments2), - CountAcc + UnackedCount} - end, {Segments, 0}, all_segment_nums(State1)); - true -> - %% At this stage, we will only know about files that - %% were loaded during journal loading, They *will* - %% have correct unacked counts, but for all remaining - %% segments, if they're not in the Segments store then - %% we need to add them and populate with saved data. 
- SegmentDictTerms = - dict:from_list(proplists:get_value(segments, Terms, [])), - {lists:foldl( - fun (Seg, SegmentsN) -> - case {segment_find(Seg, SegmentsN), - dict:find(Seg, SegmentDictTerms)} of - {error, {ok, UnackedCount}} -> - Segment = segment_new(Seg, Dir), - segment_store( - Segment #segment { - unacked = UnackedCount }, - SegmentsN); - _ -> - SegmentsN - end - end, Segments, all_segment_nums(State1)), - %% the counts above include transient messages, which - %% would be the wrong thing to return - undefined} + true -> RecoveredCounts = proplists:get_value(segments, Terms, []), + init_clean(RecoveredCounts, State); + false -> init_dirty(CleanShutdown, ContainsCheckFun, State) end, - %% Flush so we eagerly remove any segments that have become empty - %% due to - %% a) processing the journal, - %% b) ContainsCheckFun returning false in the non-clean - %% recovery case, or - %% c) recovering a segment with PubCount==AckCount in the clean - %% recovery case - %% Since the latter doesn't go through the journal logic we we - %% artificially set the dirty_count non zero. - State2 = flush_journal(State1 #qistate { segments = Segments1, - dirty_count = 1 }), - {Count, Terms, State2}. + {Count, Terms, State1}. terminate(Terms, State) -> terminate(true, Terms, State). @@ -329,7 +275,8 @@ sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> ok = file_handle_cache:sync(JournalHdl), State. -flush(State) -> flush_journal(State). +flush(State = #qistate { dirty_count = 0 }) -> State; +flush(State) -> flush_journal(State). read(StartEnd, StartEnd, State) -> {[], undefined, State}; @@ -445,6 +392,50 @@ read_shutdown_terms(Dir) -> store_clean_shutdown(Terms, Dir) -> rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). +init_clean(RecoveredCounts, State) -> + %% Load the journal. Since this is a clean recovery this (almost) + %% gets us back to where we were on shutdown. + State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), + %% The journal loading only creates records for segments touched + %% by the journal, and the counts are based on the journal entries + %% only. We need *complete* counts for *all* segments. By an + %% amazing coincidence we stored that information on shutdown. + Segments1 = + lists:foldl( + fun ({Seg, UnackedCount}, SegmentsN) -> + Segment = segment_find_or_new(Seg, Dir, SegmentsN), + segment_store(Segment #segment {unacked = UnackedCount }, + SegmentsN) + end, Segments, RecoveredCounts), + %% the counts above include transient messages, which would be the + %% wrong thing to return + {undefined, State1 # qistate { segments = Segments1 }}. + +init_dirty(CleanShutdown, ContainsCheckFun, State) -> + %% Recover the journal completely. This will also load segments + %% which have entries in the journal and remove duplicates. The + %% counts will correctly reflect the combination of the segment + %% and the journal. + State1 = #qistate { dir = Dir, segments = Segments } = + recover_journal(State), + {Segments1, Count} = + %% Load each segment in turn and filter out messages that are + %% not in the msg_store, by adding acks to the journal. These + %% acks only go to the RAM journal as it doesn't matter if we + %% lose them. Also mark delivered if not clean shutdown. Also + %% find the number of unacked messages. 
+ lists:foldl( + fun (Seg, {Segments2, CountAcc}) -> + Segment = #segment { unacked = UnackedCount } = + recover_segment(ContainsCheckFun, CleanShutdown, + segment_find_or_new(Seg, Dir, Segments2)), + {segment_store(Segment, Segments2), CountAcc + UnackedCount} + end, {Segments, 0}, all_segment_nums(State1)), + %% Unconditionally flush since the dirty_count doesn't get updated + %% by the above foldl. + State2 = flush_journal(State1 #qistate { segments = Segments1 }), + {Count, State2}. + terminate(_StoreShutdown, _Terms, State = #qistate { segments = undefined }) -> State; terminate(StoreShutdown, Terms, State = @@ -527,7 +518,7 @@ queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> end. queue_index_walker_reader(QueueName, Gatherer) -> - State = load_journal(blank_state(QueueName)), + State = recover_journal(blank_state(QueueName)), State1 = lists:foldl( fun (Seg, State2) -> SeqId = reconstruct_seq_id(Seg, 0), @@ -585,8 +576,6 @@ maybe_flush_journal(State = #qistate { dirty_count = DCount }) maybe_flush_journal(State) -> State. -flush_journal(State = #qistate { dirty_count = 0 }) -> - State; flush_journal(State = #qistate { segments = Segments }) -> Segments1 = segment_fold( @@ -627,7 +616,11 @@ get_journal_handle(State = #qistate { journal_handle = Hdl }) -> load_journal(State) -> {JournalHdl, State1} = get_journal_handle(State), {ok, 0} = file_handle_cache:position(JournalHdl, 0), - State2 = #qistate { segments = Segments } = load_journal_entries(State1), + load_journal_entries(State1). + +%% ditto +recover_journal(State) -> + State1 = #qistate { segments = Segments } = load_journal(State), Segments1 = segment_map( fun (_Seg, Segment = #segment { journal_entries = JEntries, @@ -644,7 +637,7 @@ load_journal(State) -> UnackedCountInSeg - UnackedCountDuplicates) } end, Segments), - State2 #qistate { segments = Segments1 }. + State1 #qistate { segments = Segments1 }. load_journal_entries(State = #qistate { journal_handle = Hdl }) -> case file_handle_cache:read(Hdl, ?SEQ_BYTES) of -- cgit v1.2.1 From b40ad362e3e477d6f8b3ed67ff99bf0258ec51a0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 May 2010 16:21:16 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e02483ef..7aaf2857 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -404,7 +404,7 @@ init_clean(RecoveredCounts, State) -> lists:foldl( fun ({Seg, UnackedCount}, SegmentsN) -> Segment = segment_find_or_new(Seg, Dir, SegmentsN), - segment_store(Segment #segment {unacked = UnackedCount }, + segment_store(Segment #segment { unacked = UnackedCount }, SegmentsN) end, Segments, RecoveredCounts), %% the counts above include transient messages, which would be the -- cgit v1.2.1 From 85df084f15fef4ed84cf0c36da53b0f8aefd9da4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 May 2010 16:22:14 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 7aaf2857..92127da1 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -413,7 +413,7 @@ init_clean(RecoveredCounts, State) -> init_dirty(CleanShutdown, ContainsCheckFun, State) -> %% Recover the journal completely. This will also load segments - %% which have entries in the journal and remove duplicates. The + %% which have entries in the journal and remove duplicates. 
The %% counts will correctly reflect the combination of the segment %% and the journal. State1 = #qistate { dir = Dir, segments = Segments } = -- cgit v1.2.1 From 33bef63c5c5552927d6a0bf335f417da146a5dd8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 19 May 2010 17:58:25 +0100 Subject: more efficient 'bounds' function This no longer touches the file system. It's still O(n * log n) in the number of segments though. --- src/rabbit_queue_index.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index e02483ef..93dce89e 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -314,17 +314,19 @@ next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), reconstruct_seq_id(Seg + 1, 0). -bounds(State) -> - SegNums = all_segment_nums(State), +bounds(State = #qistate { segments = Segments }) -> + %% This is not particularly efficient, but only gets invoked on + %% queue initialisation and termination. + SegNums = lists:sort(segment_fetch_keys(Segments)), %% Don't bother trying to figure out the lowest seq_id, merely the %% seq_id of the start of the lowest segment. That seq_id may not %% actually exist, but that's fine. The important thing is that %% the segment exists and the seq_id reported is on a segment %% boundary. - + %% %% We also don't really care about the max seq_id. Just start the %% next segment: it makes life much easier. - + %% %% SegNums is sorted, ascending. {LowSeqId, NextSeqId} = case SegNums of -- cgit v1.2.1 From 78ed468ff08c63b7a9df93e633291a9ac611ed72 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 20 May 2010 23:42:19 +0100 Subject: rename queue_index:terminate_and_erase to delete_and_terminate to match the corresponding function in variable_queue --- src/rabbit_queue_index.erl | 6 +++--- src/rabbit_tests.erl | 14 +++++++------- src/rabbit_variable_queue.erl | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 92127da1..f1b4346a 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -31,7 +31,7 @@ -module(rabbit_queue_index). --export([init/3, terminate/2, terminate_and_erase/1, publish/4, +-export([init/3, terminate/2, delete_and_terminate/1, publish/4, deliver/2, ack/2, sync/2, flush/1, read/3, next_segment_boundary/1, bounds/1, recover/1]). @@ -188,7 +188,7 @@ -spec(init/3 :: (queue_name(), boolean(), fun ((guid()) -> boolean())) -> {'undefined' | non_neg_integer(), [any()], qistate()}). -spec(terminate/2 :: ([any()], qistate()) -> qistate()). --spec(terminate_and_erase/1 :: (qistate()) -> qistate()). +-spec(delete_and_terminate/1 :: (qistate()) -> qistate()). -spec(publish/4 :: (guid(), seq_id(), boolean(), qistate()) -> qistate()). -spec(deliver/2 :: (seq_id(), qistate()) -> qistate()). -spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). @@ -227,7 +227,7 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> terminate(Terms, State) -> terminate(true, Terms, State). -terminate_and_erase(State) -> +delete_and_terminate(State) -> State1 = terminate(false, [], State), ok = rabbit_misc:recursive_delete([State1 #qistate.dir]), State1. 
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 49ebb32b..646ac03f 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1395,7 +1395,7 @@ test_queue() -> empty_test_queue() -> ok = rabbit_variable_queue:start([]), {0, _Terms, Qi1} = test_queue_init(), - _Qi2 = rabbit_queue_index:terminate_and_erase(Qi1), + _Qi2 = rabbit_queue_index:delete_and_terminate(Qi1), ok. queue_index_publish(SeqIds, Persistent, Qi) -> @@ -1487,7 +1487,7 @@ test_queue_index() -> ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked {0, _Terms3, Qi20} = test_queue_init(), - _Qi21 = rabbit_queue_index:terminate_and_erase(Qi20), + _Qi21 = rabbit_queue_index:delete_and_terminate(Qi20), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1501,7 +1501,7 @@ test_queue_index() -> Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), Qi26 = queue_index_flush(Qi25), {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), - _Qi28 = rabbit_queue_index:terminate_and_erase(Qi27), + _Qi28 = rabbit_queue_index:delete_and_terminate(Qi27), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1512,7 +1512,7 @@ test_queue_index() -> {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), Qi34 = queue_index_flush(Qi33), - _Qi35 = rabbit_queue_index:terminate_and_erase(Qi34), + _Qi35 = rabbit_queue_index:delete_and_terminate(Qi34), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1523,7 +1523,7 @@ test_queue_index() -> Qi38 = queue_index_deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), Qi40 = queue_index_flush(Qi39), - _Qi41 = rabbit_queue_index:terminate_and_erase(Qi40), + _Qi41 = rabbit_queue_index:delete_and_terminate(Qi40), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1543,7 +1543,7 @@ test_queue_index() -> ok = verify_read_with_published(true, false, ReadD, [Four, Five, Six]), {ReadE, undefined, Qi52} = rabbit_queue_index:read(7, 9, Qi51), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), - _Qi53 = rabbit_queue_index:terminate_and_erase(Qi52), + _Qi53 = rabbit_queue_index:delete_and_terminate(Qi52), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1564,7 +1564,7 @@ test_queue_index() -> ok = stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), {5, _Terms10, Qi64} = test_queue_init(), - _Qi65 = rabbit_queue_index:terminate_and_erase(Qi64), + _Qi65 = rabbit_queue_index:delete_and_terminate(Qi64), ok = stop_msg_store(), ok = rabbit_variable_queue:start([]), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cc876b5e..3d485106 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -398,7 +398,7 @@ delete_and_terminate(State) -> {DeltaSeqId, NextSeqId, IndexState3} -> delete1(PersistentStore, TransientThreshold, NextSeqId, DeltaSeqId, IndexState3) end, - IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2), + IndexState5 = rabbit_queue_index:delete_and_terminate(IndexState2), rabbit_msg_store:delete_client(PersistentStore, PRef), rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), rabbit_msg_store:client_terminate(MSCStateP), -- cgit v1.2.1 From 1549142606fe1595383616d06882fa632f42a824 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 21 May 2010 17:18:44 +0100 Subject: Stop persistent msgs in non-durable queues from reaching disk --- src/rabbit_variable_queue.erl | 45 ++++++++++++++++++++++++++++--------------- 1 file 
changed, 29 insertions(+), 16 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cc876b5e..b10703b4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -192,7 +192,8 @@ persistent_store, persistent_count, transient_threshold, - pending_ack + pending_ack, + durable }). -record(msg_status, @@ -268,7 +269,8 @@ persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), transient_threshold :: non_neg_integer(), - pending_ack :: dict() + pending_ack :: dict(), + durable :: boolean() }). -include("rabbit_backing_queue_spec.hrl"). @@ -360,7 +362,8 @@ init(QueueName, IsDurable, _Recover) -> persistent_store = PersistentStore, persistent_count = DeltaCount1, transient_threshold = NextSeqId, - pending_ack = dict:new() + pending_ack = dict:new(), + durable = IsDurable }, maybe_deltas_to_betas(State). @@ -433,9 +436,11 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, msg_store_clients = MSCState, persistent_store = PersistentStore, persistent_count = PCount, - pending_ack = PA }) -> + pending_ack = PA, + durable = IsDurable }) -> MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, + msg = Msg, guid = Guid, seq_id = SeqId, + is_persistent = IsDurable andalso IsPersistent, is_delivered = true, msg_on_disk = false, index_on_disk = false }, {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), @@ -575,7 +580,8 @@ ack(AckTags, State = #vqstate { index_state = IndexState, tx_publish(Txn, Msg = #basic_message { is_persistent = true, guid = Guid }, State = #vqstate { msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> + persistent_store = PersistentStore, + durable = true }) -> MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, is_delivered = false, msg_on_disk = false, index_on_disk = false }, @@ -591,13 +597,19 @@ tx_ack(Txn, AckTags, State) -> ack_in_tx(Txn, AckTags), State. -tx_rollback(Txn, State = #vqstate { persistent_store = PersistentStore }) -> +tx_rollback(Txn, State = #vqstate { persistent_store = PersistentStore, + durable = IsDurable }) -> #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), erase_tx(Txn), - ok = rabbit_msg_store:remove(PersistentStore, persistent_guids(Pubs)), + ok = case IsDurable of + true -> rabbit_msg_store:remove(PersistentStore, + persistent_guids(Pubs)); + false -> ok + end, {lists:flatten(AckTags), State}. -tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore }) -> +tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore, + durable = IsDurable }) -> %% If we are a non-durable queue, or we have no persistent pubs, %% we can skip the msg_store loop. 
#tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), @@ -605,7 +617,7 @@ tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore }) -> PubsOrdered = lists:reverse(Pubs), AckTags1 = lists:flatten(AckTags), PersistentGuids = persistent_guids(PubsOrdered), - IsTransientPubs = [] == PersistentGuids, + IsTransientPubs = (not IsDurable) orelse [] == PersistentGuids, {AckTags1, case IsTransientPubs orelse ?TRANSIENT_MSG_STORE == PersistentStore of @@ -965,18 +977,18 @@ tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, State = tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> State; tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, - persistent_store = PersistentStore }) -> + durable = IsDurable }) -> Acks = lists:flatten(SAcks), State1 = ack(Acks, State), - IsPersistentStore = ?PERSISTENT_MSG_STORE == PersistentStore, Pubs = lists:flatten(lists:reverse(SPubs)), {SeqIds, State2 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, {SeqIdsAcc, StateN}) -> {SeqId, StateN1} = - publish(Msg, false, IsPersistent, StateN), - {case IsPersistentStore andalso IsPersistent of + publish(Msg, false, IsDurable andalso IsPersistent, + StateN), + {case IsDurable andalso IsPersistent of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc end, StateN1} @@ -1172,9 +1184,10 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, IsDelivered, MsgOnDisk, State = #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount, - persistent_count = PCount }) -> + persistent_count = PCount, durable = IsDurable }) -> MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, + msg = Msg, guid = Guid, seq_id = SeqId, + is_persistent = IsDurable andalso IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = false }, PCount1 = PCount + case IsPersistent of -- cgit v1.2.1 From a38fefbdd3f4c950351f65bc9b5e593b34cc4d81 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 21 May 2010 18:01:41 +0100 Subject: Rip out now-unnecessary logic for setting the persistent store to the transient store: that solution allowed the persistent flag to be honoured in non-durable queues by sending the message to the transient store; we've now decided that we don't want to send non-persistent msgs to any store and are handling that by (effectively) unsetting the is_persistent flag for all msgs arriving in a non-durable queue --- src/rabbit_variable_queue.erl | 198 ++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 114 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b10703b4..9424fab5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -189,7 +189,6 @@ len, on_sync, msg_store_clients, - persistent_store, persistent_count, transient_threshold, pending_ack, @@ -266,7 +265,6 @@ [fun (() -> any())]}, msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, - persistent_store :: pid() | atom(), persistent_count :: non_neg_integer(), transient_threshold :: non_neg_integer(), pending_ack :: dict(), @@ -304,12 +302,8 @@ start(DurableQueues) -> Refs, StartFunState]). 
init(QueueName, IsDurable, _Recover) -> - PersistentStore = case IsDurable of - true -> ?PERSISTENT_MSG_STORE; - false -> ?TRANSIENT_MSG_STORE - end, MsgStoreRecovered = - rabbit_msg_store:successfully_recovered_state(PersistentStore), + rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), ContainsCheckFun = fun (Guid) -> rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) @@ -333,7 +327,11 @@ init(QueueName, IsDurable, _Recover) -> end_seq_id = NextSeqId } end, Now = now(), - PersistentClient = rabbit_msg_store:client_init(PersistentStore, PRef), + PersistentClient = + case IsDurable of + true -> rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, PRef); + false -> undefined + end, TransientClient = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), State = #vqstate { q1 = queue:new(), @@ -359,7 +357,6 @@ init(QueueName, IsDurable, _Recover) -> on_sync = {[], [], []}, msg_store_clients = {{PersistentClient, PRef}, {TransientClient, TRef}}, - persistent_store = PersistentStore, persistent_count = DeltaCount1, transient_threshold = NextSeqId, pending_ack = dict:new(), @@ -388,7 +385,6 @@ delete_and_terminate(State) -> State2 = #vqstate { index_state = IndexState, msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, - persistent_store = PersistentStore, transient_threshold = TransientThreshold } = remove_pending_ack(false, State1), %% flushing here is good because it deletes all full segments, @@ -399,21 +395,22 @@ delete_and_terminate(State) -> {N, N, IndexState3} -> IndexState3; {DeltaSeqId, NextSeqId, IndexState3} -> - delete1(PersistentStore, TransientThreshold, NextSeqId, DeltaSeqId, IndexState3) + delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState3) end, IndexState5 = rabbit_queue_index:terminate_and_erase(IndexState2), - rabbit_msg_store:delete_client(PersistentStore, PRef), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:delete_client(?PERSISTENT_MSG_STORE, PRef), + rabbit_msg_store:client_terminate(MSCStateP) + end, rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), - rabbit_msg_store:client_terminate(MSCStateP), rabbit_msg_store:client_terminate(MSCStateT), State2 #vqstate { index_state = IndexState5, msg_store_clients = undefined }. 
-purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len, - persistent_store = PersistentStore }) -> +purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = - remove_queue_entries(PersistentStore, fun rabbit_misc:queue_fold/3, - Q4, IndexState), + remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), {Len, State1} = purge1(Q4Count, State #vqstate { index_state = IndexState1, q4 = queue:new() }), @@ -434,18 +431,17 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, out_counter = OutCount, in_counter = InCount, msg_store_clients = MSCState, - persistent_store = PersistentStore, persistent_count = PCount, pending_ack = PA, durable = IsDurable }) -> + IsPersistent1 = IsDurable andalso IsPersistent, MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, - is_persistent = IsDurable andalso IsPersistent, + msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent1, is_delivered = true, msg_on_disk = false, index_on_disk = false }, - {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(PersistentStore, false, - MsgStatus, MSCState), + {MsgStatus1, MSCState1} = + maybe_write_msg_to_disk(false, MsgStatus, MSCState), State1 = State #vqstate { msg_store_clients = MSCState1, - persistent_count = PCount + case IsPersistent of + persistent_count = PCount + case IsPersistent1 of true -> 1; false -> 0 end, @@ -467,7 +463,7 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, fetch(AckRequired, State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, index_state = IndexState, len = Len, persistent_count = PCount, - persistent_store = PersistentStore, pending_ack = PA }) -> + pending_ack = PA }) -> case queue:out(Q4) of {empty, _Q4} -> case fetch_from_q3_or_delta(State) of @@ -493,7 +489,7 @@ fetch(AckRequired, State = end, %% 2. 
If it's on disk and there's no Ack required, remove it - MsgStore = find_msg_store(IsPersistent, PersistentStore), + MsgStore = find_msg_store(IsPersistent), IndexState2 = case MsgOnDisk andalso not AckRequired of %% Remove from disk now @@ -547,7 +543,6 @@ ack([], State) -> State; ack(AckTags, State = #vqstate { index_state = IndexState, persistent_count = PCount, - persistent_store = PersistentStore, pending_ack = PA }) -> {GuidsByStore, SeqIds, PA1} = lists:foldl( @@ -558,19 +553,20 @@ ack(AckTags, State = #vqstate { index_state = IndexState, msg_on_disk = false, is_persistent = false }} -> {Dict, SeqIds, PAN1}; - {ok, {false, Guid}} -> - {rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, Guid, - Dict), SeqIds, PAN1}; - {ok, {true, Guid}} -> - {rabbit_misc:dict_cons(PersistentStore, Guid, Dict), - [SeqId | SeqIds], PAN1} + {ok, {IsPersistent, Guid}} -> + SeqIds1 = case IsPersistent of + true -> [SeqId | SeqIds]; + false -> SeqIds + end, + {rabbit_misc:dict_cons(find_msg_store(IsPersistent), + Guid, Dict), SeqIds1, PAN1} end end, {dict:new(), [], PA}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), - PCount1 = PCount - case dict:find(PersistentStore, GuidsByStore) of + PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of error -> 0; {ok, Guids} -> length(Guids) end, @@ -580,13 +576,12 @@ ack(AckTags, State = #vqstate { index_state = IndexState, tx_publish(Txn, Msg = #basic_message { is_persistent = true, guid = Guid }, State = #vqstate { msg_store_clients = MSCState, - persistent_store = PersistentStore, durable = true }) -> MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, is_delivered = false, msg_on_disk = false, index_on_disk = false }, {#msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), + maybe_write_msg_to_disk(false, MsgStatus, MSCState), publish_in_tx(Txn, Msg), State #vqstate { msg_store_clients = MSCState1 }; tx_publish(Txn, Msg, State) -> @@ -597,19 +592,17 @@ tx_ack(Txn, AckTags, State) -> ack_in_tx(Txn, AckTags), State. -tx_rollback(Txn, State = #vqstate { persistent_store = PersistentStore, - durable = IsDurable }) -> +tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), erase_tx(Txn), ok = case IsDurable of - true -> rabbit_msg_store:remove(PersistentStore, + true -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, persistent_guids(Pubs)); false -> ok end, {lists:flatten(AckTags), State}. -tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore, - durable = IsDurable }) -> +tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> %% If we are a non-durable queue, or we have no persistent pubs, %% we can skip the msg_store loop. 
 #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
@@ -617,10 +610,9 @@ tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore,
     PubsOrdered = lists:reverse(Pubs),
     AckTags1 = lists:flatten(AckTags),
     PersistentGuids = persistent_guids(PubsOrdered),
-    IsTransientPubs = (not IsDurable) orelse [] == PersistentGuids,
+    IsTransientPubs = [] == PersistentGuids,
     {AckTags1,
-     case IsTransientPubs orelse
-         ?TRANSIENT_MSG_STORE == PersistentStore of
+     case (not IsDurable) orelse IsTransientPubs of
          true ->
              tx_commit_post_msg_store(
                IsTransientPubs, PubsOrdered, AckTags1, Fun, State);
@@ -632,7 +624,7 @@ tx_commit(Txn, Fun, State = #vqstate { persistent_store = PersistentStore,
              State
      end}.
 
-requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) ->
+requeue(AckTags, State) ->
     {SeqIds, GuidsByStore,
      State1 = #vqstate { index_state = IndexState,
                          persistent_count = PCount }} =
@@ -653,27 +645,24 @@ requeue(AckTags, State = #vqstate { persistent_store = PersistentStore }) ->
                   {ok, {IsPersistent, Guid}} ->
                       {{ok, Msg = #basic_message{}}, MSCStateN1} =
                           read_from_msg_store(
-                            PersistentStore, MSCStateN, IsPersistent, Guid),
+                            MSCStateN, IsPersistent, Guid),
                       StateN2 = StateN1 #vqstate {
                                   msg_store_clients = MSCStateN1 },
                       {_SeqId, StateN3} = publish(Msg, true, true, StateN2),
-                      {SeqIdsAcc1, MsgStore} =
-                          case IsPersistent of
-                              true ->
-                                  {[SeqId | SeqIdsAcc], PersistentStore};
-                              false ->
-                                  {SeqIdsAcc, ?TRANSIENT_MSG_STORE}
-                          end,
+                      MsgStore = find_msg_store(IsPersistent),
+                      SeqIdsAcc1 = case IsPersistent of
+                                       true  -> [SeqId | SeqIdsAcc];
+                                       false -> SeqIdsAcc
+                                   end,
                       {SeqIdsAcc1,
-                       rabbit_misc:dict_cons(MsgStore, Guid, Dict),
-                       StateN3}
+                       rabbit_misc:dict_cons(MsgStore, Guid, Dict), StateN3}
              end
      end, {[], dict:new(), State}, AckTags),
     IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState),
     ok = dict:fold(fun (MsgStore, Guids, ok) ->
                            rabbit_msg_store:release(MsgStore, Guids)
                    end, ok, GuidsByStore),
-    PCount1 = PCount - case dict:find(PersistentStore, GuidsByStore) of
+    PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of
                            error -> 0;
                            {ok, Guids} -> length(Guids)
                        end,
@@ -771,7 +760,6 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
 
 remove_pending_ack(KeepPersistent,
                    State = #vqstate { pending_ack = PA,
-                                      persistent_store = PersistentStore,
                                       index_state = IndexState }) ->
     {SeqIds, GuidsByStore, PA1} =
         dict:fold(
@@ -783,7 +771,7 @@ remove_pending_ack(KeepPersistent,
                       case IsPersistent of
                           true -> {[SeqId | SeqIdsAcc],
                                    rabbit_misc:dict_cons(
-                                     PersistentStore, Guid, Dict), PAN1};
+                                     ?PERSISTENT_MSG_STORE, Guid, Dict), PAN1};
                           false -> {SeqIdsAcc,
                                     rabbit_misc:dict_cons(
                                       ?TRANSIENT_MSG_STORE, Guid, Dict), PAN1}
@@ -952,11 +940,10 @@ msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) ->
 
 tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun,
                          State = #vqstate {
                            on_sync = OnSync = {SAcks, SPubs, SFuns},
-                           persistent_store = PersistentStore,
-                           pending_ack = PA }) ->
+                           pending_ack = PA, durable = IsDurable }) ->
     %% If we are a non-durable queue, or (no persistent pubs, and no
     %% persistent acks) then we can skip the queue_index loop.
-    case PersistentStore == ?TRANSIENT_MSG_STORE orelse
+    case (not IsDurable) orelse
         (IsTransientPubs andalso
          lists:foldl(
            fun (AckTag, true ) ->
-delete1(_PersistentStore, _TransientThreshold, NextSeqId, DeltaSeqId, - IndexState) when DeltaSeqId =:= undefined - orelse DeltaSeqId >= NextSeqId -> +delete1(_TransientThreshold, NextSeqId, DeltaSeqId, IndexState) + when DeltaSeqId =:= undefined orelse DeltaSeqId >= NextSeqId -> IndexState; -delete1(PersistentStore, TransientThreshold, NextSeqId, DeltaSeqId, - IndexState) -> +delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> {List, Again, IndexState1} = rabbit_queue_index:read(DeltaSeqId, NextSeqId, IndexState), IndexState2 = @@ -1012,38 +997,33 @@ delete1(PersistentStore, TransientThreshold, NextSeqId, DeltaSeqId, List, TransientThreshold, IndexState1), {_Count, IndexState4} = remove_queue_entries( - PersistentStore, fun beta_fold_no_index_on_disk/3, - Q, IndexState3), + fun beta_fold_no_index_on_disk/3, Q, IndexState3), IndexState4 end, - delete1(PersistentStore, TransientThreshold, NextSeqId, Again, - IndexState2). + delete1(TransientThreshold, NextSeqId, Again, IndexState2). -purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState, - persistent_store = PersistentStore }) -> +purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> case bpqueue:is_empty(Q3) of true -> {Q1Count, IndexState1} = remove_queue_entries( - PersistentStore, fun rabbit_misc:queue_fold/3, - State #vqstate.q1, IndexState), + fun rabbit_misc:queue_fold/3, State #vqstate.q1, IndexState), {Count + Q1Count, State #vqstate { q1 = queue:new(), index_state = IndexState1 }}; false -> {Q3Count, IndexState1} = remove_queue_entries( - PersistentStore, fun beta_fold_no_index_on_disk/3, - Q3, IndexState), + fun beta_fold_no_index_on_disk/3, Q3, IndexState), purge1(Count + Q3Count, maybe_deltas_to_betas( State #vqstate { index_state = IndexState1, q3 = bpqueue:new() })) end. -remove_queue_entries(PersistentStore, Fold, Q, IndexState) -> - {_PersistentStore, Count, GuidsByStore, SeqIds, IndexState1} = +remove_queue_entries(Fold, Q, IndexState) -> + {Count, GuidsByStore, SeqIds, IndexState1} = Fold(fun remove_queue_entries1/2, - {PersistentStore, 0, dict:new(), [], IndexState}, Q), + {0, dict:new(), [], IndexState}, Q), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), @@ -1058,11 +1038,11 @@ remove_queue_entries1( #msg_status { guid = Guid, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {PersistentStore, CountN, GuidsByStore, SeqIdsAcc, IndexStateN}) -> + {CountN, GuidsByStore, SeqIdsAcc, IndexStateN}) -> GuidsByStore1 = case {MsgOnDisk, IsPersistent} of {true, true} -> - rabbit_misc:dict_cons(PersistentStore, Guid, GuidsByStore); + rabbit_misc:dict_cons(?PERSISTENT_MSG_STORE, Guid, GuidsByStore); {true, false} -> rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, Guid, GuidsByStore); {false, _} -> @@ -1076,15 +1056,14 @@ remove_queue_entries1( true -> rabbit_queue_index:deliver(SeqId, IndexStateN); false -> IndexStateN end, - {PersistentStore, CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. + {CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. 
fetch_from_q3_or_delta(State = #vqstate { q1 = Q1, q2 = Q2, delta = #delta { count = DeltaCount }, q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> + msg_store_clients = MSCState }) -> case bpqueue:out(Q3) of {empty, _Q3} -> 0 = DeltaCount, %% ASSERTION @@ -1096,8 +1075,7 @@ fetch_from_q3_or_delta(State = #vqstate { is_persistent = IsPersistent }}, Q3a} -> {{ok, Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }}, MSCState1} = - read_from_msg_store( - PersistentStore, MSCState, IsPersistent, Guid), + read_from_msg_store(MSCState, IsPersistent, Guid), Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), RamIndexCount1 = case IndexOnDisk of true -> RamIndexCount; @@ -1201,10 +1179,9 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, publish(msg, MsgStatus, #vqstate { index_state = IndexState, ram_msg_count = RamMsgCount, - msg_store_clients = MSCState, - persistent_store = PersistentStore } = State) -> + msg_store_clients = MSCState } = State) -> {MsgStatus1, MSCState1} = - maybe_write_msg_to_disk(PersistentStore, false, MsgStatus, MSCState), + maybe_write_msg_to_disk(false, MsgStatus, MSCState), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(false, MsgStatus1, IndexState), State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, @@ -1213,11 +1190,10 @@ publish(msg, MsgStatus, #vqstate { store_alpha_entry(MsgStatus2, State1); publish(index, MsgStatus, #vqstate { - index_state = IndexState, q1 = Q1, ram_index_count = RamIndexCount, msg_store_clients = MSCState, - persistent_store = PersistentStore } = State) -> + index_state = IndexState, q1 = Q1 } = State) -> {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(PersistentStore, true, MsgStatus, MSCState), + maybe_write_msg_to_disk(true, MsgStatus, MSCState), ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), @@ -1233,10 +1209,9 @@ publish(index, MsgStatus, #vqstate { publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = #vqstate { index_state = IndexState, q1 = Q1, q2 = Q2, - delta = Delta, msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> + delta = Delta, msg_store_clients = MSCState }) -> {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(PersistentStore, true, MsgStatus, MSCState), + maybe_write_msg_to_disk(true, MsgStatus, MSCState), {#msg_status { index_on_disk = true }, IndexState1} = maybe_write_index_to_disk(true, MsgStatus1, IndexState), true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION @@ -1271,42 +1246,39 @@ store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) } end. -find_msg_store(true, PersistentStore) -> PersistentStore; -find_msg_store(false, _PersistentStore) -> ?TRANSIENT_MSG_STORE. +find_msg_store(true) -> ?PERSISTENT_MSG_STORE; +find_msg_store(false) -> ?TRANSIENT_MSG_STORE. 
-with_msg_store_state(PersistentStore, {{MSCStateP, PRef}, MSCStateT}, true, - Fun) -> - {Result, MSCStateP1} = Fun(PersistentStore, MSCStateP), +with_msg_store_state({{MSCStateP, PRef}, MSCStateT}, true, Fun) -> + {Result, MSCStateP1} = Fun(?PERSISTENT_MSG_STORE, MSCStateP), {Result, {{MSCStateP1, PRef}, MSCStateT}}; -with_msg_store_state(_PersistentStore, {MSCStateP, {MSCStateT, TRef}}, false, - Fun) -> +with_msg_store_state({MSCStateP, {MSCStateT, TRef}}, false, Fun) -> {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), {Result, {MSCStateP, {MSCStateT1, TRef}}}. -read_from_msg_store(PersistentStore, MSCState, IsPersistent, Guid) -> +read_from_msg_store(MSCState, IsPersistent, Guid) -> with_msg_store_state( - PersistentStore, MSCState, IsPersistent, + MSCState, IsPersistent, fun (MsgStore, MSCState1) -> rabbit_msg_store:read(MsgStore, Guid, MSCState1) end). -maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus = +maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { msg_on_disk = true }, MSCState) -> {MsgStatus, MSCState}; -maybe_write_msg_to_disk(PersistentStore, Force, - MsgStatus = #msg_status { - msg = Msg, guid = Guid, - is_persistent = IsPersistent }, MSCState) +maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { + msg = Msg, guid = Guid, + is_persistent = IsPersistent }, MSCState) when Force orelse IsPersistent -> {ok, MSCState1} = with_msg_store_state( - PersistentStore, MSCState, IsPersistent, + MSCState, IsPersistent, fun (MsgStore, MSCState2) -> rabbit_msg_store:write( MsgStore, Guid, ensure_binary_properties(Msg), MSCState2) end), {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; -maybe_write_msg_to_disk(_PersistentStore, _Force, MsgStatus, MSCState) -> +maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> {MsgStatus, MSCState}. maybe_write_index_to_disk(_Force, MsgStatus = @@ -1465,14 +1437,12 @@ maybe_push_alphas_to_betas( maybe_push_alphas_to_betas( Generator, Consumer, Q, State = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - index_state = IndexState, msg_store_clients = MSCState, - persistent_store = PersistentStore }) -> + index_state = IndexState, msg_store_clients = MSCState }) -> case Generator(Q) of {empty, _Q} -> State; {{value, MsgStatus}, Qa} -> {MsgStatus1, MSCState1} = - maybe_write_msg_to_disk( - PersistentStore, true, MsgStatus, MSCState), + maybe_write_msg_to_disk(true, MsgStatus, MSCState), ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), -- cgit v1.2.1 From 7922f7d7aeaf68d5b809e7f93b6f37243354570e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 21 May 2010 23:12:31 +0100 Subject: better factoring of queue_index termination - make the whole thing less parametric - get rid of idempotence - we don't need it --- src/rabbit_queue_index.erl | 43 ++++++++++++++++++++----------------------- src/rabbit_tests.erl | 3 +-- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index f1b4346a..b63161b3 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -225,11 +225,13 @@ init(Name, MsgStoreRecovered, ContainsCheckFun) -> {Count, Terms, State1}. terminate(Terms, State) -> - terminate(true, Terms, State). + {SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + store_clean_shutdown([{segments, SegmentCounts} | Terms], Dir), + State1. 
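%% ---- [Editor's sketch: illustrative only, not part of this patch] ----
%% terminate/2 now unconditionally records a clean shutdown, while
%% delete_and_terminate/1 (below) reuses the same internal terminate/1
%% and deletes the queue directory instead. A hypothetical caller
%% choosing between the two:
shutdown_index(Keep, Terms, State) ->
    case Keep of
        true  -> rabbit_queue_index:terminate(Terms, State);
        false -> rabbit_queue_index:delete_and_terminate(State)
    end.
%% ----------------------------------------------------------------------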
delete_and_terminate(State) -> - State1 = terminate(false, [], State), - ok = rabbit_misc:recursive_delete([State1 #qistate.dir]), + {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + ok = rabbit_misc:recursive_delete([Dir]), State1. publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> @@ -436,29 +438,24 @@ init_dirty(CleanShutdown, ContainsCheckFun, State) -> State2 = flush_journal(State1 #qistate { segments = Segments1 }), {Count, State2}. -terminate(_StoreShutdown, _Terms, State = #qistate { segments = undefined }) -> - State; -terminate(StoreShutdown, Terms, State = - #qistate { journal_handle = JournalHdl, - dir = Dir, segments = Segments }) -> +terminate(State = #qistate { journal_handle = JournalHdl, + segments = Segments }) -> ok = case JournalHdl of undefined -> ok; _ -> file_handle_cache:close(JournalHdl) end, - SegTerms = segment_fold( - fun (Seg, #segment { handle = Hdl, - unacked = UnackedCount }, SegTermsAcc) -> - ok = case Hdl of - undefined -> ok; - _ -> file_handle_cache:close(Hdl) - end, - [{Seg, UnackedCount} | SegTermsAcc] - end, [], Segments), - case StoreShutdown of - true -> store_clean_shutdown([{segments, SegTerms} | Terms], Dir); - false -> ok - end, - State #qistate { journal_handle = undefined, segments = undefined }. + SegmentCounts = + segment_fold( + fun (Seg, #segment { handle = Hdl, unacked = UnackedCount }, + SegmentCountsAcc) -> + ok = case Hdl of + undefined -> ok; + _ -> file_handle_cache:close(Hdl) + end, + [{Seg, UnackedCount} | SegmentCountsAcc] + end, [], Segments), + {SegmentCounts, State #qistate { journal_handle = undefined, + segments = undefined }}. recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> {SegEntries, UnackedCount, Segment1} = load_segment(false, Segment), @@ -528,7 +525,7 @@ queue_index_walker_reader(QueueName, Gatherer) -> {Guid, _SeqId, true, _IsDelivered} <- Messages], State3 end, State, all_segment_nums(State)), - _State = terminate(false, [], State1), + {_SegmentCounts, _State} = terminate(State1), ok = gatherer:finish(Gatherer). 
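%% ---- [Editor's sketch: illustrative only, not part of this patch] ----
%% Each walker reader streams {Guid, RefCount} pairs into the gatherer
%% and then signals completion; the same protocol over a plain guid list
%% would look like this (assuming only gatherer:in/2 and
%% gatherer:finish/1, as used above):
list_walker_reader(Guids, Gatherer) ->
    [ok = gatherer:in(Gatherer, {Guid, 1}) || Guid <- Guids],
    ok = gatherer:finish(Gatherer).
%% ----------------------------------------------------------------------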
 %%----------------------------------------------------------------------------
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 646ac03f..e1ee5d08 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1455,8 +1455,7 @@ test_queue_index() ->
     {ReadA, undefined, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3),
     ok = verify_read_with_published(false, false, ReadA,
                                     lists:reverse(SeqIdsGuidsA)),
-    %% call terminate twice to prove it's idempotent
-    _Qi5 = rabbit_queue_index:terminate([], rabbit_queue_index:terminate([], Qi4)),
+    _Qi5 = rabbit_queue_index:terminate([], Qi4),
     ok = stop_msg_store(),
     ok = rabbit_variable_queue:start([test_queue()]),
     %% should get length back as 0, as all the msgs were transient
-- cgit v1.2.1

From 96db0c6163a448ea86da31446bad86df88ba9e65 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Tue, 25 May 2010 18:12:57 +0100
Subject: Whoops

---
 src/rabbit_variable_queue.erl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 88c2719a..30f16fd2 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -369,7 +369,10 @@ terminate(State) ->
         persistent_count = PCount, index_state = IndexState,
         msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} } =
         remove_pending_ack(true, tx_commit_index(State)),
-    rabbit_msg_store:client_terminate(MSCStateP),
+    case MSCStateP of
+        undefined -> ok;
+        _ -> rabbit_msg_store:client_terminate(MSCStateP)
+    end,
     rabbit_msg_store:client_terminate(MSCStateT),
     Terms = [{persistent_ref, PRef},
              {transient_ref, TRef},
-- cgit v1.2.1

From dcf15d7fdf7bb6d542df206147e1f9548cc311cc Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Thu, 27 May 2010 12:47:15 +0100
Subject: close segment handles straight after use

This keeps the number of open fds low (just two - one for the journal
and one for the segment) whereas previously it could be as high as the
total number of segments during a) the qi walk in msg_store recovery,
b) qi dirty recovery, c) qi journal flushing.

There are two nice side effects of this change:
- #segment records no longer need to contain a file handle
- load_segment can use a read-only handle
---
 src/rabbit_queue_index.erl | 110 ++++++++++++++++++---------------------------
 1 file changed, 43 insertions(+), 67 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 02d0d8ad..858ade26 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -155,11 +155,13 @@

 -define(PUB, {_Guid, _IsPersistent}).

+-define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]).
+
 %%----------------------------------------------------------------------------

 -record(qistate, { dir, segments, journal_handle, dirty_count }).

--record(segment, { unacked, handle, journal_entries, path, num }).
+-record(segment, { unacked, journal_entries, path, num }).

 -include("rabbit.hrl").

@@ -170,7 +172,6 @@
 -type(hdl() :: ('undefined' | any())).
-type(segment() :: ('undefined' | #segment { unacked :: non_neg_integer(), - handle :: hdl(), journal_entries :: array(), path :: file_path(), num :: non_neg_integer() @@ -296,9 +297,9 @@ read(Start, End, State = #qistate { segments = Segments, true -> EndRelSeq; false -> ?SEGMENT_ENTRY_COUNT end, - Segment = segment_find_or_new(StartSeg, Dir, Segments), - {SegEntries, _UnackedCount, Segment1} = load_segment(false, Segment), - #segment { journal_entries = JEntries } = Segment1, + Segment = #segment { journal_entries = JEntries } = + segment_find_or_new(StartSeg, Dir, Segments), + {SegEntries, _UnackedCount} = load_segment(false, Segment), {SegEntries1, _UnackedCountDelta} = segment_plus_journal(SegEntries, JEntries), {array:sparse_foldr( @@ -310,7 +311,7 @@ read(Start, End, State = #qistate { segments = Segments, Acc end, [], SegEntries1), Again, - State #qistate { segments = segment_store(Segment1, Segments) }}. + State #qistate { segments = segment_store(Segment, Segments) }}. next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), @@ -448,28 +449,23 @@ terminate(State = #qistate { journal_handle = JournalHdl, end, SegmentCounts = segment_fold( - fun (Seg, #segment { handle = Hdl, unacked = UnackedCount }, - SegmentCountsAcc) -> - ok = case Hdl of - undefined -> ok; - _ -> file_handle_cache:close(Hdl) - end, + fun (Seg, #segment { unacked = UnackedCount }, SegmentCountsAcc) -> [{Seg, UnackedCount} | SegmentCountsAcc] end, [], Segments), {SegmentCounts, State #qistate { journal_handle = undefined, segments = undefined }}. -recover_segment(ContainsCheckFun, CleanShutdown, Segment) -> - {SegEntries, UnackedCount, Segment1} = load_segment(false, Segment), - #segment { journal_entries = JEntries } = Segment1, +recover_segment(ContainsCheckFun, CleanShutdown, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, UnackedCount} = load_segment(false, Segment), {SegEntries1, UnackedCountDelta} = segment_plus_journal(SegEntries, JEntries), array:sparse_foldl( - fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment2) -> + fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment1) -> recover_message(ContainsCheckFun(Guid), CleanShutdown, - Del, RelSeq, Segment2) + Del, RelSeq, Segment1) end, - Segment1 #segment { unacked = UnackedCount + UnackedCountDelta }, + Segment #segment { unacked = UnackedCount + UnackedCountDelta }, SegEntries1). recover_message( true, true, _Del, _RelSeq, Segment) -> @@ -578,8 +574,11 @@ maybe_flush_journal(State) -> flush_journal(State = #qistate { segments = Segments }) -> Segments1 = segment_fold( - fun (_Seg, #segment { unacked = 0 } = Segment, SegmentsN) -> - ok = delete_segment(Segment), + fun (_Seg, #segment { unacked = 0, path = Path }, SegmentsN) -> + case filelib:is_file(Path) of + true -> ok = file:delete(Path); + false -> ok + end, SegmentsN; (_Seg, #segment {} = Segment, SegmentsN) -> segment_store(append_journal_to_segment(Segment), SegmentsN) @@ -589,21 +588,21 @@ flush_journal(State = #qistate { segments = Segments }) -> ok = file_handle_cache:clear(JournalHdl), State1 #qistate { dirty_count = 0 }. 
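%% ---- [Editor's sketch: illustrative only, not part of this patch] ----
%% The commit's theme is "open, use, close": a handle lives only for the
%% duration of one operation, so at most the journal handle plus one
%% segment handle are open at any time. The shape, as a generic helper
%% (name and arguments are hypothetical):
with_handle(Path, Modes, Options, Fun) ->
    {ok, Hdl} = file_handle_cache:open(Path, Modes, Options),
    try Fun(Hdl)
    after ok = file_handle_cache:close(Hdl)
    end.
%% ----------------------------------------------------------------------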
-append_journal_to_segment(#segment { journal_entries = JEntries } = Segment) -> +append_journal_to_segment(#segment { journal_entries = JEntries, + path = Path } = Segment) -> case array:sparse_size(JEntries) of 0 -> Segment; - _ -> {Hdl, Segment1} = get_segment_handle(Segment), + _ -> {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), - ok = file_handle_cache:sync(Hdl), - Segment1 #segment { journal_entries = array_new() } + file_handle_cache:close(Hdl), + Segment #segment { journal_entries = array_new() } end. get_journal_handle(State = #qistate { journal_handle = undefined, dir = Dir }) -> Path = filename:join(Dir, ?JOURNAL_FILENAME), - {ok, Hdl} = file_handle_cache:open(Path, - [binary, raw, read, write, - {read_ahead, ?SEGMENT_TOTAL_SIZE}], + {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], [{write_buffer, infinity}]), {Hdl, State #qistate { journal_handle = Hdl }}; get_journal_handle(State = #qistate { journal_handle = Hdl }) -> @@ -627,14 +626,13 @@ recover_journal(State) -> %% We want to keep ack'd entries in so that we can %% remove them if duplicates are in the journal. The %% counts here are purely from the segment itself. - {SegEntries, UnackedCountInSeg, Segment1} = - load_segment(true, Segment), + {SegEntries, UnackedCountInSeg} = load_segment(true, Segment), {JEntries1, UnackedCountDuplicates} = journal_minus_segment(JEntries, SegEntries), - Segment1 #segment { journal_entries = JEntries1, - unacked = (UnackedCountInJournal + - UnackedCountInSeg - - UnackedCountDuplicates) } + Segment #segment { journal_entries = JEntries1, + unacked = (UnackedCountInJournal + + UnackedCountInSeg - + UnackedCountDuplicates) } end, Segments), State1 #qistate { segments = Segments1 }. @@ -693,31 +691,13 @@ all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> end, sets:from_list(segment_fetch_keys(Segments)), filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). -delete_segment(#segment { handle = undefined }) -> - ok; -delete_segment(#segment { handle = Hdl }) -> - ok = file_handle_cache:delete(Hdl). - -get_segment_handle(Segment = #segment { handle = undefined, path = Path }) -> - {ok, Hdl} = file_handle_cache:open(Path, - [binary, raw, read, write, - {read_ahead, ?SEGMENT_TOTAL_SIZE}], - [{write_buffer, infinity}]), - {Hdl, Segment #segment { handle = Hdl }}; -get_segment_handle(Segment = #segment { handle = Hdl }) -> - {Hdl, Segment}. - -segment_new(Seg, Dir) -> - #segment { unacked = 0, - handle = undefined, - journal_entries = array_new(), - path = seg_num_to_path(Dir, Seg), - num = Seg }. - segment_find_or_new(Seg, Dir, Segments) -> case segment_find(Seg, Segments) of - error -> segment_new(Seg, Dir); - {ok, Segment} -> Segment + {ok, Segment} -> Segment; + error -> #segment { unacked = 0, + journal_entries = array_new(), + path = seg_num_to_path(Dir, Seg), + num = Seg } end. segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> @@ -793,18 +773,14 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> %% Does not do any combining with the journal at all. The PubCount %% that comes back is the number of publishes in the segment. The %% number of unacked msgs is PubCount - AckCount. 
-load_segment(KeepAcked, Segment = #segment { path = Path, handle = SegHdl }) -> - SegmentExists = case SegHdl of - undefined -> filelib:is_file(Path); - _ -> true - end, - case SegmentExists of - false -> {array_new(), 0, Segment}; - true -> {Hdl, Segment1} = get_segment_handle(Segment), +load_segment(KeepAcked, #segment { path = Path }) -> + case filelib:is_file(Path) of + false -> {array_new(), 0}; + true -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []), {ok, 0} = file_handle_cache:position(Hdl, bof), - {SegEntries, UnackedCount} = - load_segment_entries(KeepAcked, Hdl, array_new(), 0), - {SegEntries, UnackedCount, Segment1} + Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0), + file_handle_cache:close(Hdl), + Res end. load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) -> -- cgit v1.2.1 From dc940d5be8028344b3ea6a3ea8f687a0a772e446 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 27 May 2010 13:36:57 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 858ade26..d960bdb1 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -161,7 +161,7 @@ -record(qistate, { dir, segments, journal_handle, dirty_count }). --record(segment, { unacked, journal_entries, path, num }). +-record(segment, { num, path, journal_entries, unacked }). -include("rabbit.hrl"). @@ -171,10 +171,10 @@ -type(hdl() :: ('undefined' | any())). -type(segment() :: ('undefined' | - #segment { unacked :: non_neg_integer(), - journal_entries :: array(), + #segment { num :: non_neg_integer(), path :: file_path(), - num :: non_neg_integer() + journal_entries :: array(), + unacked :: non_neg_integer() })). -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). @@ -694,10 +694,10 @@ all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> segment_find_or_new(Seg, Dir, Segments) -> case segment_find(Seg, Segments) of {ok, Segment} -> Segment; - error -> #segment { unacked = 0, - journal_entries = array_new(), + error -> #segment { num = Seg, path = seg_num_to_path(Dir, Seg), - num = Seg } + journal_entries = array_new(), + unacked = 0 } end. segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> @@ -736,7 +736,7 @@ segment_map(Fun, {Segments, CachedSegments}) -> CachedSegments)}. segment_fetch_keys({Segments, CachedSegments}) -> - lists:map(fun (Segment) -> Segment#segment.num end, CachedSegments) ++ + lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ dict:fetch_keys(Segments). segments_new() -> -- cgit v1.2.1 From 1656d1df71cd3e2e7a145cc40a859b6c9547e5d7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 27 May 2010 14:35:41 +0100 Subject: Don't ignore return codes --- src/rabbit_queue_index.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d960bdb1..487430fc 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -595,7 +595,7 @@ append_journal_to_segment(#segment { journal_entries = JEntries, _ -> {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], [{write_buffer, infinity}]), array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), - file_handle_cache:close(Hdl), + ok = file_handle_cache:close(Hdl), Segment #segment { journal_entries = array_new() } end. 
@@ -779,7 +779,7 @@ load_segment(KeepAcked, #segment { path = Path }) -> true -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []), {ok, 0} = file_handle_cache:position(Hdl, bof), Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0), - file_handle_cache:close(Hdl), + ok = file_handle_cache:close(Hdl), Res end. -- cgit v1.2.1 From 07b0a64fa9ff03f47a560630f0221588797d15c8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 May 2010 13:54:58 +0100 Subject: introduce helper fun to fold over segment entries and use that in 'read' and 'queue_index_walker_reader'. That makes the code in the latter less of a jumping-through-hoops exercise. --- src/rabbit_queue_index.erl | 53 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 487430fc..c34523d1 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -297,21 +297,17 @@ read(Start, End, State = #qistate { segments = Segments, true -> EndRelSeq; false -> ?SEGMENT_ENTRY_COUNT end, - Segment = #segment { journal_entries = JEntries } = - segment_find_or_new(StartSeg, Dir, Segments), - {SegEntries, _UnackedCount} = load_segment(false, Segment), - {SegEntries1, _UnackedCountDelta} = - segment_plus_journal(SegEntries, JEntries), - {array:sparse_foldr( - fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) - when StartRelSeq =< RelSeq andalso RelSeq < MaxRelSeq -> - [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), - IsPersistent, IsDelivered == del} | Acc ]; - (_RelSeq, _Value, Acc) -> - Acc - end, [], SegEntries1), - Again, - State #qistate { segments = segment_store(Segment, Segments) }}. + Segment = segment_find_or_new(StartSeg, Dir, Segments), + Messages = segment_entries_foldr( + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) + when StartRelSeq =< RelSeq andalso RelSeq < MaxRelSeq -> + [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ]; + (_RelSeq, _Value, Acc) -> + Acc + end, [], Segment), + Segments1 = segment_store(Segment, Segments), + {Messages, Again, State #qistate { segments = Segments1 }}. next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), @@ -513,17 +509,16 @@ queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> end. queue_index_walker_reader(QueueName, Gatherer) -> - State = recover_journal(blank_state(QueueName)), - State1 = lists:foldl( - fun (Seg, State2) -> - SeqId = reconstruct_seq_id(Seg, 0), - {Messages, undefined, State3} = - read(SeqId, next_segment_boundary(SeqId), State2), - [ok = gatherer:in(Gatherer, {Guid, 1}) || - {Guid, _SeqId, true, _IsDelivered} <- Messages], - State3 - end, State, all_segment_nums(State)), - {_SegmentCounts, _State} = terminate(State1), + State = #qistate { segments = Segments, dir = Dir } = + recover_journal(blank_state(QueueName)), + [ok = segment_entries_foldr( + fun (_RelSeq, {{Guid, true}, _IsDelivered, no_ack}, ok) -> + gatherer:in(Gatherer, {Guid, 1}); + (_RelSeq, _Value, Acc) -> + Acc + end, ok, segment_find_or_new(Seg, Dir, Segments)) || + Seg <- all_segment_nums(State)], + {_SegmentCounts, _State} = terminate(State), ok = gatherer:finish(Gatherer). %%---------------------------------------------------------------------------- @@ -768,6 +763,12 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> end, Hdl. 
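%% ---- [Editor's sketch: illustrative only, not part of this patch] ----
%% The segment_entries_foldr/3 helper added just below folds over the
%% journal-merged view of one segment. Counting the unacked persistent
%% entries of a segment, for instance, becomes (hypothetical function):
count_persistent_entries(Segment) ->
    segment_entries_foldr(
      fun (_RelSeq, {{_Guid, true}, _IsDelivered, no_ack}, Count) ->
              Count + 1;
          (_RelSeq, _Entry, Count) ->
              Count
      end, 0, Segment).
%% ----------------------------------------------------------------------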
+segment_entries_foldr(Fun, Init, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, _UnackedCount} = load_segment(false, Segment), + {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries), + array:sparse_foldr(Fun, Init, SegEntries1). + %% Loading segments %% %% Does not do any combining with the journal at all. The PubCount -- cgit v1.2.1 From 007578b19e38d3abcf7555983428f659e6f7571e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 May 2010 16:25:12 +0100 Subject: inlining --- src/rabbit_queue_index.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index c34523d1..1cb7fa92 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -670,10 +670,6 @@ seq_id_to_seg_and_rel_seq_id(SeqId) -> reconstruct_seq_id(Seg, RelSeq) -> (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. -seg_num_to_path(Dir, Seg) -> - SegName = integer_to_list(Seg), - filename:join(Dir, SegName ++ ?SEGMENT_EXTENSION). - all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> lists:sort( sets:to_list( @@ -689,8 +685,10 @@ all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> segment_find_or_new(Seg, Dir, Segments) -> case segment_find(Seg, Segments) of {ok, Segment} -> Segment; - error -> #segment { num = Seg, - path = seg_num_to_path(Dir, Seg), + error -> SegName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION, + Path = filename:join(Dir, SegName), + #segment { num = Seg, + path = Path, journal_entries = array_new(), unacked = 0 } end. -- cgit v1.2.1 From e7ff65ab5ff0f81dcba35e6ef3b080a879a4a34d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 28 May 2010 16:25:25 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 1cb7fa92..d08531a9 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -677,7 +677,7 @@ all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> fun (SegName, Set) -> sets:add_element( list_to_integer( - lists:takewhile(fun(C) -> $0 =< C andalso C =< $9 end, + lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, SegName)), Set) end, sets:from_list(segment_fetch_keys(Segments)), filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). -- cgit v1.2.1 From 7fa67cbb460bfe641b045d2326a5ca41b6fa158d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 2 Jun 2010 21:17:35 +0100 Subject: remove erroneous comment --- src/rabbit_queue_index.erl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d08531a9..653d65c8 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -769,9 +769,7 @@ segment_entries_foldr(Fun, Init, %% Loading segments %% -%% Does not do any combining with the journal at all. The PubCount -%% that comes back is the number of publishes in the segment. The -%% number of unacked msgs is PubCount - AckCount. +%% Does not do any combining with the journal at all. 
 load_segment(KeepAcked, #segment { path = Path }) ->
     case filelib:is_file(Path) of
         false -> {array_new(), 0};
         true  -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []),
                  {ok, 0} = file_handle_cache:position(Hdl, bof),
                  Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0),
                  ok = file_handle_cache:close(Hdl),
                  Res
     end.
-- cgit v1.2.1

From 0137f84d2a82ba7d5f16be82f9aa0bd5fa75a3c0 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 2 Jun 2010 22:42:33 +0100
Subject: tweak: more LRU-like cache behaviour

---
 src/rabbit_queue_index.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 653d65c8..994c1241 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -705,7 +705,7 @@ segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head)
     {Segments, [Segment | Tail]};
 segment_store(Segment = #segment { num = Seg }, %% 2, matches tail
               {Segments, [SegmentA, #segment { num = Seg }]}) ->
-    {Segments, [SegmentA, Segment]};
+    {Segments, [Segment, SegmentA]};
 segment_store(Segment = #segment { num = Seg }, {Segments, []}) ->
     {dict:erase(Seg, Segments), [Segment]};
 segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) ->
-- cgit v1.2.1

From 332336bc266da95f294ce00a8221f07b870c9326 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 2 Jun 2010 22:44:28 +0100
Subject: refactor: remove superfluous clause

...which was an optimisation too far, particularly since a similar
clause is not present in the related functions
---
 src/rabbit_queue_index.erl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 994c1241..07a2c70f 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -715,8 +715,6 @@ segment_store(Segment = #segment { num = Seg },
      {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)),
       [Segment, SegmentA]}.

-segment_fold(Fun, Acc, {Segments, []}) ->
-    dict:fold(Fun, Acc, Segments);
 segment_fold(Fun, Acc, {Segments, CachedSegments}) ->
     Acc1 = lists:foldl(fun (Segment = #segment { num = Num }, AccN) ->
                                Fun(Num, Segment, AccN)
-- cgit v1.2.1

From 7b75c19f19e721b854cb655faabcdebc08b44c71 Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 2 Jun 2010 22:45:48 +0100
Subject: refactor: rename segment_fetch_keys to segment_nums

to abstract away from the dict-ness of the segment store
---
 src/rabbit_queue_index.erl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 07a2c70f..59330b20 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -316,7 +316,7 @@ next_segment_boundary(SeqId) ->
 bounds(State = #qistate { segments = Segments }) ->
     %% This is not particularly efficient, but only gets invoked on
     %% queue initialisation and termination.
-    SegNums = lists:sort(segment_fetch_keys(Segments)),
+    SegNums = lists:sort(segment_nums(Segments)),
     %% Don't bother trying to figure out the lowest seq_id, merely the
     %% seq_id of the start of the lowest segment. That seq_id may not
     %% actually exist, but that's fine. The important thing is that
@@ -679,7 +679,7 @@ all_segment_nums(#qistate { dir = Dir, segments = Segments }) ->
                      list_to_integer(
                        lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end,
                                        SegName)), Set)
-           end, sets:from_list(segment_fetch_keys(Segments)),
+           end, sets:from_list(segment_nums(Segments)),
            filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))).

@@ -726,7 +726,7 @@ segment_map(Fun, {Segments, CachedSegments}) ->
                   CachedSegments)}.
-segment_fetch_keys({Segments, CachedSegments}) ->
+segment_nums({Segments, CachedSegments}) ->
     lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++
         dict:fetch_keys(Segments).
-- cgit v1.2.1

From d4828de8ca6a265af8921b9978a6193695f6943b Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Wed, 2 Jun 2010 22:48:23 +0100
Subject: drop segment num from fun call in segment_{fold,map}

to abstract away from the dict-ness of the segment store. The segment
number isn't actually needed in most cases and can be easily extracted
from the #segment.
---
 src/rabbit_queue_index.erl | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
index 59330b20..62d2d588 100644
--- a/src/rabbit_queue_index.erl
+++ b/src/rabbit_queue_index.erl
@@ -445,8 +445,8 @@ terminate(State = #qistate { journal_handle = JournalHdl,
          end,
     SegmentCounts =
         segment_fold(
-          fun (Seg, #segment { unacked = UnackedCount }, SegmentCountsAcc) ->
-                  [{Seg, UnackedCount} | SegmentCountsAcc]
+          fun (#segment { num = Seg, unacked = UnackedCount }, Acc) ->
+                  [{Seg, UnackedCount} | Acc]
           end, [], Segments),
     {SegmentCounts, State #qistate { journal_handle = undefined,
                                      segments = undefined }}.
@@ -569,13 +569,13 @@ maybe_flush_journal(State) ->
 flush_journal(State = #qistate { segments = Segments }) ->
     Segments1 =
         segment_fold(
-          fun (_Seg, #segment { unacked = 0, path = Path }, SegmentsN) ->
+          fun (#segment { unacked = 0, path = Path }, SegmentsN) ->
                   case filelib:is_file(Path) of
                       true  -> ok = file:delete(Path);
                       false -> ok
                   end,
                   SegmentsN;
-              (_Seg, #segment {} = Segment, SegmentsN) ->
+              (#segment {} = Segment, SegmentsN) ->
                   segment_store(append_journal_to_segment(Segment), SegmentsN)
           end, segments_new(), Segments),
     {JournalHdl, State1} =
@@ -616,8 +616,8 @@ recover_journal(State) ->
     State1 = #qistate { segments = Segments } = load_journal(State),
     Segments1 =
         segment_map(
-          fun (_Seg, Segment = #segment { journal_entries = JEntries,
-                                          unacked = UnackedCountInJournal }) ->
+          fun (Segment = #segment { journal_entries = JEntries,
+                                    unacked = UnackedCountInJournal }) ->
                   %% We want to keep ack'd entries in so that we can
                   %% remove them if duplicates are in the journal. The
                   %% counts here are purely from the segment itself.
@@ -716,15 +716,12 @@ segment_store(Segment = #segment { num = Seg },
       [Segment, SegmentA]}.

 segment_fold(Fun, Acc, {Segments, CachedSegments}) ->
-    Acc1 = lists:foldl(fun (Segment = #segment { num = Num }, AccN) ->
-                               Fun(Num, Segment, AccN)
-                       end, Acc, CachedSegments),
-    dict:fold(Fun, Acc1, Segments).
+    dict:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end,
+              lists:foldl(Fun, Acc, CachedSegments), Segments).

 segment_map(Fun, {Segments, CachedSegments}) ->
-    {dict:map(Fun, Segments),
-     lists:map(fun (Segment = #segment { num = Num }) -> Fun(Num, Segment) end,
-               CachedSegments)}.
+    {dict:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments),
+     lists:map(Fun, CachedSegments)}.
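%% ---- [Editor's sketch: illustrative only, not part of this patch] ----
%% With the segment number dropped from the callback, folds over the
%% store read more directly; e.g. the total unacked count across all
%% segments (hypothetical function):
total_unacked(Segments) ->
    segment_fold(fun (#segment { unacked = UnackedCount }, Total) ->
                         Total + UnackedCount
                 end, 0, Segments).
%% ----------------------------------------------------------------------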
segment_nums({Segments, CachedSegments}) -> lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ -- cgit v1.2.1 From c15e318edd5e90403a235bb8a424f128fdb11df5 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Jun 2010 07:52:05 +0100 Subject: refactor: simplify code with some helper funs --- src/rabbit_variable_queue.erl | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 30f16fd2..5b1419ef 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -443,11 +443,9 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, is_delivered = true, msg_on_disk = false, index_on_disk = false }, {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(false, MsgStatus, MSCState), + PCount1 = maybe_inc(PCount, IsPersistent1), State1 = State #vqstate { msg_store_clients = MSCState1, - persistent_count = PCount + case IsPersistent1 of - true -> 1; - false -> 0 - end, + persistent_count = PCount1, next_seq_id = SeqId + 1, out_counter = OutCount + 1, in_counter = InCount + 1 }, @@ -530,10 +528,7 @@ fetch(AckRequired, State = false -> PA end, - PCount1 = case IsPersistent andalso not AckRequired of - true -> PCount - 1; - false -> PCount - end, + PCount1 = maybe_dec(PCount, IsPersistent andalso not AckRequired), Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, out_counter = OutCount + 1, @@ -761,6 +756,12 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, %% Minor helpers %%---------------------------------------------------------------------------- +maybe_inc(N, true ) -> N + 1; +maybe_inc(N, false) -> N. + +maybe_dec(N, true ) -> N - 1; +maybe_dec(N, false) -> N. 
+ remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState }) -> @@ -1080,10 +1081,7 @@ fetch_from_q3_or_delta(State = #vqstate { guid = Guid }}, MSCState1} = read_from_msg_store(MSCState, IsPersistent, Guid), Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), - RamIndexCount1 = case IndexOnDisk of - true -> RamIndexCount; - false -> RamIndexCount - 1 - end, + RamIndexCount1 = maybe_dec(RamIndexCount, not IndexOnDisk), true = RamIndexCount1 >= 0, %% ASSERTION State1 = State #vqstate { q3 = Q3a, q4 = Q4a, ram_msg_count = RamMsgCount + 1, @@ -1171,10 +1169,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, is_persistent = IsDurable andalso IsPersistent, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = false }, - PCount1 = PCount + case IsPersistent of - true -> 1; - false -> 0 - end, + PCount1 = maybe_inc(PCount, IsPersistent), {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, in_counter = InCount + 1, @@ -1200,10 +1195,8 @@ publish(index, MsgStatus, #vqstate { ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), - RamIndexCount1 = case MsgStatus2 #msg_status.index_on_disk of - true -> RamIndexCount; - false -> RamIndexCount + 1 - end, + IndexOnDisk = MsgStatus2 #msg_status.index_on_disk, + RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), State1 = State #vqstate { index_state = IndexState1, ram_index_count = RamIndexCount1, msg_store_clients = MSCState1 }, @@ -1449,10 +1442,8 @@ maybe_push_alphas_to_betas( ForceIndex = should_force_index_to_disk(State), {MsgStatus2, IndexState1} = maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), - RamIndexCount1 = case MsgStatus2 #msg_status.index_on_disk of - true -> RamIndexCount; - false -> RamIndexCount + 1 - end, + IndexOnDisk = MsgStatus2 #msg_status.index_on_disk, + RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), State1 = State #vqstate { ram_msg_count = RamMsgCount - 1, ram_index_count = RamIndexCount1, index_state = IndexState1, -- cgit v1.2.1 From 0264a1d0278d6d60d398394d1d557e3f909b0ab0 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Jun 2010 08:09:07 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 137 +++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 74 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 5b1419ef..3e66e000 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -451,14 +451,13 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, in_counter = InCount + 1 }, {SeqId, case MsgStatus1 #msg_status.msg_on_disk of - true -> - {#msg_status { index_on_disk = true }, IndexState1} = - maybe_write_index_to_disk(false, MsgStatus1, IndexState), - State1 #vqstate { index_state = IndexState1, - pending_ack = dict:store(SeqId, {true, Guid}, - PA) }; - false -> - State1 #vqstate { pending_ack = dict:store(SeqId, MsgStatus1, PA) } + true -> {#msg_status { index_on_disk = true }, IndexState1} = + maybe_write_index_to_disk(false, MsgStatus1, IndexState), + PA1 = dict:store(SeqId, {true, Guid}, PA), + State1 #vqstate { index_state = IndexState1, + pending_ack = PA1 }; + false -> PA1 = dict:store(SeqId, MsgStatus1, PA), + State1 #vqstate { pending_ack = PA1 } end}. 
fetch(AckRequired, State = @@ -517,14 +516,13 @@ fetch(AckRequired, State = %% 4. If an ack is required, add something sensible to PA PA1 = case AckRequired of - true -> - Entry = - case MsgOnDisk of - true -> {IsPersistent, Guid}; - false -> MsgStatus #msg_status { - is_delivered = true } - end, - dict:store(SeqId, Entry, PA); + true -> Entry = + case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus #msg_status { + is_delivered = true } + end, + dict:store(SeqId, Entry, PA); false -> PA end, @@ -611,15 +609,13 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> IsTransientPubs = [] == PersistentGuids, {AckTags1, case (not IsDurable) orelse IsTransientPubs of - true -> - tx_commit_post_msg_store( - IsTransientPubs, PubsOrdered, AckTags1, Fun, State); - false -> - ok = rabbit_msg_store:sync( - ?PERSISTENT_MSG_STORE, PersistentGuids, - msg_store_callback(PersistentGuids, IsTransientPubs, - PubsOrdered, AckTags1, Fun)), - State + true -> tx_commit_post_msg_store( + IsTransientPubs, PubsOrdered, AckTags1, Fun, State); + false -> ok = rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + msg_store_callback(PersistentGuids, IsTransientPubs, + PubsOrdered, AckTags1, Fun)), + State end}. requeue(AckTags, State) -> @@ -681,9 +677,9 @@ set_ram_duration_target( Rate = AvgEgressRate + AvgIngressRate, TargetRamMsgCount1 = case DurationTarget of - infinity -> undefined; + infinity -> undefined; undefined -> undefined; - _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, @@ -895,9 +891,7 @@ combine_deltas(#delta { start_seq_id = StartLow, count = CountLow, #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }. beta_fold_no_index_on_disk(Fun, Init, Q) -> - bpqueue:foldr(fun (_Prefix, Value, Acc) -> - Fun(Value, Acc) - end, Init, Q). + bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). permitted_ram_index_count(#vqstate { len = 0 }) -> undefined; @@ -905,13 +899,11 @@ permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3, delta = #delta { count = DeltaCount } }) -> AlphaBetaLen = Len - DeltaCount, case AlphaBetaLen == 0 of - true -> - undefined; - false -> - BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), - %% the fraction of the alphas+betas that are betas - BetaFrac = BetaLen / AlphaBetaLen, - BetaLen - trunc(BetaFrac * BetaLen) + true -> undefined; + false -> BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + %% the fraction of the alphas+betas that are betas + BetaFrac = BetaLen / AlphaBetaLen, + BetaLen - trunc(BetaFrac * BetaLen) end. 
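%% ---- [Editor's worked example, not part of this patch] ----
%% permitted_ram_index_count/1 scales the RAM index allowance by the
%% beta fraction: with Len = 1000 and DeltaCount = 0, AlphaBetaLen =
%% 1000; if BetaLen = 250 then BetaFrac = 0.25 and the permitted count
%% is 250 - trunc(0.25 * 250) = 188. The larger the share of the queue
%% held as betas, the fewer of them may keep index entries in RAM.
%% ------------------------------------------------------------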
@@ -1008,20 +1000,19 @@ delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> case bpqueue:is_empty(Q3) of - true -> - {Q1Count, IndexState1} = - remove_queue_entries( - fun rabbit_misc:queue_fold/3, State #vqstate.q1, IndexState), - {Count + Q1Count, State #vqstate { q1 = queue:new(), - index_state = IndexState1 }}; - false -> - {Q3Count, IndexState1} = - remove_queue_entries( - fun beta_fold_no_index_on_disk/3, Q3, IndexState), - purge1(Count + Q3Count, - maybe_deltas_to_betas( - State #vqstate { index_state = IndexState1, - q3 = bpqueue:new() })) + true -> {Q1Count, IndexState1} = + remove_queue_entries(fun rabbit_misc:queue_fold/3, + State #vqstate.q1, IndexState), + {Count + Q1Count, + State #vqstate { q1 = queue:new(), + index_state = IndexState1 }}; + false -> {Q3Count, IndexState1} = + remove_queue_entries(fun beta_fold_no_index_on_disk/3, + Q3, IndexState), + purge1(Count + Q3Count, + maybe_deltas_to_betas( + State #vqstate { index_state = IndexState1, + q3 = bpqueue:new() })) end. remove_queue_entries(Fold, Q, IndexState) -> @@ -1031,12 +1022,10 @@ remove_queue_entries(Fold, Q, IndexState) -> ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), - IndexState2 = - case SeqIds of - [] -> IndexState1; - _ -> rabbit_queue_index:ack(SeqIds, IndexState1) - end, - {Count, IndexState2}. + {Count, case SeqIds of + [] -> IndexState1; + _ -> rabbit_queue_index:ack(SeqIds, IndexState1) + end}. remove_queue_entries1( #msg_status { guid = Guid, seq_id = SeqId, @@ -1045,12 +1034,11 @@ remove_queue_entries1( {CountN, GuidsByStore, SeqIdsAcc, IndexStateN}) -> GuidsByStore1 = case {MsgOnDisk, IsPersistent} of - {true, true} -> - rabbit_misc:dict_cons(?PERSISTENT_MSG_STORE, Guid, GuidsByStore); - {true, false} -> - rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, Guid, GuidsByStore); - {false, _} -> - GuidsByStore + {true, true} -> rabbit_misc:dict_cons(?PERSISTENT_MSG_STORE, + Guid, GuidsByStore); + {true, false} -> rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, + Guid, GuidsByStore); + {false, _} -> GuidsByStore end, SeqIdsAcc1 = case IndexOnDisk of true -> [SeqId | SeqIdsAcc]; @@ -1236,10 +1224,10 @@ store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, q3 = Q3 }) -> MsgStatus1 = MsgStatus #msg_status { msg = undefined }, case DeltaCount == 0 of - true -> - State #vqstate { q3 = bpqueue:in(IndexOnDisk, MsgStatus1, Q3) }; - false -> - State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) } + true -> State #vqstate { q3 = bpqueue:in(IndexOnDisk, MsgStatus1, + Q3) }; + false -> State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, + Q2) } end. 
find_msg_store(true) -> ?PERSISTENT_MSG_STORE; @@ -1309,12 +1297,12 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> Reduction = lists:min([RamIndexCount - Permitted, ?RAM_INDEX_BATCH_SIZE]), case Reduction < ?RAM_INDEX_BATCH_SIZE of - true -> - State; - false -> - {Reduction1, State1} = limit_q2_ram_index(Reduction, State), - {_Red2, State2} = limit_q3_ram_index(Reduction1, State1), - State2 + true -> State; + false -> {Reduction1, State1} = + limit_q2_ram_index(Reduction, State), + {_Red2, State2} = + limit_q3_ram_index(Reduction1, State1), + State2 end; _ -> State @@ -1531,7 +1519,8 @@ push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> {{value, IndexOnDisk, MsgStatus}, Qa} -> {RamIndexCount1, IndexState1} = case IndexOnDisk of - true -> {RamIndexCount, IndexState}; + true -> + {RamIndexCount, IndexState}; false -> {#msg_status { index_on_disk = true }, IndexState2} = maybe_write_index_to_disk(true, MsgStatus, -- cgit v1.2.1 From 4e3eb6983e3ee4d1fd5ab61e072ef6aa0666a3b5 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Jun 2010 14:07:28 +0100 Subject: refactor: introduce helper fun --- src/rabbit_variable_queue.erl | 87 +++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 49 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 3e66e000..81f346f9 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1163,47 +1163,31 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, in_counter = InCount + 1, persistent_count = PCount1 })}. -publish(msg, MsgStatus, #vqstate { - index_state = IndexState, ram_msg_count = RamMsgCount, - msg_store_clients = MSCState } = State) -> - {MsgStatus1, MSCState1} = - maybe_write_msg_to_disk(false, MsgStatus, MSCState), - {MsgStatus2, IndexState1} = - maybe_write_index_to_disk(false, MsgStatus1, IndexState), - State1 = State #vqstate { ram_msg_count = RamMsgCount + 1, - index_state = IndexState1, - msg_store_clients = MSCState1 }, - store_alpha_entry(MsgStatus2, State1); - -publish(index, MsgStatus, #vqstate { - ram_index_count = RamIndexCount, msg_store_clients = MSCState, - index_state = IndexState, q1 = Q1 } = State) -> - {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(true, MsgStatus, MSCState), +publish(msg, MsgStatus, State) -> + {MsgStatus1, State1 = #vqstate { ram_msg_count = RamMsgCount }} = + maybe_write_to_disk(false, false, MsgStatus, State), + State2 = State1 # vqstate {ram_msg_count = RamMsgCount + 1 }, + store_alpha_entry(MsgStatus1, State2); + +publish(index, MsgStatus, State) -> ForceIndex = should_force_index_to_disk(State), - {MsgStatus2, IndexState1} = - maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), - IndexOnDisk = MsgStatus2 #msg_status.index_on_disk, + {MsgStatus1 = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + State1 = #vqstate { ram_index_count = RamIndexCount, q1 = Q1 }} = + maybe_write_to_disk(true, ForceIndex, MsgStatus, State), RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), - State1 = State #vqstate { index_state = IndexState1, - ram_index_count = RamIndexCount1, - msg_store_clients = MSCState1 }, + State2 = State1 #vqstate { ram_index_count = RamIndexCount1 }, true = queue:is_empty(Q1), %% ASSERTION - store_beta_entry(MsgStatus2, State1); - -publish(neither, MsgStatus = #msg_status { seq_id = SeqId }, State = - #vqstate { index_state = IndexState, q1 = Q1, q2 = 
Q2, - delta = Delta, msg_store_clients = MSCState }) -> - {MsgStatus1 = #msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(true, MsgStatus, MSCState), - {#msg_status { index_on_disk = true }, IndexState1} = - maybe_write_index_to_disk(true, MsgStatus1, IndexState), + store_beta_entry(MsgStatus1, State2); + +publish(neither, MsgStatus, State) -> + {#msg_status { msg_on_disk = true, index_on_disk = true, seq_id = SeqId }, + State1 = #vqstate { q1 = Q1, q2 = Q2, delta = Delta }} = + maybe_write_to_disk(true, true, MsgStatus, State), true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION Delta1 = #delta { start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1 }, - State #vqstate { index_state = IndexState1, - delta = combine_deltas(Delta, Delta1), - msg_store_clients = MSCState1 }. + State1 #vqstate { delta = combine_deltas(Delta, Delta1) }. store_alpha_entry(MsgStatus, State = #vqstate { q1 = Q1, q2 = Q2, @@ -1285,6 +1269,16 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> {MsgStatus, IndexState}. +maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, + State = #vqstate { msg_store_clients = MSCState, + index_state = IndexState }) -> + {MsgStatus1, MSCState1} = maybe_write_msg_to_disk( + ForceMsg, MsgStatus, MSCState), + {MsgStatus2, IndexState1} = maybe_write_index_to_disk( + ForceIndex, MsgStatus1, IndexState), + {MsgStatus2, State #vqstate { index_state = IndexState1, + msg_store_clients = MSCState1 }}. + %%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- @@ -1418,26 +1412,21 @@ maybe_push_alphas_to_betas( target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; -maybe_push_alphas_to_betas( - Generator, Consumer, Q, State = - #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - index_state = IndexState, msg_store_clients = MSCState }) -> +maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> case Generator(Q) of {empty, _Q} -> State; {{value, MsgStatus}, Qa} -> - {MsgStatus1, MSCState1} = - maybe_write_msg_to_disk(true, MsgStatus, MSCState), ForceIndex = should_force_index_to_disk(State), - {MsgStatus2, IndexState1} = - maybe_write_index_to_disk(ForceIndex, MsgStatus1, IndexState), - IndexOnDisk = MsgStatus2 #msg_status.index_on_disk, + {MsgStatus1 = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + State1 = #vqstate { ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }} = + maybe_write_to_disk(true, ForceIndex, MsgStatus, State), RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), - State1 = State #vqstate { ram_msg_count = RamMsgCount - 1, - ram_index_count = RamIndexCount1, - index_state = IndexState1, - msg_store_clients = MSCState1 }, + State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, + ram_index_count = RamIndexCount1 }, maybe_push_alphas_to_betas(Generator, Consumer, Qa, - Consumer(MsgStatus2, Qa, State1)) + Consumer(MsgStatus1, Qa, State2)) end. 
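%% ---- [Editor's sketch: illustrative only, not part of this patch] ----
%% The new helper threads both the msg store clients and the index state
%% through a single call; forcing message and index to disk together, as
%% publish(neither, ...) does, collapses to one line (hypothetical
%% wrapper, asserting the post-condition):
force_to_disk(MsgStatus, State) ->
    {MsgStatus1 = #msg_status { msg_on_disk = true, index_on_disk = true },
     State1} = maybe_write_to_disk(true, true, MsgStatus, State),
    {MsgStatus1, State1}.
%% ----------------------------------------------------------------------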
push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, -- cgit v1.2.1 From 8c770327d9ce6ff3e6a7349dbd17e88e4220a558 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Jun 2010 14:33:47 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 81f346f9..fb3d1b51 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -221,8 +221,8 @@ %% write more - we can always come back on the next publish to do %% more. -define(RAM_INDEX_BATCH_SIZE, 64). --define(PERSISTENT_MSG_STORE, msg_store_persistent). --define(TRANSIENT_MSG_STORE, msg_store_transient). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). -include("rabbit.hrl"). -- cgit v1.2.1 From 27b8f894f07e295a503ca8aa92a99e2fc72ced7c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Jun 2010 21:29:28 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index fb3d1b51..c0aecd1b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -385,10 +385,10 @@ terminate(State) -> %% needs to delete everything that's been delivered and not ack'd. delete_and_terminate(State) -> {_PurgeCount, State1} = purge(State), - State2 = #vqstate { - index_state = IndexState, - msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}}, - transient_threshold = TransientThreshold } = + State2 = #vqstate { index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}}, + transient_threshold = TransientThreshold } = remove_pending_ack(false, State1), %% flushing here is good because it deletes all full segments, %% leaving only partial segments around. @@ -403,7 +403,8 @@ delete_and_terminate(State) -> IndexState5 = rabbit_queue_index:delete_and_terminate(IndexState2), case MSCStateP of undefined -> ok; - _ -> rabbit_msg_store:delete_client(?PERSISTENT_MSG_STORE, PRef), + _ -> rabbit_msg_store:delete_client( + ?PERSISTENT_MSG_STORE, PRef), rabbit_msg_store:client_terminate(MSCStateP) end, rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), @@ -415,8 +416,8 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), {Len, State1} = - purge1(Q4Count, State #vqstate { index_state = IndexState1, - q4 = queue:new() }), + purge1(Q4Count, State #vqstate { q4 = queue:new(), + index_state = IndexState1 }), {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0, persistent_count = 0 }}. 
@@ -429,7 +430,8 @@ publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> {blank_ack, State}; publish_delivered(true, Msg = #basic_message { guid = Guid, is_persistent = IsPersistent }, - State = #vqstate { len = 0, index_state = IndexState, + State = #vqstate { len = 0, + index_state = IndexState, next_seq_id = SeqId, out_counter = OutCount, in_counter = InCount, @@ -1017,8 +1019,7 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> remove_queue_entries(Fold, Q, IndexState) -> {Count, GuidsByStore, SeqIds, IndexState1} = - Fold(fun remove_queue_entries1/2, - {0, dict:new(), [], IndexState}, Q), + Fold(fun remove_queue_entries1/2, {0, dict:new(), [], IndexState}, Q), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), -- cgit v1.2.1 From 61c5bce600b7532c5a15d2b9b4c0d3043f8aecfb Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 3 Jun 2010 22:16:23 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 386 +++++++++++++++++++++++------------------- 1 file changed, 212 insertions(+), 174 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c0aecd1b..b2eb4dc4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -235,8 +235,8 @@ -type(ack() :: seq_id() | 'blank_ack'). -type(delta() :: #delta { start_seq_id :: non_neg_integer(), - count :: non_neg_integer (), - end_seq_id :: non_neg_integer() }). + count :: non_neg_integer (), + end_seq_id :: non_neg_integer() }). -type(state() :: #vqstate { q1 :: queue(), @@ -276,11 +276,11 @@ -endif. -define(BLANK_DELTA, #delta { start_seq_id = undefined, - count = 0, - end_seq_id = undefined }). + count = 0, + end_seq_id = undefined }). -define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, - count = 0, - end_seq_id = Z }). + count = 0, + end_seq_id = Z }). %%---------------------------------------------------------------------------- %% Public API @@ -323,8 +323,8 @@ init(QueueName, IsDurable, _Recover) -> Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of true -> ?BLANK_DELTA; false -> #delta { start_seq_id = LowSeqId, - count = DeltaCount1, - end_seq_id = NextSeqId } + count = DeltaCount1, + end_seq_id = NextSeqId } end, Now = now(), PersistentClient = @@ -365,9 +365,10 @@ init(QueueName, IsDurable, _Recover) -> maybe_deltas_to_betas(State). terminate(State) -> - State1 = #vqstate { - persistent_count = PCount, index_state = IndexState, - msg_store_clients = {{MSCStateP, PRef}, {MSCStateT, TRef}} } = + State1 = #vqstate { persistent_count = PCount, + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = remove_pending_ack(true, tx_commit_index(State)), case MSCStateP of undefined -> ok; @@ -377,17 +378,17 @@ terminate(State) -> Terms = [{persistent_ref, PRef}, {transient_ref, TRef}, {persistent_count, PCount}], - State1 #vqstate { index_state = rabbit_queue_index:terminate( - Terms, IndexState), + State1 #vqstate { index_state = rabbit_queue_index:terminate( + Terms, IndexState), msg_store_clients = undefined }. %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. 
delete_and_terminate(State) -> {_PurgeCount, State1} = purge(State), - State2 = #vqstate { index_state = IndexState, - msg_store_clients = {{MSCStateP, PRef}, - {MSCStateT, TRef}}, + State2 = #vqstate { index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}}, transient_threshold = TransientThreshold } = remove_pending_ack(false, State1), %% flushing here is good because it deletes all full segments, @@ -409,16 +410,18 @@ delete_and_terminate(State) -> end, rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), rabbit_msg_store:client_terminate(MSCStateT), - State2 #vqstate { index_state = IndexState5, + State2 #vqstate { index_state = IndexState5, msg_store_clients = undefined }. purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), {Len, State1} = - purge1(Q4Count, State #vqstate { q4 = queue:new(), + purge1(Q4Count, State #vqstate { q4 = queue:new(), index_state = IndexState1 }), - {Len, State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0, + {Len, State1 #vqstate { len = 0, + ram_msg_count = 0, + ram_index_count = 0, persistent_count = 0 }}. publish(Msg, State) -> @@ -430,15 +433,15 @@ publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> {blank_ack, State}; publish_delivered(true, Msg = #basic_message { guid = Guid, is_persistent = IsPersistent }, - State = #vqstate { len = 0, - index_state = IndexState, - next_seq_id = SeqId, - out_counter = OutCount, - in_counter = InCount, + State = #vqstate { len = 0, + index_state = IndexState, msg_store_clients = MSCState, - persistent_count = PCount, - pending_ack = PA, - durable = IsDurable }) -> + next_seq_id = SeqId, + out_counter = OutCount, + in_counter = InCount, + persistent_count = PCount, + pending_ack = PA, + durable = IsDurable }) -> IsPersistent1 = IsDurable andalso IsPersistent, MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent1, @@ -447,10 +450,10 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, maybe_write_msg_to_disk(false, MsgStatus, MSCState), PCount1 = maybe_inc(PCount, IsPersistent1), State1 = State #vqstate { msg_store_clients = MSCState1, - persistent_count = PCount1, - next_seq_id = SeqId + 1, - out_counter = OutCount + 1, - in_counter = InCount + 1 }, + persistent_count = PCount1, + next_seq_id = SeqId + 1, + out_counter = OutCount + 1, + in_counter = InCount + 1 }, {SeqId, case MsgStatus1 #msg_status.msg_on_disk of true -> {#msg_status { index_on_disk = true }, IndexState1} = @@ -462,10 +465,13 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, State1 #vqstate { pending_ack = PA1 } end}. 
-fetch(AckRequired, State = - #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, out_counter = OutCount, - index_state = IndexState, len = Len, persistent_count = PCount, - pending_ack = PA }) -> +fetch(AckRequired, State = #vqstate { q4 = Q4, + ram_msg_count = RamMsgCount, + out_counter = OutCount, + index_state = IndexState, + len = Len, + persistent_count = PCount, + pending_ack = PA }) -> case queue:out(Q4) of {empty, _Q4} -> case fetch_from_q3_or_delta(State) of @@ -531,17 +537,20 @@ fetch(AckRequired, State = PCount1 = maybe_dec(PCount, IsPersistent andalso not AckRequired), Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, - State #vqstate { q4 = Q4a, out_counter = OutCount + 1, - ram_msg_count = RamMsgCount - 1, - index_state = IndexState3, len = Len1, - pending_ack = PA1, persistent_count = PCount1 }} + State #vqstate { q4 = Q4a, + ram_msg_count = RamMsgCount - 1, + out_counter = OutCount + 1, + index_state = IndexState3, + len = Len1, + persistent_count = PCount1, + pending_ack = PA1 }} end. ack([], State) -> State; -ack(AckTags, State = #vqstate { index_state = IndexState, +ack(AckTags, State = #vqstate { index_state = IndexState, persistent_count = PCount, - pending_ack = PA }) -> + pending_ack = PA }) -> {GuidsByStore, SeqIds, PA1} = lists:foldl( fun (SeqId, {Dict, SeqIds, PAN}) -> @@ -568,13 +577,13 @@ ack(AckTags, State = #vqstate { index_state = IndexState, error -> 0; {ok, Guids} -> length(Guids) end, - State #vqstate { index_state = IndexState1, persistent_count = PCount1, - pending_ack = PA1 }. + State #vqstate { index_state = IndexState1, + persistent_count = PCount1, + pending_ack = PA1 }. tx_publish(Txn, Msg = #basic_message { is_persistent = true, guid = Guid }, - State = #vqstate { msg_store_clients = MSCState, - durable = true }) -> + State = #vqstate { msg_store_clients = MSCState, durable = true }) -> MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, is_delivered = false, msg_on_disk = false, index_on_disk = false }, @@ -622,12 +631,12 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> requeue(AckTags, State) -> {SeqIds, GuidsByStore, - State1 = #vqstate { index_state = IndexState, + State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( - fun (SeqId, {SeqIdsAcc, Dict, StateN = - #vqstate { msg_store_clients = MSCStateN, - pending_ack = PAN }}) -> + fun (SeqId, {SeqIdsAcc, Dict, StateN = #vqstate { + msg_store_clients = MSCStateN, + pending_ack = PAN }}) -> PAN1 = dict:erase(SeqId, PAN), StateN1 = StateN #vqstate { pending_ack = PAN1 }, case dict:find(SeqId, PAN) of @@ -662,7 +671,7 @@ requeue(AckTags, State) -> error -> 0; {ok, Guids} -> length(Guids) end, - State1 #vqstate { index_state = IndexState1, + State1 #vqstate { index_state = IndexState1, persistent_count = PCount1 }. len(#vqstate { len = Len }) -> @@ -671,11 +680,11 @@ len(#vqstate { len = Len }) -> is_empty(State) -> 0 == len(State). 
-set_ram_duration_target( - DurationTarget, State = #vqstate { avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate, - target_ram_msg_count = TargetRamMsgCount - }) -> +set_ram_duration_target(DurationTarget, + State = #vqstate { + avg_egress_rate = AvgEgressRate, + avg_ingress_rate = AvgIngressRate, + target_ram_msg_count = TargetRamMsgCount }) -> Rate = AvgEgressRate + AvgIngressRate, TargetRamMsgCount1 = case DurationTarget of @@ -684,23 +693,23 @@ set_ram_duration_target( _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, - duration_target = DurationTarget }, + duration_target = DurationTarget }, case TargetRamMsgCount1 == undefined orelse TargetRamMsgCount1 >= TargetRamMsgCount of true -> State1; false -> reduce_memory_use(State1) end. -ram_duration(State = #vqstate { egress_rate = Egress, - ingress_rate = Ingress, - rate_timestamp = Timestamp, - in_counter = InCount, - out_counter = OutCount, - ram_msg_count = RamMsgCount, - duration_target = DurationTarget, +ram_duration(State = #vqstate { egress_rate = Egress, + ingress_rate = Ingress, + rate_timestamp = Timestamp, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, ram_msg_count_prev = RamMsgCountPrev }) -> Now = now(), - {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), + {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), Duration = %% msgs / (msgs/sec) == sec @@ -710,15 +719,16 @@ ram_duration(State = #vqstate { egress_rate = Egress, (2 * (AvgEgressRate + AvgIngressRate)) end, - {Duration, set_ram_duration_target( - DurationTarget, - State #vqstate { egress_rate = Egress1, - avg_egress_rate = AvgEgressRate, - ingress_rate = Ingress1, - avg_ingress_rate = AvgIngressRate, - rate_timestamp = Now, - ram_msg_count_prev = RamMsgCount, - out_counter = 0, in_counter = 0 })}. + {Duration, set_ram_duration_target(DurationTarget, + State #vqstate { + egress_rate = Egress1, + avg_egress_rate = AvgEgressRate, + ingress_rate = Ingress1, + avg_ingress_rate = AvgIngressRate, + rate_timestamp = Now, + in_counter = 0, + out_counter = 0, + ram_msg_count_prev = RamMsgCount })}. needs_sync(#vqstate { on_sync = {_, _, []} }) -> false; needs_sync(_) -> true. @@ -729,26 +739,27 @@ handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, - len = Len, on_sync = {_, _, From}, + len = Len, + on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, - ram_index_count = RamIndexCount, - avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate, - next_seq_id = NextSeqId }) -> - [ {q1, queue:len(Q1)}, - {q2, bpqueue:len(Q2)}, - {delta, Delta}, - {q3, bpqueue:len(Q3)}, - {q4, queue:len(Q4)}, - {len, Len}, - {outstanding_txns, length(From)}, - {target_ram_msg_count, TargetRamMsgCount}, - {ram_msg_count, RamMsgCount}, - {ram_index_count, RamIndexCount}, - {avg_egress_rate, AvgEgressRate}, - {avg_ingress_rate, AvgIngressRate}, - {next_seq_id, NextSeqId} ]. 
+                  ram_msg_count        = RamMsgCount,
+                  ram_index_count      = RamIndexCount,
+                  avg_egress_rate      = AvgEgressRate,
+                  avg_ingress_rate     = AvgIngressRate,
+                  next_seq_id          = NextSeqId }) ->
+    [ {q1                   , queue:len(Q1)},
+      {q2                   , bpqueue:len(Q2)},
+      {delta                , Delta},
+      {q3                   , bpqueue:len(Q3)},
+      {q4                   , queue:len(Q4)},
+      {len                  , Len},
+      {outstanding_txns     , length(From)},
+      {target_ram_msg_count , TargetRamMsgCount},
+      {ram_msg_count        , RamMsgCount},
+      {ram_index_count      , RamIndexCount},
+      {avg_egress_rate      , AvgEgressRate},
+      {avg_ingress_rate     , AvgIngressRate},
+      {next_seq_id          , NextSeqId} ].
 
 %%----------------------------------------------------------------------------
 %% Minor helpers
@@ -872,19 +883,22 @@ ensure_binary_properties(Msg = #basic_message { content = Content }) ->
 %% the first arg is the older delta
 combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) ->
     ?BLANK_DELTA;
-combine_deltas(?BLANK_DELTA_PATTERN(X),
-               #delta { start_seq_id = Start, count = Count,
-                        end_seq_id = End } = B) ->
+combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { start_seq_id = Start,
+                                                 count        = Count,
+                                                 end_seq_id   = End } = B) ->
     true = Start + Count =< End, %% ASSERTION
     B;
-combine_deltas(#delta { start_seq_id = Start, count = Count,
-                        end_seq_id = End } = A, ?BLANK_DELTA_PATTERN(Y)) ->
+combine_deltas(#delta { start_seq_id = Start,
+                        count        = Count,
+                        end_seq_id   = End } = A, ?BLANK_DELTA_PATTERN(Y)) ->
     true = Start + Count =< End, %% ASSERTION
     A;
-combine_deltas(#delta { start_seq_id = StartLow, count = CountLow,
-                        end_seq_id = EndLow },
-               #delta { start_seq_id = StartHigh, count = CountHigh,
-                        end_seq_id = EndHigh }) ->
+combine_deltas(#delta { start_seq_id = StartLow,
+                        count        = CountLow,
+                        end_seq_id   = EndLow },
+               #delta { start_seq_id = StartHigh,
+                        count        = CountHigh,
+                        end_seq_id   = EndHigh }) ->
     Count = CountLow + CountHigh,
     true = (StartLow =< StartHigh) %% ASSERTIONS
         andalso ((StartLow + CountLow) =< EndLow)
@@ -897,7 +911,9 @@ beta_fold_no_index_on_disk(Fun, Init, Q) ->
 
 permitted_ram_index_count(#vqstate { len = 0 }) ->
     undefined;
-permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3,
+permitted_ram_index_count(#vqstate { len   = Len,
+                                     q2    = Q2,
+                                     q3    = Q3,
                                      delta = #delta { count = DeltaCount } }) ->
     AlphaBetaLen = Len - DeltaCount,
     case AlphaBetaLen == 0 of
@@ -936,9 +952,11 @@ msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) ->
               end)
     end.
 
-tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, State =
-                         #vqstate { on_sync = OnSync = {SAcks, SPubs, SFuns},
-                                    pending_ack = PA, durable = IsDurable }) ->
+tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun,
+                         State = #vqstate {
+                           on_sync = OnSync = {SAcks, SPubs, SFuns},
+                           pending_ack = PA,
+                           durable = IsDurable }) ->
     %% If we are a non-durable queue, or (no persistent pubs, and no
     %% persistent acks) then we can skip the queue_index loop.
case (not IsDurable) orelse @@ -946,7 +964,7 @@ tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, State = lists:foldl( fun (AckTag, true ) -> case dict:find(AckTag, PA) of - {ok, #msg_status{}} -> true; + {ok, #msg_status {}} -> true; {ok, {IsPersistent, _Guid}} -> not IsPersistent end; (_AckTag, false) -> false @@ -1006,15 +1024,15 @@ purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> remove_queue_entries(fun rabbit_misc:queue_fold/3, State #vqstate.q1, IndexState), {Count + Q1Count, - State #vqstate { q1 = queue:new(), + State #vqstate { q1 = queue:new(), index_state = IndexState1 }}; false -> {Q3Count, IndexState1} = remove_queue_entries(fun beta_fold_no_index_on_disk/3, Q3, IndexState), purge1(Count + Q3Count, maybe_deltas_to_betas( - State #vqstate { index_state = IndexState1, - q3 = bpqueue:new() })) + State #vqstate { q3 = bpqueue:new(), + index_state = IndexState1 })) end. remove_queue_entries(Fold, Q, IndexState) -> @@ -1052,10 +1070,13 @@ remove_queue_entries1( {CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. fetch_from_q3_or_delta(State = #vqstate { - q1 = Q1, q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3, q4 = Q4, ram_msg_count = RamMsgCount, - ram_index_count = RamIndexCount, + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, msg_store_clients = MSCState }) -> case bpqueue:out(Q3) of {empty, _Q3} -> @@ -1072,9 +1093,10 @@ fetch_from_q3_or_delta(State = #vqstate { Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), RamIndexCount1 = maybe_dec(RamIndexCount, not IndexOnDisk), true = RamIndexCount1 >= 0, %% ASSERTION - State1 = State #vqstate { q3 = Q3a, q4 = Q4a, - ram_msg_count = RamMsgCount + 1, - ram_index_count = RamIndexCount1, + State1 = State #vqstate { q3 = Q3a, + q4 = Q4a, + ram_msg_count = RamMsgCount + 1, + ram_index_count = RamIndexCount1, msg_store_clients = MSCState1 }, State2 = case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of @@ -1096,12 +1118,13 @@ fetch_from_q3_or_delta(State = #vqstate { {loaded, State2} end. -reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) +reduce_memory_use(State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; -reduce_memory_use(State = - #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> +reduce_memory_use(State = #vqstate { + target_ram_msg_count = TargetRamMsgCount }) -> State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), case TargetRamMsgCount of 0 -> push_betas_to_deltas(State1); @@ -1113,8 +1136,9 @@ reduce_memory_use(State = %%---------------------------------------------------------------------------- test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, - q1 = Q1, q3 = Q3 }) -> + ram_msg_count = RamMsgCount, + q1 = Q1, + q3 = Q3 }) -> case TargetRamMsgCount of undefined -> msg; @@ -1150,9 +1174,12 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, end. 
publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, - IsDelivered, MsgOnDisk, State = - #vqstate { next_seq_id = SeqId, len = Len, in_counter = InCount, - persistent_count = PCount, durable = IsDurable }) -> + IsDelivered, MsgOnDisk, + State = #vqstate { next_seq_id = SeqId, + len = Len, + in_counter = InCount, + persistent_count = PCount, + durable = IsDurable }) -> MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsDurable andalso IsPersistent, @@ -1160,8 +1187,9 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, index_on_disk = false }, PCount1 = maybe_inc(PCount, IsPersistent), {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, - State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, - in_counter = InCount + 1, + State #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, persistent_count = PCount1 })}. publish(msg, MsgStatus, State) -> @@ -1186,14 +1214,17 @@ publish(neither, MsgStatus, State) -> State1 = #vqstate { q1 = Q1, q2 = Q2, delta = Delta }} = maybe_write_to_disk(true, true, MsgStatus, State), true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION - Delta1 = #delta { start_seq_id = SeqId, count = 1, - end_seq_id = SeqId + 1 }, + Delta1 = #delta { start_seq_id = SeqId, + count = 1, + end_seq_id = SeqId + 1 }, State1 #vqstate { delta = combine_deltas(Delta, Delta1) }. -store_alpha_entry(MsgStatus, State = - #vqstate { q1 = Q1, q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3, q4 = Q4 }) -> +store_alpha_entry(MsgStatus, State = #vqstate { + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4 }) -> case bpqueue:is_empty(Q2) andalso 0 == DeltaCount andalso bpqueue:is_empty(Q3) of true -> true = queue:is_empty(Q1), %% ASSERTION @@ -1204,9 +1235,9 @@ store_alpha_entry(MsgStatus, State = store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, index_on_disk = IndexOnDisk }, - State = #vqstate { q2 = Q2, + State = #vqstate { q2 = Q2, delta = #delta { count = DeltaCount }, - q3 = Q3 }) -> + q3 = Q3 }) -> MsgStatus1 = MsgStatus #msg_status { msg = undefined }, case DeltaCount == 0 of true -> State #vqstate { q3 = bpqueue:in(IndexOnDisk, MsgStatus1, @@ -1271,13 +1302,13 @@ maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> {MsgStatus, IndexState}. maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, - State = #vqstate { msg_store_clients = MSCState, - index_state = IndexState }) -> + State = #vqstate { index_state = IndexState, + msg_store_clients = MSCState }) -> {MsgStatus1, MSCState1} = maybe_write_msg_to_disk( ForceMsg, MsgStatus, MSCState), {MsgStatus2, IndexState1} = maybe_write_index_to_disk( ForceIndex, MsgStatus1, IndexState), - {MsgStatus2, State #vqstate { index_state = IndexState1, + {MsgStatus2, State #vqstate { index_state = IndexState1, msg_store_clients = MSCState1 }}. %%---------------------------------------------------------------------------- @@ -1321,9 +1352,9 @@ limit_q3_ram_index(Reduction, State = #vqstate { q3 = Q3 }) limit_q3_ram_index(Reduction, State) -> {Reduction, State}. 
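 %% A usage sketch for maybe_write_to_disk/4 (illustrative only): the
 %% ForceMsg/ForceIndex booleans independently force the message body
 %% and its queue-index entry to disk, so a caller that must have the
 %% body on disk but leaves the index decision open does, schematically:
 %%
 %%   ForceIndex = should_force_index_to_disk(State),
 %%   {MsgStatus1, State1} =
 %%       maybe_write_to_disk(true, ForceIndex, MsgStatus, State),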
-limit_ram_index(MapFoldFilterFun, Q, Reduction, State = - #vqstate { ram_index_count = RamIndexCount, - index_state = IndexState }) -> +limit_ram_index(MapFoldFilterFun, Q, Reduction, + State = #vqstate { ram_index_count = RamIndexCount, + index_state = IndexState }) -> {Qa, {Reduction1, IndexState1}} = MapFoldFilterFun( fun erlang:'not'/1, @@ -1337,18 +1368,18 @@ limit_ram_index(MapFoldFilterFun, Q, Reduction, State = {true, MsgStatus1, {N-1, IndexStateN1}} end, {Reduction, IndexState}, Q), RamIndexCount1 = RamIndexCount - (Reduction - Reduction1), - {Qa, Reduction1, State #vqstate { index_state = IndexState1, + {Qa, Reduction1, State #vqstate { index_state = IndexState1, ram_index_count = RamIndexCount1 }}. maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; -maybe_deltas_to_betas( - State = #vqstate { index_state = IndexState, q2 = Q2, q3 = Q3, - target_ram_msg_count = TargetRamMsgCount, - delta = Delta = #delta { start_seq_id = DeltaSeqId, - count = DeltaCount, - end_seq_id = DeltaSeqIdEnd }, - transient_threshold = TransientThreshold}) -> +maybe_deltas_to_betas(State = #vqstate { + q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + target_ram_msg_count = TargetRamMsgCount, + transient_threshold = TransientThreshold }) -> case (not bpqueue:is_empty(Q3)) andalso (0 == TargetRamMsgCount) of true -> State; @@ -1356,6 +1387,9 @@ maybe_deltas_to_betas( %% either q3 is empty, in which case we load at least one %% segment, or TargetRamMsgCount > 0, meaning we should %% really be holding all the betas in memory. + #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + end_seq_id = DeltaSeqIdEnd } = Delta, {List, IndexState1, Delta1SeqId} = read_one_index_segment(DeltaSeqId, DeltaSeqIdEnd, IndexState), %% length(List) may be < segment_size because of acks. It @@ -1375,15 +1409,15 @@ maybe_deltas_to_betas( 0 -> %% delta is now empty, but it wasn't %% before, so can now join q2 onto q3 - State1 #vqstate { delta = ?BLANK_DELTA, - q2 = bpqueue:new(), - q3 = bpqueue:join(Q3b, Q2) }; + State1 #vqstate { q2 = bpqueue:new(), + delta = ?BLANK_DELTA, + q3 = bpqueue:join(Q3b, Q2) }; N when N > 0 -> - State1 #vqstate { - q3 = Q3b, - delta = #delta { start_seq_id = Delta1SeqId, - count = N, - end_seq_id = DeltaSeqIdEnd } } + Delta1 = #delta { start_seq_id = Delta1SeqId, + count = N, + end_seq_id = DeltaSeqIdEnd }, + State1 #vqstate { delta = Delta1, + q3 = Q3b } end end end. @@ -1407,10 +1441,10 @@ maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> q4 = Q4a } end, Q4, State). 
-maybe_push_alphas_to_betas( - _Generator, _Consumer, _Q, - State = #vqstate { ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) +maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, + State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> State; maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> @@ -1420,7 +1454,7 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> ForceIndex = should_force_index_to_disk(State), {MsgStatus1 = #msg_status { msg_on_disk = true, index_on_disk = IndexOnDisk }, - State1 = #vqstate { ram_msg_count = RamMsgCount, + State1 = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount }} = maybe_write_to_disk(true, ForceIndex, MsgStatus, State), RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), @@ -1430,9 +1464,11 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> Consumer(MsgStatus1, Qa, State2)) end. -push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, +push_betas_to_deltas(State = #vqstate { q2 = Q2, + delta = Delta, + q3 = Q3, ram_index_count = RamIndexCount, - index_state = IndexState }) -> + index_state = IndexState }) -> %% HighSeqId is high in the sense that it must be higher than the %% seq_id in Delta, but it's also the lowest of the betas that we %% transfer from q2 to delta. @@ -1449,10 +1485,11 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, end, Delta1 = #delta { start_seq_id = Delta1SeqId } = combine_deltas(Delta, #delta { start_seq_id = HighSeqId, - count = Len1, - end_seq_id = EndSeqId }), - State1 = State #vqstate { q2 = bpqueue:new(), delta = Delta1, - index_state = IndexState1, + count = Len1, + end_seq_id = EndSeqId }), + State1 = State #vqstate { q2 = bpqueue:new(), + delta = Delta1, + index_state = IndexState1, ram_index_count = RamIndexCount1 }, case bpqueue:out(Q3) of {empty, _Q3} -> @@ -1479,12 +1516,13 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, {SeqIdMax, Len2, Q3a, RamIndexCount2, IndexState2} = push_betas_to_deltas(fun bpqueue:out_r/1, Limit, Q3, RamIndexCount1, IndexState1), - Delta2 = combine_deltas(#delta { start_seq_id = Limit, - count = Len2, - end_seq_id = SeqIdMax+1 }, - Delta1), - State1 #vqstate { q3 = Q3a, delta = Delta2, - index_state = IndexState2, + Delta2 = #delta { start_seq_id = Limit, + count = Len2, + end_seq_id = SeqIdMax + 1 }, + Delta3 = combine_deltas(Delta2, Delta1), + State1 #vqstate { delta = Delta3, + q3 = Q3a, + index_state = IndexState2, ram_index_count = RamIndexCount2 } end end. -- cgit v1.2.1 From a1db84bf930be2419549d14c656f54bd49cf684e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 06:55:14 +0100 Subject: fix small persistent_count counting bug ...which doesn't actually matter since it would only result in the persistent_count of transient queues being wrong, which get nuked on startup anyway (the only place that makes use of the persistent_count). 
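To make the miscount concrete, here is a minimal sketch (simplified from
publish/4, not the literal patch; PCountOld/PCountNew are illustrative
names): on a transient queue a message flagged persistent never reaches
the persistent store, so the counter must follow IsPersistent1 rather
than IsPersistent:

    IsPersistent1 = IsDurable andalso IsPersistent,
    %% before: bumps the count even when IsDurable = false
    PCountOld = maybe_inc(PCount, IsPersistent),
    %% after: counts only msgs that actually hit the persistent store
    PCountNew = maybe_inc(PCount, IsPersistent1),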
--- src/rabbit_variable_queue.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b2eb4dc4..e0cb3978 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1180,12 +1180,12 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, in_counter = InCount, persistent_count = PCount, durable = IsDurable }) -> + IsPersistent1 = IsDurable andalso IsPersistent, MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, - is_persistent = IsDurable andalso IsPersistent, + msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent1, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = false }, - PCount1 = maybe_inc(PCount, IsPersistent), + PCount1 = maybe_inc(PCount, IsPersistent1), {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, -- cgit v1.2.1 From 24b692e7122ce8df272b8ea657f9f9b5b064b12d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 07:39:05 +0100 Subject: refactor: make 'ack' and 'requeue' look very similar indeed ...prior to combining them --- src/rabbit_variable_queue.erl | 74 +++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index e0cb3978..e21b7f6f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -548,27 +548,29 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, ack([], State) -> State; -ack(AckTags, State = #vqstate { index_state = IndexState, - persistent_count = PCount, - pending_ack = PA }) -> - {GuidsByStore, SeqIds, PA1} = +ack(AckTags, State) -> + {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, + persistent_count = PCount }} = lists:foldl( - fun (SeqId, {Dict, SeqIds, PAN}) -> - PAN1 = dict:erase(SeqId, PAN), - case dict:find(SeqId, PAN) of + fun (SeqId, {SeqIdsAcc, Dict, State2 = #vqstate { + pending_ack = PA }}) -> + PA1 = dict:erase(SeqId, PA), + State3 = State2 #vqstate { pending_ack = PA1 }, + case dict:find(SeqId, PA) of {ok, #msg_status { index_on_disk = false, %% ASSERTIONS msg_on_disk = false, is_persistent = false }} -> - {Dict, SeqIds, PAN1}; + {SeqIdsAcc, Dict, State3}; {ok, {IsPersistent, Guid}} -> - SeqIds1 = case IsPersistent of - true -> [SeqId | SeqIds]; - false -> SeqIds - end, - {rabbit_misc:dict_cons(find_msg_store(IsPersistent), - Guid, Dict), SeqIds1, PAN1} + MsgStore = find_msg_store(IsPersistent), + SeqIdsAcc1 = case IsPersistent of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, + {SeqIdsAcc1, + rabbit_misc:dict_cons(MsgStore, Guid, Dict), State3} end - end, {dict:new(), [], PA}, AckTags), + end, {[], dict:new(), State}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) @@ -577,9 +579,8 @@ ack(AckTags, State = #vqstate { index_state = IndexState, error -> 0; {ok, Guids} -> length(Guids) end, - State #vqstate { index_state = IndexState1, - persistent_count = PCount1, - pending_ack = PA1 }. + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1 }. tx_publish(Txn, Msg = #basic_message { is_persistent = true, guid = Guid }, @@ -630,37 +631,36 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> end}. 
requeue(AckTags, State) -> - {SeqIds, GuidsByStore, - State1 = #vqstate { index_state = IndexState, - persistent_count = PCount }} = + {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, + persistent_count = PCount }} = lists:foldl( - fun (SeqId, {SeqIdsAcc, Dict, StateN = #vqstate { - msg_store_clients = MSCStateN, - pending_ack = PAN }}) -> - PAN1 = dict:erase(SeqId, PAN), - StateN1 = StateN #vqstate { pending_ack = PAN1 }, - case dict:find(SeqId, PAN) of - {ok, #msg_status { index_on_disk = false, + fun (SeqId, {SeqIdsAcc, Dict, State2 = #vqstate { + msg_store_clients = MSCState, + pending_ack = PA }}) -> + PA1 = dict:erase(SeqId, PA), + State3 = State2 #vqstate { pending_ack = PA1 }, + case dict:find(SeqId, PA) of + {ok, #msg_status { index_on_disk = false, %% ASSERTIONS msg_on_disk = false, is_persistent = false, msg = Msg }} -> - {_SeqId, StateN2} = - publish(Msg, true, false, StateN1), - {SeqIdsAcc, Dict, StateN2}; + {_SeqId, State4} = + publish(Msg, true, false, State3), + {SeqIdsAcc, Dict, State4}; {ok, {IsPersistent, Guid}} -> - {{ok, Msg = #basic_message{}}, MSCStateN1} = + {{ok, Msg = #basic_message{}}, MSCState1} = read_from_msg_store( - MSCStateN, IsPersistent, Guid), - StateN2 = StateN1 #vqstate { - msg_store_clients = MSCStateN1 }, - {_SeqId, StateN3} = publish(Msg, true, true, StateN2), + MSCState, IsPersistent, Guid), + State4 = State3 #vqstate { + msg_store_clients = MSCState1 }, + {_SeqId, State5} = publish(Msg, true, true, State4), MsgStore = find_msg_store(IsPersistent), SeqIdsAcc1 = case IsPersistent of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc end, {SeqIdsAcc1, - rabbit_misc:dict_cons(MsgStore, Guid, Dict), StateN3} + rabbit_misc:dict_cons(MsgStore, Guid, Dict), State5} end end, {[], dict:new(), State}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), -- cgit v1.2.1 From 25fdff44df3cb3516ef1984bb3a6fdb90e19f9da Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 08:39:35 +0100 Subject: refactor: extract similarities between ack and requeue --- src/rabbit_variable_queue.erl | 124 ++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 76 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index e21b7f6f..73bb6e19 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -546,41 +546,8 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, pending_ack = PA1 }} end. 
-ack([], State) -> - State; ack(AckTags, State) -> - {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, - persistent_count = PCount }} = - lists:foldl( - fun (SeqId, {SeqIdsAcc, Dict, State2 = #vqstate { - pending_ack = PA }}) -> - PA1 = dict:erase(SeqId, PA), - State3 = State2 #vqstate { pending_ack = PA1 }, - case dict:find(SeqId, PA) of - {ok, #msg_status { index_on_disk = false, %% ASSERTIONS - msg_on_disk = false, - is_persistent = false }} -> - {SeqIdsAcc, Dict, State3}; - {ok, {IsPersistent, Guid}} -> - MsgStore = find_msg_store(IsPersistent), - SeqIdsAcc1 = case IsPersistent of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, - {SeqIdsAcc1, - rabbit_misc:dict_cons(MsgStore, Guid, Dict), State3} - end - end, {[], dict:new(), State}, AckTags), - IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:remove(MsgStore, Guids) - end, ok, GuidsByStore), - PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of - error -> 0; - {ok, Guids} -> length(Guids) - end, - State1 #vqstate { index_state = IndexState1, - persistent_count = PCount1 }. + ack(fun (_AckEntry, State1) -> State1 end, AckTags, State). tx_publish(Txn, Msg = #basic_message { is_persistent = true, guid = Guid }, @@ -631,48 +598,18 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> end}. requeue(AckTags, State) -> - {SeqIds, GuidsByStore, State1 = #vqstate { index_state = IndexState, - persistent_count = PCount }} = - lists:foldl( - fun (SeqId, {SeqIdsAcc, Dict, State2 = #vqstate { - msg_store_clients = MSCState, - pending_ack = PA }}) -> - PA1 = dict:erase(SeqId, PA), - State3 = State2 #vqstate { pending_ack = PA1 }, - case dict:find(SeqId, PA) of - {ok, #msg_status { index_on_disk = false, %% ASSERTIONS - msg_on_disk = false, - is_persistent = false, - msg = Msg }} -> - {_SeqId, State4} = - publish(Msg, true, false, State3), - {SeqIdsAcc, Dict, State4}; - {ok, {IsPersistent, Guid}} -> - {{ok, Msg = #basic_message{}}, MSCState1} = - read_from_msg_store( - MSCState, IsPersistent, Guid), - State4 = State3 #vqstate { - msg_store_clients = MSCState1 }, - {_SeqId, State5} = publish(Msg, true, true, State4), - MsgStore = find_msg_store(IsPersistent), - SeqIdsAcc1 = case IsPersistent of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, - {SeqIdsAcc1, - rabbit_misc:dict_cons(MsgStore, Guid, Dict), State5} - end - end, {[], dict:new(), State}, AckTags), - IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:release(MsgStore, Guids) - end, ok, GuidsByStore), - PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of - error -> 0; - {ok, Guids} -> length(Guids) - end, - State1 #vqstate { index_state = IndexState1, - persistent_count = PCount1 }. + ack(fun (#msg_status { msg = Msg }, State1) -> + {_SeqId, State2} = publish(Msg, true, false, State1), + State2; + ({IsPersistent, Guid}, State1 = #vqstate { + msg_store_clients = MSCState }) -> + {{ok, Msg = #basic_message{}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + {_SeqId, State2} = publish(Msg, true, true, + State1 #vqstate { + msg_store_clients = MSCState1 }), + State2 + end, AckTags, State). len(#vqstate { len = Len }) -> Len. 
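 %% A sketch of the shared shape this aims at (illustrative): the
 %% per-entry callback receives each pending-ack entry -- a #msg_status
 %% for a message still held in RAM, or an {IsPersistent, Guid} pair for
 %% one already on disk -- and returns the updated state, so plain ack
 %% needs only an identity fun:
 %%
 %%   ack(fun (_AckEntry, StateN) -> StateN end, AckTags, State)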
@@ -936,6 +873,41 @@ should_force_index_to_disk(State =
 %% Internal major helpers for Public API
 %%----------------------------------------------------------------------------
 
+ack(_Fun, [], State) ->
+    State;
+ack(Fun, AckTags, State) ->
+    {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState,
+                                                 persistent_count = PCount }} =
+        lists:foldl(
+          fun (SeqId, {{SeqIdsAcc, Dict}, State2 = #vqstate {
+                                            pending_ack = PA }}) ->
+                  {ok, AckEntry} = dict:find(SeqId, PA),
+                  {case AckEntry of
+                       #msg_status { index_on_disk = false, %% ASSERTIONS
+                                     msg_on_disk = false,
+                                     is_persistent = false } ->
+                           {SeqIdsAcc, Dict};
+                       {IsPersistent, Guid} ->
+                           {case IsPersistent of
+                                true  -> [SeqId | SeqIdsAcc];
+                                false -> SeqIdsAcc
+                            end,
+                            rabbit_misc:dict_cons(find_msg_store(IsPersistent),
+                                                  Guid, Dict)}
+                   end, Fun(AckEntry, State2 #vqstate {
+                                        pending_ack = dict:erase(SeqId, PA) })}
+          end, {{[], dict:new()}, State}, AckTags),
+    IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState),
+    ok = dict:fold(fun (MsgStore, Guids, ok) ->
+                           rabbit_msg_store:release(MsgStore, Guids)
+                   end, ok, GuidsByStore),
+    PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of
+                           error -> 0;
+                           {ok, Guids} -> length(Guids)
+                       end,
+    State1 #vqstate { index_state = IndexState1,
+                      persistent_count = PCount1 }.
+
 msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) ->
     Self = self(),
     F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue(
-- cgit v1.2.1

From 83b9042301c7d2a4199630cac724be51cab2673d Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Sat, 12 Jun 2010 12:56:04 +0100
Subject: refactor: simplify publish_delivered

- extract recording of pending acks
- use maybe_write_to_disk
---
 src/rabbit_variable_queue.erl | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 73bb6e19..0c9995b9 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -434,8 +434,6 @@ publish_delivered(false, _Msg, State = #vqstate { len = 0 }) ->
 publish_delivered(true, Msg = #basic_message { guid = Guid,
                                                is_persistent = IsPersistent },
                   State = #vqstate { len = 0,
-                                     index_state = IndexState,
-                                     msg_store_clients = MSCState,
                                      next_seq_id = SeqId,
                                      out_counter = OutCount,
                                      in_counter = InCount,
@@ -446,24 +444,14 @@ publish_delivered(true, Msg = #basic_message { guid = Guid,
     MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId,
                               is_persistent = IsPersistent1, is_delivered = true,
                               msg_on_disk = false, index_on_disk = false },
-    {MsgStatus1, MSCState1} =
-        maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+    {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
+    PA1 = record_pending_ack(MsgStatus1, PA),
     PCount1 = maybe_inc(PCount, IsPersistent1),
-    State1 = State #vqstate { msg_store_clients = MSCState1,
-                              persistent_count = PCount1,
-                              next_seq_id = SeqId + 1,
+    {SeqId, State1 #vqstate { next_seq_id = SeqId + 1,
                               out_counter = OutCount + 1,
-                              in_counter = InCount + 1 },
-    {SeqId,
-     case MsgStatus1 #msg_status.msg_on_disk of
-         true -> {#msg_status { index_on_disk = true }, IndexState1} =
-                     maybe_write_index_to_disk(false, MsgStatus1, IndexState),
-                 PA1 = dict:store(SeqId, {true, Guid}, PA),
-                 State1 #vqstate { index_state = IndexState1,
-                                   pending_ack = PA1 };
-         false -> PA1 = dict:store(SeqId, MsgStatus1, PA),
-                  State1 #vqstate { pending_ack = PA1 }
-     end}.
+ in_counter = InCount + 1, + persistent_count = PCount1, + pending_ack = PA1 }}. fetch(AckRequired, State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, @@ -524,13 +512,8 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, %% 4. If an ack is required, add something sensible to PA PA1 = case AckRequired of - true -> Entry = - case MsgOnDisk of - true -> {IsPersistent, Guid}; - false -> MsgStatus #msg_status { - is_delivered = true } - end, - dict:store(SeqId, Entry, PA); + true -> record_pending_ack(MsgStatus #msg_status { + is_delivered = true }, PA); false -> PA end, @@ -708,6 +691,15 @@ maybe_inc(N, false) -> N. maybe_dec(N, true ) -> N - 1; maybe_dec(N, false) -> N. +record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> + AckEntry = case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus + end, + dict:store(SeqId, AckEntry, PA). + remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState }) -> -- cgit v1.2.1 From 18c2dac9ec9107d8d53287055c3aea45136bb7ba Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 13:38:14 +0100 Subject: refactor: extract #msg_status initialisation --- src/rabbit_variable_queue.erl | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0c9995b9..3a283bbf 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -431,8 +431,7 @@ publish(Msg, State) -> publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> {blank_ack, State}; -publish_delivered(true, Msg = #basic_message { guid = Guid, - is_persistent = IsPersistent }, +publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, State = #vqstate { len = 0, next_seq_id = SeqId, out_counter = OutCount, @@ -441,9 +440,8 @@ publish_delivered(true, Msg = #basic_message { guid = Guid, pending_ack = PA, durable = IsDurable }) -> IsPersistent1 = IsDurable andalso IsPersistent, - MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent1, - is_delivered = true, msg_on_disk = false, index_on_disk = false }, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = true }, {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), PA1 = record_pending_ack(MsgStatus1, PA), PCount1 = maybe_inc(PCount, IsPersistent1), @@ -532,12 +530,9 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, ack(AckTags, State) -> ack(fun (_AckEntry, State1) -> State1 end, AckTags, State). -tx_publish(Txn, - Msg = #basic_message { is_persistent = true, guid = Guid }, +tx_publish(Txn, Msg = #basic_message { is_persistent = true }, State = #vqstate { msg_store_clients = MSCState, durable = true }) -> - MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = undefined, is_persistent = true, - is_delivered = false, msg_on_disk = false, index_on_disk = false }, + MsgStatus = msg_status(true, undefined, Msg), {#msg_status { msg_on_disk = true }, MSCState1} = maybe_write_msg_to_disk(false, MsgStatus, MSCState), publish_in_tx(Txn, Msg), @@ -691,6 +686,11 @@ maybe_inc(N, false) -> N. maybe_dec(N, true ) -> N - 1; maybe_dec(N, false) -> N. 
+msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> + #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, + is_persistent = IsPersistent, is_delivered = false, + msg_on_disk = false, index_on_disk = false }. + record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> @@ -1137,7 +1137,7 @@ test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, end end. -publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, +publish(Msg = #basic_message { is_persistent = IsPersistent }, IsDelivered, MsgOnDisk, State = #vqstate { next_seq_id = SeqId, len = Len, @@ -1145,10 +1145,8 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, guid = Guid }, persistent_count = PCount, durable = IsDurable }) -> IsPersistent1 = IsDurable andalso IsPersistent, - MsgStatus = #msg_status { - msg = Msg, guid = Guid, seq_id = SeqId, is_persistent = IsPersistent1, - is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, - index_on_disk = false }, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, PCount1 = maybe_inc(PCount, IsPersistent1), {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, -- cgit v1.2.1 From 9b7a0ff764dfb228187ef47c571282fa81700ecc Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 14:03:40 +0100 Subject: refactor: extract conditional calls to qi:deliver --- src/rabbit_variable_queue.erl | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 3a283bbf..6efe07ba 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -476,11 +476,9 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, end, %% 1. Mark it delivered if necessary - IndexState1 = case IndexOnDisk andalso not IsDelivered of - true -> rabbit_queue_index:deliver( - SeqId, IndexState); - false -> IndexState - end, + IndexState1 = maybe_write_delivered( + IndexOnDisk andalso not IsDelivered, + SeqId, IndexState), %% 2. If it's on disk and there's no Ack required, remove it MsgStore = find_msg_store(IsPersistent), @@ -691,6 +689,11 @@ msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> is_persistent = IsPersistent, is_delivered = false, msg_on_disk = false, index_on_disk = false }. +maybe_write_delivered(false, _SeqId, IndexState) -> + IndexState; +maybe_write_delivered(true, SeqId, IndexState) -> + rabbit_queue_index:deliver(SeqId, IndexState). 
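 %% Usage sketch (illustrative): callers can now express the old
 %% conditional case-expressions as a single boolean-driven call, e.g.
 %%
 %%   IndexState1 = maybe_write_delivered(
 %%                   IndexOnDisk andalso not IsDelivered,
 %%                   SeqId, IndexState),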
+ record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> @@ -773,14 +776,11 @@ betas_from_segment_entries(List, TransientThreshold, IndexState) -> fun ({Guid, SeqId, IsPersistent, IsDelivered}, {FilteredAcc, IndexStateAcc}) -> case SeqId < TransientThreshold andalso not IsPersistent of - true -> IndexStateAcc1 = - case IsDelivered of - false -> rabbit_queue_index:deliver( - SeqId, IndexStateAcc); - true -> IndexStateAcc - end, - {FilteredAcc, rabbit_queue_index:ack( - [SeqId], IndexStateAcc1)}; + true -> {FilteredAcc, + rabbit_queue_index:ack( + [SeqId], maybe_write_delivered( + not IsDelivered, + SeqId, IndexStateAcc))}; false -> {[#msg_status { msg = undefined, guid = Guid, seq_id = SeqId, @@ -1014,7 +1014,7 @@ remove_queue_entries1( #msg_status { guid = Guid, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {CountN, GuidsByStore, SeqIdsAcc, IndexStateN}) -> + {Count, GuidsByStore, SeqIdsAcc, IndexState}) -> GuidsByStore1 = case {MsgOnDisk, IsPersistent} of {true, true} -> rabbit_misc:dict_cons(?PERSISTENT_MSG_STORE, @@ -1027,11 +1027,10 @@ remove_queue_entries1( true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc end, - IndexStateN1 = case IndexOnDisk andalso not IsDelivered of - true -> rabbit_queue_index:deliver(SeqId, IndexStateN); - false -> IndexStateN - end, - {CountN + 1, GuidsByStore1, SeqIdsAcc1, IndexStateN1}. + IndexState1 = maybe_write_delivered( + IndexOnDisk andalso not IsDelivered, + SeqId, IndexState), + {Count + 1, GuidsByStore1, SeqIdsAcc1, IndexState1}. fetch_from_q3_or_delta(State = #vqstate { q1 = Q1, @@ -1256,10 +1255,7 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent, IndexState), {MsgStatus #msg_status { index_on_disk = true }, - case IsDelivered of - true -> rabbit_queue_index:deliver(SeqId, IndexState1); - false -> IndexState1 - end}; + maybe_write_delivered(IsDelivered, SeqId, IndexState1)}; maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> {MsgStatus, IndexState}. -- cgit v1.2.1 From 99357f1306d19bac5e660337ccc4c4528240826b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 16:51:57 +0100 Subject: refactor: simplify 'fetch' --- src/rabbit_variable_queue.erl | 51 +++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6efe07ba..f8ced680 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -470,48 +470,31 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, Q4a} -> - AckTag = case AckRequired of - true -> SeqId; - false -> blank_ack - end, - %% 1. Mark it delivered if necessary IndexState1 = maybe_write_delivered( IndexOnDisk andalso not IsDelivered, SeqId, IndexState), - %% 2. If it's on disk and there's no Ack required, remove it + %% 2. 
Remove from msg_store and queue index, if necessary MsgStore = find_msg_store(IsPersistent), + Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end, + Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, IndexState2 = - case MsgOnDisk andalso not AckRequired of - %% Remove from disk now - true -> ok = case MsgOnDisk of - true -> rabbit_msg_store:remove( - MsgStore, [Guid]); - false -> ok - end, - case IndexOnDisk of - true -> rabbit_queue_index:ack( - [SeqId], IndexState1); - false -> IndexState1 - end; - false -> IndexState1 - end, - - %% 3. If it's on disk, not persistent and an ack's - %% required then remove it from the queue index only. - IndexState3 = - case IndexOnDisk andalso AckRequired andalso not IsPersistent of - true -> rabbit_queue_index:ack([SeqId], IndexState2); - false -> IndexState2 + case {MsgOnDisk, IndexOnDisk, AckRequired, IsPersistent} of + {true, false, false, _} -> Rem(), IndexState1; + {true, true, false, _} -> Rem(), Ack(); + {true, true, true, false} -> Ack(); + _ -> IndexState1 end, - %% 4. If an ack is required, add something sensible to PA - PA1 = case AckRequired of - true -> record_pending_ack(MsgStatus #msg_status { - is_delivered = true }, PA); - false -> PA - end, + %% 3. If an ack is required, add something sensible to PA + {AckTag, PA1} = case AckRequired of + true -> PA2 = record_pending_ack( + MsgStatus #msg_status { + is_delivered = true }, PA), + {SeqId, PA2}; + false -> {blank_ack, PA} + end, PCount1 = maybe_dec(PCount, IsPersistent andalso not AckRequired), Len1 = Len - 1, @@ -519,7 +502,7 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, State #vqstate { q4 = Q4a, ram_msg_count = RamMsgCount - 1, out_counter = OutCount + 1, - index_state = IndexState3, + index_state = IndexState2, len = Len1, persistent_count = PCount1, pending_ack = PA1 }} -- cgit v1.2.1 From d5d76dbd9317f1b4d73b7c21ed4d35a8c2d9fbee Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 12 Jun 2010 16:53:19 +0100 Subject: refactor: extract accumulation of SeqIds and Guids from acks --- src/rabbit_variable_queue.erl | 59 ++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f8ced680..1319979c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -677,6 +677,12 @@ maybe_write_delivered(false, _SeqId, IndexState) -> maybe_write_delivered(true, SeqId, IndexState) -> rabbit_queue_index:deliver(SeqId, IndexState). +accumulate_ack(SeqId, IsPersistent, Guid, {SeqIdsAcc, Dict}) -> + {case IsPersistent of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, Dict)}. 
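 %% Folding sketch (illustrative only): accumulate_ack/4 threads a
 %% {SeqIdsAcc, GuidsByStore} pair through a fold, keeping SeqIds only
 %% for persistent messages and grouping Guids by the store that holds
 %% them:
 %%
 %%   {SeqIds, GuidsByStore} =
 %%       lists:foldl(fun ({SeqId, IsP, Guid}, Acc) ->
 %%                           accumulate_ack(SeqId, IsP, Guid, Acc)
 %%                   end, {[], dict:new()}, Entries),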
+ record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, is_persistent = IsPersistent, msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> @@ -689,24 +695,17 @@ record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState }) -> - {SeqIds, GuidsByStore, PA1} = + {{SeqIds, GuidsByStore}, PA1} = dict:fold( - fun (SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict, PAN}) -> - PAN1 = case KeepPersistent andalso IsPersistent of - true -> PAN; - false -> dict:erase(SeqId, PAN) - end, - case IsPersistent of - true -> {[SeqId | SeqIdsAcc], - rabbit_misc:dict_cons( - ?PERSISTENT_MSG_STORE, Guid, Dict), PAN1}; - false -> {SeqIdsAcc, - rabbit_misc:dict_cons( - ?TRANSIENT_MSG_STORE, Guid, Dict), PAN1} - end; - (SeqId, #msg_status {}, {SeqIdsAcc, Dict, PAN}) -> - {SeqIdsAcc, Dict, dict:erase(SeqId, PAN)} - end, {[], dict:new(), PA}, PA), + fun (SeqId, {IsPersistent, Guid}, {Acc, PA2}) -> + {accumulate_ack(SeqId, IsPersistent, Guid, Acc), + case KeepPersistent andalso IsPersistent of + true -> PA2; + false -> dict:erase(SeqId, PA2) + end}; + (SeqId, #msg_status {}, {Acc, PA2}) -> + {Acc, dict:erase(SeqId, PA2)} + end, {{[], dict:new()}, PA}, PA), case KeepPersistent of true -> State1 = State #vqstate { pending_ack = PA1 }, case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of @@ -854,21 +853,15 @@ ack(Fun, AckTags, State) -> {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( - fun (SeqId, {{SeqIdsAcc, Dict}, State2 = #vqstate { - pending_ack = PA }}) -> + fun (SeqId, {Acc, State2 = #vqstate {pending_ack = PA }}) -> {ok, AckEntry} = dict:find(SeqId, PA), {case AckEntry of #msg_status { index_on_disk = false, %% ASSERTIONS msg_on_disk = false, is_persistent = false } -> - {SeqIdsAcc, Dict}; + Acc; {IsPersistent, Guid} -> - {case IsPersistent of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, - rabbit_misc:dict_cons(find_msg_store(IsPersistent), - Guid, Dict)} + accumulate_ack(SeqId, IsPersistent, Guid, Acc) end, Fun(AckEntry, State2 #vqstate { pending_ack = dict:erase(SeqId, PA) })} end, {{[], dict:new()}, State}, AckTags), @@ -998,14 +991,12 @@ remove_queue_entries1( is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, {Count, GuidsByStore, SeqIdsAcc, IndexState}) -> - GuidsByStore1 = - case {MsgOnDisk, IsPersistent} of - {true, true} -> rabbit_misc:dict_cons(?PERSISTENT_MSG_STORE, - Guid, GuidsByStore); - {true, false} -> rabbit_misc:dict_cons(?TRANSIENT_MSG_STORE, - Guid, GuidsByStore); - {false, _} -> GuidsByStore - end, + GuidsByStore1 = case MsgOnDisk of + true -> rabbit_misc:dict_cons( + find_msg_store(IsPersistent), + Guid, GuidsByStore); + false -> GuidsByStore + end, SeqIdsAcc1 = case IndexOnDisk of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc -- cgit v1.2.1 From d56dad54b943217091bd2e47c1c74951d2a813c2 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 13 Jun 2010 14:47:17 +0100 Subject: minor refactor --- src/rabbit_variable_queue.erl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 1319979c..20ef326e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -444,7 +444,7 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, #msg_status { is_delivered = true }, {MsgStatus1, State1} = 
maybe_write_to_disk(false, false, MsgStatus, State), PA1 = record_pending_ack(MsgStatus1, PA), - PCount1 = maybe_inc(PCount, IsPersistent1), + PCount1 = PCount + one_if(IsPersistent1), {SeqId, State1 #vqstate { next_seq_id = SeqId + 1, out_counter = OutCount + 1, in_counter = InCount + 1, @@ -496,7 +496,7 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, false -> {blank_ack, PA} end, - PCount1 = maybe_dec(PCount, IsPersistent andalso not AckRequired), + PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, State #vqstate { q4 = Q4a, @@ -661,11 +661,8 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, %% Minor helpers %%---------------------------------------------------------------------------- -maybe_inc(N, true ) -> N + 1; -maybe_inc(N, false) -> N. - -maybe_dec(N, true ) -> N - 1; -maybe_dec(N, false) -> N. +one_if(true ) -> 1; +one_if(false) -> 0. msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, @@ -1028,7 +1025,7 @@ fetch_from_q3_or_delta(State = #vqstate { guid = Guid }}, MSCState1} = read_from_msg_store(MSCState, IsPersistent, Guid), Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), - RamIndexCount1 = maybe_dec(RamIndexCount, not IndexOnDisk), + RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), true = RamIndexCount1 >= 0, %% ASSERTION State1 = State #vqstate { q3 = Q3a, q4 = Q4a, @@ -1120,7 +1117,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, IsPersistent1 = IsDurable andalso IsPersistent, MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, - PCount1 = maybe_inc(PCount, IsPersistent1), + PCount1 = PCount + one_if(IsPersistent1), {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, @@ -1139,7 +1136,7 @@ publish(index, MsgStatus, State) -> index_on_disk = IndexOnDisk }, State1 = #vqstate { ram_index_count = RamIndexCount, q1 = Q1 }} = maybe_write_to_disk(true, ForceIndex, MsgStatus, State), - RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), + RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), State2 = State1 #vqstate { ram_index_count = RamIndexCount1 }, true = queue:is_empty(Q1), %% ASSERTION store_beta_entry(MsgStatus1, State2); @@ -1389,7 +1386,7 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> State1 = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount }} = maybe_write_to_disk(true, ForceIndex, MsgStatus, State), - RamIndexCount1 = maybe_inc(RamIndexCount, not IndexOnDisk), + RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, ram_index_count = RamIndexCount1 }, maybe_push_alphas_to_betas(Generator, Consumer, Qa, -- cgit v1.2.1 From 609f231a9f253ac794c995182536e46213ebf1e1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 14 Jun 2010 08:43:26 +0100 Subject: cosmetic: more sensible ordering of state components essential vars first, followed by derived vars and counters/rates/etc --- src/rabbit_variable_queue.erl | 72 +++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 20ef326e..0a3a3d23 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -172,27 +172,29 @@ delta, q3, q4, + 
next_seq_id, + pending_ack, + index_state, + msg_store_clients, + on_sync, + durable, + transient_threshold, + + len, + persistent_count, + duration_target, target_ram_msg_count, ram_msg_count, ram_msg_count_prev, ram_index_count, - index_state, - next_seq_id, out_counter, in_counter, egress_rate, avg_egress_rate, ingress_rate, avg_ingress_rate, - rate_timestamp, - len, - on_sync, - msg_store_clients, - persistent_count, - transient_threshold, - pending_ack, - durable + rate_timestamp }). -record(msg_status, @@ -244,13 +246,24 @@ delta :: delta(), q3 :: bpqueue(), q4 :: queue(), + next_seq_id :: seq_id(), + pending_ack :: dict(), + index_state :: any(), + msg_store_clients :: 'undefined' | {{any(), binary()}, + {any(), binary()}}, + on_sync :: {[[ack()]], [[guid()]], + [fun (() -> any())]}, + durable :: boolean(), + + len :: non_neg_integer(), + persistent_count :: non_neg_integer(), + + transient_threshold :: non_neg_integer(), duration_target :: non_neg_integer(), target_ram_msg_count :: non_neg_integer(), ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), ram_index_count :: non_neg_integer(), - index_state :: any(), - next_seq_id :: seq_id(), out_counter :: non_neg_integer(), in_counter :: non_neg_integer(), egress_rate :: {{integer(), integer(), integer()}, @@ -259,16 +272,7 @@ ingress_rate :: {{integer(), integer(), integer()}, non_neg_integer()}, avg_ingress_rate :: float(), - rate_timestamp :: {integer(), integer(), integer()}, - len :: non_neg_integer(), - on_sync :: {[[ack()]], [[guid()]], - [fun (() -> any())]}, - msg_store_clients :: 'undefined' | {{any(), binary()}, - {any(), binary()}}, - persistent_count :: non_neg_integer(), - transient_threshold :: non_neg_integer(), - pending_ack :: dict(), - durable :: boolean() + rate_timestamp :: {integer(), integer(), integer()} }). -include("rabbit_backing_queue_spec.hrl"). @@ -339,28 +343,30 @@ init(QueueName, IsDurable, _Recover) -> delta = Delta, q3 = bpqueue:new(), q4 = queue:new(), + next_seq_id = NextSeqId, + pending_ack = dict:new(), + index_state = IndexState1, + msg_store_clients = {{PersistentClient, PRef}, + {TransientClient, TRef}}, + on_sync = {[], [], []}, + durable = IsDurable, + transient_threshold = NextSeqId, + + len = DeltaCount1, + persistent_count = DeltaCount1, + duration_target = undefined, target_ram_msg_count = undefined, ram_msg_count = 0, ram_msg_count_prev = 0, ram_index_count = 0, - index_state = IndexState1, - next_seq_id = NextSeqId, out_counter = 0, in_counter = 0, egress_rate = {Now, 0}, avg_egress_rate = 0, ingress_rate = {Now, DeltaCount1}, avg_ingress_rate = 0, - rate_timestamp = Now, - len = DeltaCount1, - on_sync = {[], [], []}, - msg_store_clients = {{PersistentClient, PRef}, - {TransientClient, TRef}}, - persistent_count = DeltaCount1, - transient_threshold = NextSeqId, - pending_ack = dict:new(), - durable = IsDurable + rate_timestamp = Now }, maybe_deltas_to_betas(State). 
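 %% A note on why this reordering is safe (a sketch of plain Erlang
 %% semantics, not part of the patch): records compile to tuples, so
 %% moving a field shifts its element/2 position, but every access in
 %% this module goes through record syntax, which the compiler resolves
 %% against whichever declaration is in force at build time:
 %%
 %%   -record(r, {a, b}).   %% #r{a = 1, b = 2} builds the tuple {r, 1, 2}
 %%   %% with the declaration changed to -record(r, {b, a}) the same
 %%   %% expression builds {r, 2, 1}, yet R#r.a still yields 1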
-- cgit v1.2.1 From afc0ca5ac3ab7468b9083c92608a0be453b8d903 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 14 Jun 2010 09:22:02 +0100 Subject: refactor: rename and simplify test_keep_msg_in_ram --- src/rabbit_variable_queue.erl | 62 ++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0a3a3d23..dfea1054 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1075,42 +1075,32 @@ reduce_memory_use(State = #vqstate { %% Internal gubbins for publishing %%---------------------------------------------------------------------------- -test_keep_msg_in_ram(SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, - q1 = Q1, - q3 = Q3 }) -> - case TargetRamMsgCount of - undefined -> - msg; - 0 -> - case bpqueue:out(Q3) of - {empty, _Q3} -> - %% if TargetRamMsgCount == 0, we know we have no - %% alphas. If q3 is empty then delta must be empty - %% too, so create a beta, which should end up in - %% q3 - index; - {{value, _IndexOnDisk, #msg_status { seq_id = OldSeqId }}, - _Q3a} -> - %% Don't look at the current delta as it may be - %% empty. If the SeqId is still within the current - %% segment, it'll be a beta, else it'll go into - %% delta - case SeqId >= rabbit_queue_index:next_segment_boundary( - OldSeqId) of - true -> neither; - false -> index - end - end; - _ when TargetRamMsgCount > RamMsgCount -> - msg; - _ -> - case queue:is_empty(Q1) of - true -> index; - %% Can push out elders (in q1) to disk. This may also - %% result in the msg itself going to disk and q2/q3. - false -> msg +msg_storage_type(_SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount }) + when TargetRamMsgCount == undefined orelse TargetRamMsgCount > RamMsgCount -> + msg; +msg_storage_type( SeqId, #vqstate { target_ram_msg_count = 0, q3 = Q3 }) -> + case bpqueue:out(Q3) of + {empty, _Q3} -> + %% if TargetRamMsgCount == 0, we know we have no + %% alphas. If q3 is empty then delta must be empty too, so + %% create a beta, which should end up in q3 + index; + {{value, _IndexOnDisk, #msg_status { seq_id = OldSeqId }}, _Q3a} -> + %% Don't look at the current delta as it may be empty. If + %% the SeqId is still within the current segment, it'll be + %% a beta, else it'll go into delta + case SeqId >= rabbit_queue_index:next_segment_boundary(OldSeqId) of + true -> neither; + false -> index end + end; +msg_storage_type(_SeqId, #vqstate { q1 = Q1 }) -> + case queue:is_empty(Q1) of + true -> index; + %% Can push out elders (in q1) to disk. This may also result + %% in the msg itself going to disk and q2/q3. + false -> msg end. 
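 %% Decision summary (an inferred sketch, in this module's own
 %% alpha/beta/delta terms): 'msg' keeps message and index in RAM (an
 %% alpha), 'index' keeps just the index entry in RAM with the body on
 %% disk (a beta, bound for q3), and 'neither' sends both to disk (a
 %% delta), dispatching into the matching publish/3 clause:
 %%
 %%   case msg_storage_type(SeqId, State) of
 %%       msg     -> publish(msg,     MsgStatus, State);
 %%       index   -> publish(index,   MsgStatus, State);
 %%       neither -> publish(neither, MsgStatus, State)
 %%   end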
publish(Msg = #basic_message { is_persistent = IsPersistent }, @@ -1124,7 +1114,7 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, PCount1 = PCount + one_if(IsPersistent1), - {SeqId, publish(test_keep_msg_in_ram(SeqId, State), MsgStatus, + {SeqId, publish(msg_storage_type(SeqId, State), MsgStatus, State #vqstate { next_seq_id = SeqId + 1, len = Len + 1, in_counter = InCount + 1, -- cgit v1.2.1 From 42474463709ab38af40e25560dab7e210ad204c8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 14 Jun 2010 09:46:00 +0100 Subject: minor refactor --- src/rabbit_variable_queue.erl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index dfea1054..32c51764 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -925,23 +925,21 @@ tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, durable = IsDurable }) -> Acks = lists:flatten(SAcks), - State1 = ack(Acks, State), Pubs = lists:flatten(lists:reverse(SPubs)), - {SeqIds, State2 = #vqstate { index_state = IndexState }} = + {SeqIds, State1 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, - {SeqIdsAcc, StateN}) -> - {SeqId, StateN1} = - publish(Msg, false, IsDurable andalso IsPersistent, - StateN), - {case IsDurable andalso IsPersistent of + {SeqIdsAcc, State2}) -> + IsPersistent1 = IsDurable andalso IsPersistent, + {SeqId, State3} = publish(Msg, false, IsPersistent1, State2), + {case IsPersistent1 of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc - end, StateN1} - end, {Acks, State1}, Pubs), + end, State3} + end, {Acks, ack(Acks, State)}, Pubs), IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), [ Fun() || Fun <- lists:reverse(SFuns) ], - State2 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. + State1 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. delete1(_TransientThreshold, NextSeqId, DeltaSeqId, IndexState) when DeltaSeqId =:= undefined orelse DeltaSeqId >= NextSeqId -> -- cgit v1.2.1 From 91004380799fd3c4e76db81c040a06b3f54856a9 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 14 Jun 2010 10:05:52 +0100 Subject: refactor: inline {publish,ack}_in_tx --- src/rabbit_variable_queue.erl | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 32c51764..b18ea2c5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -517,19 +517,22 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, ack(AckTags, State) -> ack(fun (_AckEntry, State1) -> State1 end, AckTags, State). -tx_publish(Txn, Msg = #basic_message { is_persistent = true }, - State = #vqstate { msg_store_clients = MSCState, durable = true }) -> - MsgStatus = msg_status(true, undefined, Msg), - {#msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(false, MsgStatus, MSCState), - publish_in_tx(Txn, Msg), - State #vqstate { msg_store_clients = MSCState1 }; -tx_publish(Txn, Msg, State) -> - publish_in_tx(Txn, Msg), - State. 
+tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { durable = IsDurable, + msg_store_clients = MSCState }) -> + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), + case IsPersistent andalso IsDurable of + true -> MsgStatus = msg_status(true, undefined, Msg), + {#msg_status { msg_on_disk = true }, MSCState1} = + maybe_write_msg_to_disk(false, MsgStatus, MSCState), + State #vqstate { msg_store_clients = MSCState1 }; + false -> State + end. tx_ack(Txn, AckTags, State) -> - ack_in_tx(Txn, AckTags), + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), State. tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> @@ -738,14 +741,6 @@ store_tx(Txn, Tx) -> erase_tx(Txn) -> erase({txn, Txn}). -publish_in_tx(Txn, Msg) -> - Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), - store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }). - -ack_in_tx(Txn, AckTags) -> - Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), - store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }). - update_rate(Now, Then, Count, {OThen, OCount}) -> %% form the avg over the current period and the previous Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), -- cgit v1.2.1 From 8c7d806d6a17a3e22f84e884fc0fefc401ab1fbc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 14 Jun 2010 18:42:55 +0100 Subject: Very important cosmetic fix --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b18ea2c5..ca3be6d2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -851,7 +851,7 @@ ack(Fun, AckTags, State) -> {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( - fun (SeqId, {Acc, State2 = #vqstate {pending_ack = PA }}) -> + fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> {ok, AckEntry} = dict:find(SeqId, PA), {case AckEntry of #msg_status { index_on_disk = false, %% ASSERTIONS -- cgit v1.2.1 From 6d212010ec889d0375b7a2719d0542845c1e0315 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 15 Jun 2010 06:02:50 +0100 Subject: minor refactor: rename function --- src/rabbit_variable_queue.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ca3be6d2..a5547f6b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -466,7 +466,7 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, pending_ack = PA }) -> case queue:out(Q4) of {empty, _Q4} -> - case fetch_from_q3_or_delta(State) of + case fetch_from_q3_to_q4(State) of {empty, _State1} = Result -> Result; {loaded, State1} -> fetch(AckRequired, State1) end; @@ -1002,15 +1002,15 @@ remove_queue_entries1( SeqId, IndexState), {Count + 1, GuidsByStore1, SeqIdsAcc1, IndexState1}. 
-fetch_from_q3_or_delta(State = #vqstate { - q1 = Q1, - q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3, - q4 = Q4, - ram_msg_count = RamMsgCount, - ram_index_count = RamIndexCount, - msg_store_clients = MSCState }) -> +fetch_from_q3_to_q4(State = #vqstate { + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + msg_store_clients = MSCState }) -> case bpqueue:out(Q3) of {empty, _Q3} -> 0 = DeltaCount, %% ASSERTION -- cgit v1.2.1 From 1369b25a88395933f0a2421f74a097e8dce02eed Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 15 Jun 2010 06:33:04 +0100 Subject: tweak: trust our own code a bit more --- src/rabbit_variable_queue.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a5547f6b..c75a2b11 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1020,8 +1020,7 @@ fetch_from_q3_to_q4(State = #vqstate { {{value, IndexOnDisk, MsgStatus = #msg_status { msg = undefined, guid = Guid, is_persistent = IsPersistent }}, Q3a} -> - {{ok, Msg = #basic_message { is_persistent = IsPersistent, - guid = Guid }}, MSCState1} = + {{ok, Msg = #basic_message {}}, MSCState1} = read_from_msg_store(MSCState, IsPersistent, Guid), Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), -- cgit v1.2.1 From ec36bdf8b84a0fdccf8f523740a3fb000c845dcd Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 15 Jun 2010 06:43:14 +0100 Subject: minor refactor --- src/rabbit_variable_queue.erl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c75a2b11..7d92e04d 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -569,14 +569,13 @@ requeue(AckTags, State) -> ack(fun (#msg_status { msg = Msg }, State1) -> {_SeqId, State2} = publish(Msg, true, false, State1), State2; - ({IsPersistent, Guid}, State1 = #vqstate { - msg_store_clients = MSCState }) -> + ({IsPersistent, Guid}, State1) -> + #vqstate { msg_store_clients = MSCState } = State1, {{ok, Msg = #basic_message{}}, MSCState1} = read_from_msg_store(MSCState, IsPersistent, Guid), - {_SeqId, State2} = publish(Msg, true, true, - State1 #vqstate { - msg_store_clients = MSCState1 }), - State2 + State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + {_SeqId, State3} = publish(Msg, true, true, State2), + State3 end, AckTags, State). len(#vqstate { len = Len }) -> -- cgit v1.2.1 From 9c05766b3b52282d92a634ba28d09845a64a9e3d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 15 Jun 2010 08:29:55 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7d92e04d..003088cb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -578,11 +578,9 @@ requeue(AckTags, State) -> State3 end, AckTags, State). -len(#vqstate { len = Len }) -> - Len. +len(#vqstate { len = Len }) -> Len. -is_empty(State) -> - 0 == len(State). +is_empty(State) -> 0 == len(State). set_ram_duration_target(DurationTarget, State = #vqstate { @@ -727,27 +725,23 @@ remove_pending_ack(KeepPersistent, index_state = IndexState1 } end. 
-lookup_tx(Txn) -> - case get({txn, Txn}) of - undefined -> #tx { pending_messages = [], - pending_acks = [] }; - V -> V - end. +lookup_tx(Txn) -> case get({txn, Txn}) of + undefined -> #tx { pending_messages = [], + pending_acks = [] }; + V -> V + end. -store_tx(Txn, Tx) -> - put({txn, Txn}, Tx). +store_tx(Txn, Tx) -> put({txn, Txn}, Tx). -erase_tx(Txn) -> - erase({txn, Txn}). +erase_tx(Txn) -> erase({txn, Txn}). update_rate(Now, Then, Count, {OThen, OCount}) -> %% form the avg over the current period and the previous Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), {Avg, {Then, Count}}. -persistent_guids(Pubs) -> - [Guid || Obj = #basic_message { guid = Guid } <- Pubs, - Obj #basic_message.is_persistent]. +persistent_guids(Pubs) -> [Guid || Obj = #basic_message { guid = Guid } <- Pubs, + Obj #basic_message.is_persistent]. betas_from_segment_entries(List, TransientThreshold, IndexState) -> {Filtered, IndexState1} = @@ -1232,10 +1226,8 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, %%---------------------------------------------------------------------------- limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> - case permitted_ram_index_count(State) of - undefined -> - State; - Permitted when RamIndexCount > Permitted -> + Permitted = permitted_ram_index_count(State), + if Permitted =/= undefined andalso RamIndexCount > Permitted -> Reduction = lists:min([RamIndexCount - Permitted, ?RAM_INDEX_BATCH_SIZE]), case Reduction < ?RAM_INDEX_BATCH_SIZE of @@ -1246,7 +1238,7 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> limit_q3_ram_index(Reduction1, State1), State2 end; - _ -> + true -> State end. -- cgit v1.2.1 From 1338d9513a2da7f78cc48add587d7fee08534669 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Jun 2010 16:08:13 +0100 Subject: Good lord, how did that get through? --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 62d2d588..5f94cf9d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -716,7 +716,7 @@ segment_store(Segment = #segment { num = Seg }, [Segment, SegmentA]}. segment_fold(Fun, Acc, {Segments, CachedSegments}) -> - dict:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc) end, + dict:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end, lists:foldl(Fun, Acc, CachedSegments), Segments). segment_map(Fun, {Segments, CachedSegments}) -> -- cgit v1.2.1 From cae30fab9a44f41ab54d218d90bf24f6b517e998 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 15 Jun 2010 18:40:20 +0100 Subject: assert vq state invariants as a post condition on all exported functions --- src/rabbit_variable_queue.erl | 143 +++++++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 64 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 003088cb..c2f90bac 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -368,7 +368,7 @@ init(QueueName, IsDurable, _Recover) -> avg_ingress_rate = 0, rate_timestamp = Now }, - maybe_deltas_to_betas(State). + a(maybe_deltas_to_betas(State)). terminate(State) -> State1 = #vqstate { persistent_count = PCount, @@ -384,9 +384,9 @@ terminate(State) -> Terms = [{persistent_ref, PRef}, {transient_ref, TRef}, {persistent_count, PCount}], - State1 #vqstate { index_state = rabbit_queue_index:terminate( - Terms, IndexState), - msg_store_clients = undefined }. 
+ a(State1 #vqstate { index_state = rabbit_queue_index:terminate( + Terms, IndexState), + msg_store_clients = undefined }). %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. @@ -416,8 +416,8 @@ delete_and_terminate(State) -> end, rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), rabbit_msg_store:client_terminate(MSCStateT), - State2 #vqstate { index_state = IndexState5, - msg_store_clients = undefined }. + a(State2 #vqstate { index_state = IndexState5, + msg_store_clients = undefined }). purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Q4Count, IndexState1} = @@ -425,18 +425,18 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> {Len, State1} = purge1(Q4Count, State #vqstate { q4 = queue:new(), index_state = IndexState1 }), - {Len, State1 #vqstate { len = 0, - ram_msg_count = 0, - ram_index_count = 0, - persistent_count = 0 }}. + {Len, a(State1 #vqstate { len = 0, + ram_msg_count = 0, + ram_index_count = 0, + persistent_count = 0 })}. publish(Msg, State) -> State1 = limit_ram_index(State), {_SeqId, State2} = publish(Msg, false, false, State1), - State2. + a(State2). publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> - {blank_ack, State}; + {blank_ack, a(State)}; publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, State = #vqstate { len = 0, next_seq_id = SeqId, @@ -451,11 +451,11 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), PA1 = record_pending_ack(MsgStatus1, PA), PCount1 = PCount + one_if(IsPersistent1), - {SeqId, State1 #vqstate { next_seq_id = SeqId + 1, - out_counter = OutCount + 1, - in_counter = InCount + 1, - persistent_count = PCount1, - pending_ack = PA1 }}. + {SeqId, a(State1 #vqstate { next_seq_id = SeqId + 1, + out_counter = OutCount + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + pending_ack = PA1 })}. fetch(AckRequired, State = #vqstate { q4 = Q4, ram_msg_count = RamMsgCount, @@ -467,8 +467,8 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, case queue:out(Q4) of {empty, _Q4} -> case fetch_from_q3_to_q4(State) of - {empty, _State1} = Result -> Result; - {loaded, State1} -> fetch(AckRequired, State1) + {empty, State1} = Result -> a(State1), Result; + {loaded, State1} -> fetch(AckRequired, State1) end; {{value, MsgStatus = #msg_status { msg = Msg, guid = Guid, seq_id = SeqId, @@ -505,30 +505,30 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), Len1 = Len - 1, {{Msg, IsDelivered, AckTag, Len1}, - State #vqstate { q4 = Q4a, - ram_msg_count = RamMsgCount - 1, - out_counter = OutCount + 1, - index_state = IndexState2, - len = Len1, - persistent_count = PCount1, - pending_ack = PA1 }} + a(State #vqstate { q4 = Q4a, + ram_msg_count = RamMsgCount - 1, + out_counter = OutCount + 1, + index_state = IndexState2, + len = Len1, + persistent_count = PCount1, + pending_ack = PA1 })} end. ack(AckTags, State) -> - ack(fun (_AckEntry, State1) -> State1 end, AckTags, State). + a(ack(fun (_AckEntry, State1) -> State1 end, AckTags, State)). 
tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, State = #vqstate { durable = IsDurable, msg_store_clients = MSCState }) -> Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), - case IsPersistent andalso IsDurable of - true -> MsgStatus = msg_status(true, undefined, Msg), - {#msg_status { msg_on_disk = true }, MSCState1} = - maybe_write_msg_to_disk(false, MsgStatus, MSCState), - State #vqstate { msg_store_clients = MSCState1 }; - false -> State - end. + a(case IsPersistent andalso IsDurable of + true -> MsgStatus = msg_status(true, undefined, Msg), + {#msg_status { msg_on_disk = true }, MSCState1} = + maybe_write_msg_to_disk(false, MsgStatus, MSCState), + State #vqstate { msg_store_clients = MSCState1 }; + false -> State + end). tx_ack(Txn, AckTags, State) -> Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), @@ -543,7 +543,7 @@ tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> persistent_guids(Pubs)); false -> ok end, - {lists:flatten(AckTags), State}. + {lists:flatten(AckTags), a(State)}. tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> %% If we are a non-durable queue, or we have no persistent pubs, @@ -555,28 +555,28 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> PersistentGuids = persistent_guids(PubsOrdered), IsTransientPubs = [] == PersistentGuids, {AckTags1, - case (not IsDurable) orelse IsTransientPubs of - true -> tx_commit_post_msg_store( - IsTransientPubs, PubsOrdered, AckTags1, Fun, State); - false -> ok = rabbit_msg_store:sync( - ?PERSISTENT_MSG_STORE, PersistentGuids, - msg_store_callback(PersistentGuids, IsTransientPubs, - PubsOrdered, AckTags1, Fun)), - State - end}. + a(case (not IsDurable) orelse IsTransientPubs of + true -> tx_commit_post_msg_store( + IsTransientPubs, PubsOrdered, AckTags1, Fun, State); + false -> ok = rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + msg_store_callback(PersistentGuids, IsTransientPubs, + PubsOrdered, AckTags1, Fun)), + State + end)}. requeue(AckTags, State) -> - ack(fun (#msg_status { msg = Msg }, State1) -> - {_SeqId, State2} = publish(Msg, true, false, State1), - State2; - ({IsPersistent, Guid}, State1) -> - #vqstate { msg_store_clients = MSCState } = State1, - {{ok, Msg = #basic_message{}}, MSCState1} = - read_from_msg_store(MSCState, IsPersistent, Guid), - State2 = State1 #vqstate { msg_store_clients = MSCState1 }, - {_SeqId, State3} = publish(Msg, true, true, State2), - State3 - end, AckTags, State). + a(ack(fun (#msg_status { msg = Msg }, State1) -> + {_SeqId, State2} = publish(Msg, true, false, State1), + State2; + ({IsPersistent, Guid}, State1) -> + #vqstate { msg_store_clients = MSCState } = State1, + {{ok, Msg = #basic_message{}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + {_SeqId, State3} = publish(Msg, true, true, State2), + State3 + end, AckTags, State)). len(#vqstate { len = Len }) -> Len. @@ -596,11 +596,11 @@ set_ram_duration_target(DurationTarget, end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, - case TargetRamMsgCount1 == undefined orelse - TargetRamMsgCount1 >= TargetRamMsgCount of - true -> State1; - false -> reduce_memory_use(State1) - end. + a(case TargetRamMsgCount1 == undefined orelse + TargetRamMsgCount1 >= TargetRamMsgCount of + true -> State1; + false -> reduce_memory_use(State1) + end). 
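%% Editorial note, not part of the diff: the a/1 post-condition
%% wrapper threaded through each exported function here is defined
%% further down this diff, under "Minor helpers". Its checks encode
%% implications as disjunctions: 'X or not Y' is the propositional
%% form of "Y implies X", so 'true = E1 or not E3' asserts "if q3 is
%% empty then q1 must be empty" -- q1 may only hold messages while q3
%% is non-empty. A one-line sketch of the same encoding:
%%
%%     implies(P, Q) -> Q orelse not P.    %% true = implies(E3, E1)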
ram_duration(State = #vqstate { egress_rate = Egress, ingress_rate = Ingress, @@ -635,7 +635,7 @@ ram_duration(State = #vqstate { egress_rate = Egress, needs_sync(#vqstate { on_sync = {_, _, []} }) -> false; needs_sync(_) -> true. -sync(State) -> tx_commit_index(State). +sync(State) -> a(tx_commit_index(State)). handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. @@ -667,6 +667,24 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, %% Minor helpers %%---------------------------------------------------------------------------- +a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, target_ram_msg_count = TargetRamMsgCount }) -> + E1 = queue:is_empty(Q1), + E2 = bpqueue:is_empty(Q2), + ED = Delta#delta.count == 0, + E3 = bpqueue:is_empty(Q3), + E4 = queue:is_empty(Q4), + TZ = TargetRamMsgCount == 0, + LZ = Len == 0, + + true = E1 or not E3, + true = E2 or not ED, + true = ED or not E3, + true = (E1 and E2 and E4) or not TZ, + true = LZ == (E3 and E4), + + State. + one_if(true ) -> 1; one_if(false) -> 0. @@ -1006,9 +1024,6 @@ fetch_from_q3_to_q4(State = #vqstate { msg_store_clients = MSCState }) -> case bpqueue:out(Q3) of {empty, _Q3} -> - 0 = DeltaCount, %% ASSERTION - true = bpqueue:is_empty(Q2), %% ASSERTION - true = queue:is_empty(Q1), %% ASSERTION {empty, State}; {{value, IndexOnDisk, MsgStatus = #msg_status { msg = undefined, guid = Guid, -- cgit v1.2.1 From 91c4c6f0ef5b04d7b792540f7d9fe8c241f32f9e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 15 Jun 2010 18:59:39 +0100 Subject: Improved test coverage of the queue index walker --- src/rabbit_amqqueue.erl | 2 +- src/rabbit_tests.erl | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 3c9c41bd..12891447 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -121,7 +121,7 @@ start() -> DurableQueues = find_durable_queues(), - {ok, BQ} = application:get_env(backing_queue_module), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), {ok,_} = supervisor:start_child( rabbit_sup, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7ce03e5d..d49208c3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1313,7 +1313,8 @@ test_backing_queue() -> {ok, rabbit_variable_queue} -> passed = test_msg_store(), passed = test_queue_index(), - passed = test_variable_queue(); + passed = test_variable_queue(), + passed = test_queue_recover(); _ -> passed end. @@ -1518,10 +1519,10 @@ test_msg_store() -> passed. queue_name(Name) -> - rabbit_misc:r(<<"/">>, queue, term_to_binary(Name)). + rabbit_misc:r(<<"/">>, queue, Name). test_queue() -> - queue_name(test). + queue_name(<<"test">>). empty_test_queue() -> ok = rabbit_variable_queue:start([]), @@ -1767,7 +1768,7 @@ test_variable_queue_dynamic_duration_change() -> VQ10 = rabbit_variable_queue:handle_pre_hibernate(VQ9), {empty, VQ11} = rabbit_variable_queue:fetch(true, VQ10), - rabbit_variable_queue:terminate(VQ11), + rabbit_variable_queue:delete_and_terminate(VQ11), passed. 
@@ -1833,6 +1834,34 @@ test_variable_queue_partial_segments_delta_thing() -> VQ8 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ7), %% should be empty now {empty, VQ9} = rabbit_variable_queue:fetch(true, VQ8), - rabbit_variable_queue:terminate(VQ9), + rabbit_variable_queue:delete_and_terminate(VQ9), passed. + +test_queue_recover() -> + Count = 2*rabbit_queue_index:next_segment_boundary(0), + #amqqueue { pid = QPid, name = QName } = Q = + rabbit_amqqueue:declare(test_queue(), true, false, [], none), + Msg = fun() -> rabbit_basic:message( + rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = 2}, <<>>) end, + Delivery = #delivery{mandatory = false, + immediate = false, + txn = none, + sender = self(), + message = Msg()}, + [true = rabbit_amqqueue:deliver(QPid, Delivery) || _ <- lists:seq(1, Count)], + rabbit_amqqueue:stat(Q), + exit(QPid, shutdown), + MRef = erlang:monitor(process, QPid), + receive {'DOWN', MRef, process, QPid, _Info} -> ok + after 10000 -> exit(timeout_waiting_for_queue_death) + end, + ok = stop_msg_store(), + ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), + ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + ok = rabbit_amqqueue:start(), + {ok, Count} = rabbit_amqqueue:with_or_die( + QName, + fun (Q1) -> rabbit_amqqueue:delete(Q1, false, false) end), + passed. -- cgit v1.2.1 From 577738532c0f9306dc7a4a4cd4826cb85d04b810 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 08:01:53 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 53 ++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c2f90bac..031ed882 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,11 +31,12 @@ -module(rabbit_variable_queue). --export([init/3, terminate/1, publish/2, publish_delivered/3, - set_ram_duration_target/2, ram_duration/1, fetch/2, ack/2, len/1, - is_empty/1, purge/1, delete_and_terminate/1, requeue/2, tx_publish/3, - tx_ack/3, tx_rollback/2, tx_commit/3, needs_sync/1, sync/1, - handle_pre_hibernate/1, status/1]). +-export([init/3, terminate/1, delete_and_terminate/1, + purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, + tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, + requeue/2, len/1, is_empty/1, + set_ram_duration_target/2, ram_duration/1, + needs_sync/1, sync/1, handle_pre_hibernate/1, status/1]). -export([start/1]). @@ -693,6 +694,23 @@ msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> is_persistent = IsPersistent, is_delivered = false, msg_on_disk = false, index_on_disk = false }. +find_msg_store(true) -> ?PERSISTENT_MSG_STORE; +find_msg_store(false) -> ?TRANSIENT_MSG_STORE. + +with_msg_store_state({{MSCStateP, PRef}, MSCStateT}, true, Fun) -> + {Result, MSCStateP1} = Fun(?PERSISTENT_MSG_STORE, MSCStateP), + {Result, {{MSCStateP1, PRef}, MSCStateT}}; +with_msg_store_state({MSCStateP, {MSCStateT, TRef}}, false, Fun) -> + {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), + {Result, {MSCStateP, {MSCStateT1, TRef}}}. + +read_from_msg_store(MSCState, IsPersistent, Guid) -> + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState1) -> + rabbit_msg_store:read(MsgStore, Guid, MSCState1) + end). 
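%% Usage note (editorial): callers thread the paired client states
%% through these helpers, e.g. elsewhere in this file
%%
%%     {{ok, Msg}, MSCState1} =
%%         read_from_msg_store(MSCState, IsPersistent, Guid)
%%
%% reads from whichever of the persistent/transient stores holds the
%% message, updating that store's client state and leaving the other
%% untouched.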
+ maybe_write_delivered(false, _SeqId, IndexState) -> IndexState; maybe_write_delivered(true, SeqId, IndexState) -> @@ -1174,25 +1192,8 @@ store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, Q2) } end. -find_msg_store(true) -> ?PERSISTENT_MSG_STORE; -find_msg_store(false) -> ?TRANSIENT_MSG_STORE. - -with_msg_store_state({{MSCStateP, PRef}, MSCStateT}, true, Fun) -> - {Result, MSCStateP1} = Fun(?PERSISTENT_MSG_STORE, MSCStateP), - {Result, {{MSCStateP1, PRef}, MSCStateT}}; -with_msg_store_state({MSCStateP, {MSCStateT, TRef}}, false, Fun) -> - {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), - {Result, {MSCStateP, {MSCStateT1, TRef}}}. - -read_from_msg_store(MSCState, IsPersistent, Guid) -> - with_msg_store_state( - MSCState, IsPersistent, - fun (MsgStore, MSCState1) -> - rabbit_msg_store:read(MsgStore, Guid, MSCState1) - end). - -maybe_write_msg_to_disk(_Force, MsgStatus = - #msg_status { msg_on_disk = true }, MSCState) -> +maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { + msg_on_disk = true }, MSCState) -> {MsgStatus, MSCState}; maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { msg = Msg, guid = Guid, @@ -1209,8 +1210,8 @@ maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> {MsgStatus, MSCState}. -maybe_write_index_to_disk(_Force, MsgStatus = - #msg_status { index_on_disk = true }, IndexState) -> +maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { + index_on_disk = true }, IndexState) -> true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION {MsgStatus, IndexState}; maybe_write_index_to_disk(Force, MsgStatus = #msg_status { -- cgit v1.2.1 From 33a45a15451a0a64236137af7ed543b980e70d80 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 08:17:54 +0100 Subject: simplify 'purge' it doesn't need to compute the Count since we already know it --- src/rabbit_variable_queue.erl | 50 ++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 031ed882..88b765d6 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -421,11 +421,10 @@ delete_and_terminate(State) -> msg_store_clients = undefined }). purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> - {Q4Count, IndexState1} = - remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), - {Len, State1} = - purge1(Q4Count, State #vqstate { q4 = queue:new(), - index_state = IndexState1 }), + IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, + IndexState), + State1 = purge1(State #vqstate { q4 = queue:new(), + index_state = IndexState1 }), {Len, a(State1 #vqstate { len = 0, ram_msg_count = 0, ram_index_count = 0, @@ -976,46 +975,39 @@ delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> [] -> IndexState1; _ -> {Q, IndexState3} = betas_from_segment_entries( List, TransientThreshold, IndexState1), - {_Count, IndexState4} = - remove_queue_entries( - fun beta_fold_no_index_on_disk/3, Q, IndexState3), - IndexState4 + remove_queue_entries(fun beta_fold_no_index_on_disk/3, Q, + IndexState3) end, delete1(TransientThreshold, NextSeqId, Again, IndexState2). 
-purge1(Count, State = #vqstate { q3 = Q3, index_state = IndexState }) -> +purge1(State = #vqstate { q1 = Q1, q3 = Q3, index_state = IndexState }) -> case bpqueue:is_empty(Q3) of - true -> {Q1Count, IndexState1} = - remove_queue_entries(fun rabbit_misc:queue_fold/3, - State #vqstate.q1, IndexState), - {Count + Q1Count, - State #vqstate { q1 = queue:new(), - index_state = IndexState1 }}; - false -> {Q3Count, IndexState1} = - remove_queue_entries(fun beta_fold_no_index_on_disk/3, - Q3, IndexState), - purge1(Count + Q3Count, - maybe_deltas_to_betas( + true -> IndexState1 = + remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1, + IndexState), + State #vqstate { q1 = queue:new(), + index_state = IndexState1 }; + false -> IndexState1 = + remove_queue_entries(fun beta_fold_no_index_on_disk/3, Q3, + IndexState), + purge1(maybe_deltas_to_betas( State #vqstate { q3 = bpqueue:new(), index_state = IndexState1 })) end. remove_queue_entries(Fold, Q, IndexState) -> - {Count, GuidsByStore, SeqIds, IndexState1} = - Fold(fun remove_queue_entries1/2, {0, dict:new(), [], IndexState}, Q), + {GuidsByStore, SeqIds, IndexState1} = + Fold(fun remove_queue_entries1/2, {dict:new(), [], IndexState}, Q), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), - {Count, case SeqIds of - [] -> IndexState1; - _ -> rabbit_queue_index:ack(SeqIds, IndexState1) - end}. + rabbit_queue_index:ack(SeqIds, IndexState1). remove_queue_entries1( #msg_status { guid = Guid, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {Count, GuidsByStore, SeqIdsAcc, IndexState}) -> + {GuidsByStore, SeqIdsAcc, IndexState}) -> GuidsByStore1 = case MsgOnDisk of true -> rabbit_misc:dict_cons( find_msg_store(IsPersistent), @@ -1029,7 +1021,7 @@ remove_queue_entries1( IndexState1 = maybe_write_delivered( IndexOnDisk andalso not IsDelivered, SeqId, IndexState), - {Count + 1, GuidsByStore1, SeqIdsAcc1, IndexState1}. + {GuidsByStore1, SeqIdsAcc1, IndexState1}. fetch_from_q3_to_q4(State = #vqstate { q1 = Q1, -- cgit v1.2.1 From ac86773d5eebfa6d53c2ca0a6be6bc174237029e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 08:38:34 +0100 Subject: restructure 'purge' in order to make its operation more obvious --- src/rabbit_variable_queue.erl | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 88b765d6..10f06d19 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -423,9 +423,14 @@ delete_and_terminate(State) -> purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), - State1 = purge1(State #vqstate { q4 = queue:new(), - index_state = IndexState1 }), - {Len, a(State1 #vqstate { len = 0, + State1 = #vqstate { q1 = Q1, index_state = IndexState2 } = + purge_betas_and_deltas(State #vqstate { q4 = queue:new(), + index_state = IndexState1 }), + IndexState3 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1, + IndexState2), + {Len, a(State1 #vqstate { q1 = queue:new(), + index_state = IndexState3, + len = 0, ram_msg_count = 0, ram_index_count = 0, persistent_count = 0 })}. @@ -980,19 +985,17 @@ delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> end, delete1(TransientThreshold, NextSeqId, Again, IndexState2). 
-purge1(State = #vqstate { q1 = Q1, q3 = Q3, index_state = IndexState }) -> +purge_betas_and_deltas(State = #vqstate { q3 = Q3, + index_state = IndexState }) -> case bpqueue:is_empty(Q3) of - true -> IndexState1 = - remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1, - IndexState), - State #vqstate { q1 = queue:new(), - index_state = IndexState1 }; - false -> IndexState1 = - remove_queue_entries(fun beta_fold_no_index_on_disk/3, Q3, - IndexState), - purge1(maybe_deltas_to_betas( - State #vqstate { q3 = bpqueue:new(), - index_state = IndexState1 })) + true -> State; + false -> IndexState1 = remove_queue_entries( + fun beta_fold_no_index_on_disk/3, Q3, + IndexState), + purge_betas_and_deltas( + maybe_deltas_to_betas( + State #vqstate { q3 = bpqueue:new(), + index_state = IndexState1 })) end. remove_queue_entries(Fold, Q, IndexState) -> -- cgit v1.2.1 From 4eb191cbdeea01956f3f356f86bc3ce504578cff Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 15:12:14 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 10f06d19..cba1dbc3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1303,25 +1303,21 @@ maybe_deltas_to_betas(State = #vqstate { true -> State; false -> - %% either q3 is empty, in which case we load at least one - %% segment, or TargetRamMsgCount > 0, meaning we should - %% really be holding all the betas in memory. #delta { start_seq_id = DeltaSeqId, count = DeltaCount, end_seq_id = DeltaSeqIdEnd } = Delta, - {List, IndexState1, Delta1SeqId} = + {List, IndexState1, DeltaSeqId1} = read_one_index_segment(DeltaSeqId, DeltaSeqIdEnd, IndexState), - %% length(List) may be < segment_size because of acks. It - %% could be [] if we ignored every message in the segment - %% due to it being transient and below the threshold {Q3a, IndexState2} = betas_from_segment_entries( List, TransientThreshold, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, case bpqueue:len(Q3a) of 0 -> + %% we ignored every message in the segment due to + %% it being transient and below the threshold maybe_deltas_to_betas( State #vqstate { - delta = Delta #delta { start_seq_id = Delta1SeqId }}); + delta = Delta #delta { start_seq_id = DeltaSeqId1 }}); Q3aLen -> Q3b = bpqueue:join(Q3, Q3a), case DeltaCount - Q3aLen of @@ -1332,7 +1328,7 @@ maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA, q3 = bpqueue:join(Q3b, Q2) }; N when N > 0 -> - Delta1 = #delta { start_seq_id = Delta1SeqId, + Delta1 = #delta { start_seq_id = DeltaSeqId1, count = N, end_seq_id = DeltaSeqIdEnd }, State1 #vqstate { delta = Delta1, -- cgit v1.2.1 From ece99d46b1fdeab5550d10afbca6005bdd077044 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 15:24:59 +0100 Subject: minor refactor --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cba1dbc3..4f9666c7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -780,8 +780,8 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), {Avg, {Then, Count}}. -persistent_guids(Pubs) -> [Guid || Obj = #basic_message { guid = Guid } <- Pubs, - Obj #basic_message.is_persistent]. 
+persistent_guids(Pubs) -> + [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. betas_from_segment_entries(List, TransientThreshold, IndexState) -> {Filtered, IndexState1} = -- cgit v1.2.1 From 275247c6fa35513d701000be7ba26adfdd55574c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 15:35:40 +0100 Subject: refactor: invert condition to make it easier to read --- src/rabbit_variable_queue.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4f9666c7..4d6e6e04 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1299,10 +1299,10 @@ maybe_deltas_to_betas(State = #vqstate { index_state = IndexState, target_ram_msg_count = TargetRamMsgCount, transient_threshold = TransientThreshold }) -> - case (not bpqueue:is_empty(Q3)) andalso (0 == TargetRamMsgCount) of - true -> - State; + case bpqueue:is_empty(Q3) orelse (TargetRamMsgCount /= 0) of false -> + State; + true -> #delta { start_seq_id = DeltaSeqId, count = DeltaCount, end_seq_id = DeltaSeqIdEnd } = Delta, -- cgit v1.2.1 From 0d6873ff8b5a3e297113dc1125bb53c8f2ac1a67 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 16 Jun 2010 17:53:26 +0100 Subject: rename --- src/rabbit_variable_queue.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4d6e6e04..377b5737 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -848,7 +848,7 @@ combine_deltas(#delta { start_seq_id = StartLow, andalso ((StartLow + Count) =< EndHigh), #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }. -beta_fold_no_index_on_disk(Fun, Init, Q) -> +beta_fold(Fun, Init, Q) -> bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). permitted_ram_index_count(#vqstate { len = 0 }) -> @@ -980,8 +980,7 @@ delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> [] -> IndexState1; _ -> {Q, IndexState3} = betas_from_segment_entries( List, TransientThreshold, IndexState1), - remove_queue_entries(fun beta_fold_no_index_on_disk/3, Q, - IndexState3) + remove_queue_entries(fun beta_fold/3, Q, IndexState3) end, delete1(TransientThreshold, NextSeqId, Again, IndexState2). @@ -989,9 +988,8 @@ purge_betas_and_deltas(State = #vqstate { q3 = Q3, index_state = IndexState }) -> case bpqueue:is_empty(Q3) of true -> State; - false -> IndexState1 = remove_queue_entries( - fun beta_fold_no_index_on_disk/3, Q3, - IndexState), + false -> IndexState1 = remove_queue_entries(fun beta_fold/3, Q3, + IndexState), purge_betas_and_deltas( maybe_deltas_to_betas( State #vqstate { q3 = bpqueue:new(), -- cgit v1.2.1 From 79c78f47e0dba7170a888fa47e8f558e29f13fee Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 17 Jun 2010 10:33:00 +0100 Subject: get rid of vq:read_one_index_segment The purpose of the function was to keep reading until either some data was found or the end was reached. However, both call sites contain loops already that effectively do the same, making the function redundant. There is also a small tweak to the qi:read API - it now returns the "next seq id to read". Previously it was returning 'undefined' when the requested (exclusive) End was inside the same segment as the Start, so now we simply return that End in that case. 
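A sketch of the caller-side loop that makes the helper redundant
(illustrative only; read_all/3 is not a function in the tree):

    read_all(End, End, State) ->
        {[], State};
    read_all(Start, End, State) ->
        {List, Next, State1} = rabbit_queue_index:read(Start, End, State),
        {Rest, State2} = read_all(Next, End, State1),
        {List ++ Rest, State2}.

With the new contract, Next reaches End exactly when the requested
range is exhausted, so the recursion terminates without a special
'undefined' case.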
--- src/rabbit_queue_index.erl | 15 +++++++-------- src/rabbit_tests.erl | 12 ++++++------ src/rabbit_variable_queue.erl | 20 +++++--------------- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5f94cf9d..d062c4fd 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -196,8 +196,7 @@ -spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush/1 :: (qistate()) -> qistate()). -spec(read/3 :: (seq_id(), seq_id(), qistate()) -> - {[{guid(), seq_id(), boolean(), boolean()}], - seq_id() | 'undefined', qistate()}). + {[{guid(), seq_id(), boolean(), boolean()}], seq_id(), qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(bounds/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). @@ -282,17 +281,17 @@ flush(State = #qistate { dirty_count = 0 }) -> State; flush(State) -> flush_journal(State). read(StartEnd, StartEnd, State) -> - {[], undefined, State}; + {[], StartEnd, State}; read(Start, End, State = #qistate { segments = Segments, dir = Dir }) when Start =< End -> %% Start is inclusive, End is exclusive. {StartSeg, StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), {EndSeg, EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End), Start1 = reconstruct_seq_id(StartSeg + 1, 0), - Again = case End =< Start1 of - true -> undefined; - false -> Start1 - end, + Next = case End =< Start1 of + true -> End; + false -> Start1 + end, MaxRelSeq = case StartSeg =:= EndSeg of true -> EndRelSeq; false -> ?SEGMENT_ENTRY_COUNT @@ -307,7 +306,7 @@ read(Start, End, State = #qistate { segments = Segments, Acc end, [], Segment), Segments1 = segment_store(Segment, Segments), - {Messages, Again, State #qistate { segments = Segments1 }}. + {Messages, Next, State #qistate { segments = Segments1 }}. 
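%% Worked example (editorial; assumes the default ?SEGMENT_ENTRY_COUNT
%% of 16384): read(5, 40000, State) covers only seq ids 5..16383, the
%% remainder of the first segment, and returns Next = 16384; the
%% caller then reads (16384, 40000) getting Next = 32768, and finally
%% (32768, 40000), where End falls inside the segment being read, so
%% Next = End = 40000 and the loop stops.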
next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index d49208c3..f3df66ca 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1584,7 +1584,7 @@ test_queue_index() -> {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), - {ReadA, undefined, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + {ReadA, SegmentSize, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsGuidsA)), _Qi5 = rabbit_queue_index:terminate([], Qi4), @@ -1595,7 +1595,7 @@ test_queue_index() -> {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), - {ReadB, undefined, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + {ReadB, SegmentSize, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsGuidsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), @@ -1606,7 +1606,7 @@ test_queue_index() -> {LenB, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), Qi14 = queue_index_deliver(SeqIdsB, Qi13), - {ReadC, undefined, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + {ReadC, SegmentSize, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsGuidsB)), Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), @@ -1669,10 +1669,10 @@ test_queue_index() -> {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), Qi48 = queue_index_deliver([2,3,5,6], Qi47), Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), - {[], undefined, Qi50} = rabbit_queue_index:read(0, 4, Qi49), - {ReadD, undefined, Qi51} = rabbit_queue_index:read(4, 7, Qi50), + {[], 4, Qi50} = rabbit_queue_index:read(0, 4, Qi49), + {ReadD, 7, Qi51} = rabbit_queue_index:read(4, 7, Qi50), ok = verify_read_with_published(true, false, ReadD, [Four, Five, Six]), - {ReadE, undefined, Qi52} = rabbit_queue_index:read(7, 9, Qi51), + {ReadE, 9, Qi52} = rabbit_queue_index:read(7, 9, Qi51), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), _Qi53 = rabbit_queue_index:delete_and_terminate(Qi52), ok = stop_msg_store(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 377b5737..61437229 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -807,16 +807,6 @@ betas_from_segment_entries(List, TransientThreshold, IndexState) -> end, {[], IndexState}, List), {bpqueue:from_list([{true, Filtered}]), IndexState1}. -read_one_index_segment(StartSeqId, EndSeqId, IndexState) - when StartSeqId =< EndSeqId -> - case rabbit_queue_index:read(StartSeqId, EndSeqId, IndexState) of - {List, Again, IndexState1} when List /= [] orelse Again =:= undefined -> - {List, IndexState1, - rabbit_queue_index:next_segment_boundary(StartSeqId)}; - {[], StartSeqId1, IndexState1} -> - read_one_index_segment(StartSeqId1, EndSeqId, IndexState1) - end. - ensure_binary_properties(Msg = #basic_message { content = Content }) -> Msg #basic_message { content = rabbit_binary_parser:clear_decoded_content( @@ -970,10 +960,10 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, State1 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. 
delete1(_TransientThreshold, NextSeqId, DeltaSeqId, IndexState) - when DeltaSeqId =:= undefined orelse DeltaSeqId >= NextSeqId -> + when DeltaSeqId >= NextSeqId -> IndexState; delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> - {List, Again, IndexState1} = + {List, Next, IndexState1} = rabbit_queue_index:read(DeltaSeqId, NextSeqId, IndexState), IndexState2 = case List of @@ -982,7 +972,7 @@ delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> List, TransientThreshold, IndexState1), remove_queue_entries(fun beta_fold/3, Q, IndexState3) end, - delete1(TransientThreshold, NextSeqId, Again, IndexState2). + delete1(TransientThreshold, NextSeqId, Next, IndexState2). purge_betas_and_deltas(State = #vqstate { q3 = Q3, index_state = IndexState }) -> @@ -1304,8 +1294,8 @@ maybe_deltas_to_betas(State = #vqstate { #delta { start_seq_id = DeltaSeqId, count = DeltaCount, end_seq_id = DeltaSeqIdEnd } = Delta, - {List, IndexState1, DeltaSeqId1} = - read_one_index_segment(DeltaSeqId, DeltaSeqIdEnd, IndexState), + {List, DeltaSeqId1, IndexState1} = + rabbit_queue_index:read(DeltaSeqId, DeltaSeqIdEnd, IndexState), {Q3a, IndexState2} = betas_from_segment_entries( List, TransientThreshold, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, -- cgit v1.2.1 From 0428d0d8e6c65a1caade13e862c1c168c4fa13d1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 17 Jun 2010 12:29:11 +0100 Subject: make queue_index:deliver take a list of SeqIds ...rather than just a single SeqId. This brings it in line with 'ack', simplifies the code, and is useful in variable_queue. --- src/rabbit_queue_index.erl | 30 +++++++++++++++--------------- src/rabbit_tests.erl | 33 +++++++++++++-------------------- src/rabbit_variable_queue.erl | 38 +++++++++++++++++++------------------- 3 files changed, 47 insertions(+), 54 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d062c4fd..5123ca21 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -191,7 +191,7 @@ -spec(terminate/2 :: ([any()], qistate()) -> qistate()). -spec(delete_and_terminate/1 :: (qistate()) -> qistate()). -spec(publish/4 :: (guid(), seq_id(), boolean(), qistate()) -> qistate()). --spec(deliver/2 :: (seq_id(), qistate()) -> qistate()). +-spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()). -spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). -spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush/1 :: (qistate()) -> qistate()). @@ -244,22 +244,11 @@ publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]), maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). -deliver(SeqId, State) -> - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append( - JournalHdl, <>), - maybe_flush_journal(add_to_journal(SeqId, del, State1)). +deliver(SeqIds, State) -> + deliver_or_ack(del, ?DEL_JPREFIX, SeqIds, State). -ack([], State) -> - State; ack(SeqIds, State) -> - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append( - JournalHdl, [<> || - SeqId <- SeqIds]), - maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> - add_to_journal(SeqId, ack, StateN) - end, State1, SeqIds)). + deliver_or_ack(ack, ?ACK_JPREFIX, SeqIds, State). sync([], State) -> State; @@ -659,6 +648,17 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> _ErrOrEoF -> State end. 
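%% Editorial annotation, not part of the diff: the list-based API lets
%% deliver/2 batch its journal writes exactly as ack/2 already did --
%% e.g. deliver([1,2,3], State) emits all three 'del' records in a
%% single file_handle_cache:append/2 call, where the old
%% deliver(SeqId, State) cost one append per message.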
+deliver_or_ack(_Kind, _JPrefix, [], State) -> + State; +deliver_or_ack(Kind, JPrefix, SeqIds, State) -> + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, + [<> || SeqId <- SeqIds]), + maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> + add_to_journal(SeqId, Kind, StateN) + end, State1, SeqIds)). + %%---------------------------------------------------------------------------- %% segment manipulation %%---------------------------------------------------------------------------- diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f3df66ca..8e99780d 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1550,13 +1550,6 @@ queue_index_publish(SeqIds, Persistent, Qi) -> ok = rabbit_msg_store:client_terminate(MSCStateEnd), {A, B}. -queue_index_deliver(SeqIds, Qi) -> - lists:foldl(fun (SeqId, QiN) -> rabbit_queue_index:deliver(SeqId, QiN) end, - Qi, SeqIds). - -queue_index_flush(Qi) -> - rabbit_queue_index:flush(Qi). - verify_read_with_published(_Delivered, _Persistent, [], _) -> ok; verify_read_with_published(Delivered, Persistent, @@ -1605,12 +1598,12 @@ test_queue_index() -> LenB = length(SeqIdsB), {LenB, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), - Qi14 = queue_index_deliver(SeqIdsB, Qi13), + Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), {ReadC, SegmentSize, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsGuidsB)), Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), - Qi17 = queue_index_flush(Qi16), + Qi17 = rabbit_queue_index:flush(Qi16), %% Everything will have gone now because #pubs == #acks {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), _Qi19 = rabbit_queue_index:terminate([], Qi18), @@ -1628,9 +1621,9 @@ test_queue_index() -> SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), {0, _Terms4, Qi22} = test_queue_init(), {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), - Qi24 = queue_index_deliver(SeqIdsC, Qi23), + Qi24 = rabbit_queue_index:deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), - Qi26 = queue_index_flush(Qi25), + Qi26 = rabbit_queue_index:flush(Qi25), {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), _Qi28 = rabbit_queue_index:delete_and_terminate(Qi27), ok = stop_msg_store(), @@ -1639,10 +1632,10 @@ test_queue_index() -> %% b) partial pub+del, then move to new segment, then ack all in old segment {0, _Terms5, Qi29} = test_queue_init(), {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), - Qi31 = queue_index_deliver(SeqIdsC, Qi30), + Qi31 = rabbit_queue_index:deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), - Qi34 = queue_index_flush(Qi33), + Qi34 = rabbit_queue_index:flush(Qi33), _Qi35 = rabbit_queue_index:delete_and_terminate(Qi34), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1651,9 +1644,9 @@ test_queue_index() -> SeqIdsD = lists:seq(0,SegmentSize*4), {0, _Terms6, Qi36} = test_queue_init(), {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), - Qi38 = queue_index_deliver(SeqIdsD, Qi37), + Qi38 = rabbit_queue_index:deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), - Qi40 = queue_index_flush(Qi39), + Qi40 = rabbit_queue_index:flush(Qi39), _Qi41 = rabbit_queue_index:delete_and_terminate(Qi40), ok = stop_msg_store(), ok = empty_test_queue(), @@ -1663,11 +1656,11 @@ 
test_queue_index() -> %% possibilities in combining the segment with the journal. {0, _Terms7, Qi42} = test_queue_init(), {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], false, Qi42), - Qi44 = queue_index_deliver([0,1,4], Qi43), + Qi44 = rabbit_queue_index:deliver([0,1,4], Qi43), Qi45 = rabbit_queue_index:ack([0], Qi44), - Qi46 = queue_index_flush(Qi45), + Qi46 = rabbit_queue_index:flush(Qi45), {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), - Qi48 = queue_index_deliver([2,3,5,6], Qi47), + Qi48 = rabbit_queue_index:deliver([2,3,5,6], Qi47), Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), {[], 4, Qi50} = rabbit_queue_index:read(0, 4, Qi49), {ReadD, 7, Qi51} = rabbit_queue_index:read(4, 7, Qi50), @@ -1682,14 +1675,14 @@ test_queue_index() -> %% exercise journal_minus_segment, not segment_plus_journal. {0, _Terms8, Qi54} = test_queue_init(), {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], true, Qi54), - Qi56 = queue_index_deliver([0,1,4], Qi55), + Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), Qi57 = rabbit_queue_index:ack([0], Qi56), _Qi58 = rabbit_queue_index:terminate([], Qi57), ok = stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), {5, _Terms9, Qi59} = test_queue_init(), {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), - Qi61 = queue_index_deliver([2,3,5,6], Qi60), + Qi61 = rabbit_queue_index:deliver([2,3,5,6], Qi60), Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), _Qi63 = rabbit_queue_index:terminate([], Qi62), ok = stop_msg_store(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 61437229..72add2af 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -718,7 +718,7 @@ read_from_msg_store(MSCState, IsPersistent, Guid) -> maybe_write_delivered(false, _SeqId, IndexState) -> IndexState; maybe_write_delivered(true, SeqId, IndexState) -> - rabbit_queue_index:deliver(SeqId, IndexState). + rabbit_queue_index:deliver([SeqId], IndexState). accumulate_ack(SeqId, IsPersistent, Guid, {SeqIdsAcc, Dict}) -> {case IsPersistent of @@ -987,32 +987,32 @@ purge_betas_and_deltas(State = #vqstate { q3 = Q3, end. remove_queue_entries(Fold, Q, IndexState) -> - {GuidsByStore, SeqIds, IndexState1} = - Fold(fun remove_queue_entries1/2, {dict:new(), [], IndexState}, Q), + {GuidsByStore, Delivers, Acks} = + Fold(fun remove_queue_entries1/2, {dict:new(), [], []}, Q), ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), - rabbit_queue_index:ack(SeqIds, IndexState1). + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState)). remove_queue_entries1( #msg_status { guid = Guid, seq_id = SeqId, is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, - {GuidsByStore, SeqIdsAcc, IndexState}) -> - GuidsByStore1 = case MsgOnDisk of - true -> rabbit_misc:dict_cons( - find_msg_store(IsPersistent), - Guid, GuidsByStore); - false -> GuidsByStore - end, - SeqIdsAcc1 = case IndexOnDisk of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, - IndexState1 = maybe_write_delivered( - IndexOnDisk andalso not IsDelivered, - SeqId, IndexState), - {GuidsByStore1, SeqIdsAcc1, IndexState1}. 
+ {GuidsByStore, Delivers, Acks}) -> + {case MsgOnDisk of + true -> rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, + GuidsByStore); + false -> GuidsByStore + end, + case IndexOnDisk andalso not IsDelivered of + true -> [SeqId | Delivers]; + false -> Delivers + end, + case IndexOnDisk of + true -> [SeqId | Acks]; + false -> Acks + end}. fetch_from_q3_to_q4(State = #vqstate { q1 = Q1, -- cgit v1.2.1 From 57b5778a423e0e37f441351cb469a278146e9ae7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 17 Jun 2010 12:53:07 +0100 Subject: rename betas_from_segment_entries and make it more efficient ...by exploiting the ability of queue_index:{deliver,ack} to take multiple SeqIds --- src/rabbit_variable_queue.erl | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 72add2af..4a64d14d 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -783,17 +783,18 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> persistent_guids(Pubs) -> [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. -betas_from_segment_entries(List, TransientThreshold, IndexState) -> - {Filtered, IndexState1} = +betas_from_index_entries(List, TransientThreshold, IndexState) -> + {Filtered, Delivers, Acks} = lists:foldr( fun ({Guid, SeqId, IsPersistent, IsDelivered}, - {FilteredAcc, IndexStateAcc}) -> + {Filtered1, Delivers1, Acks1}) -> case SeqId < TransientThreshold andalso not IsPersistent of - true -> {FilteredAcc, - rabbit_queue_index:ack( - [SeqId], maybe_write_delivered( - not IsDelivered, - SeqId, IndexStateAcc))}; + true -> {Filtered1, + case IsDelivered of + true -> Delivers1; + false -> [SeqId | Delivers1] + end, + [SeqId | Acks1]}; false -> {[#msg_status { msg = undefined, guid = Guid, seq_id = SeqId, @@ -801,11 +802,14 @@ betas_from_segment_entries(List, TransientThreshold, IndexState) -> is_delivered = IsDelivered, msg_on_disk = true, index_on_disk = true - } | FilteredAcc], - IndexStateAcc} + } | Filtered1], + Delivers1, + Acks1} end - end, {[], IndexState}, List), - {bpqueue:from_list([{true, Filtered}]), IndexState1}. + end, {[], [], []}, List), + {bpqueue:from_list([{true, Filtered}]), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState))}. 
ensure_binary_properties(Msg = #basic_message { content = Content }) -> Msg #basic_message { @@ -968,7 +972,7 @@ delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> IndexState2 = case List of [] -> IndexState1; - _ -> {Q, IndexState3} = betas_from_segment_entries( + _ -> {Q, IndexState3} = betas_from_index_entries( List, TransientThreshold, IndexState1), remove_queue_entries(fun beta_fold/3, Q, IndexState3) end, @@ -1296,7 +1300,7 @@ maybe_deltas_to_betas(State = #vqstate { end_seq_id = DeltaSeqIdEnd } = Delta, {List, DeltaSeqId1, IndexState1} = rabbit_queue_index:read(DeltaSeqId, DeltaSeqIdEnd, IndexState), - {Q3a, IndexState2} = betas_from_segment_entries( + {Q3a, IndexState2} = betas_from_index_entries( List, TransientThreshold, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, case bpqueue:len(Q3a) of -- cgit v1.2.1 From 69a6303ba8f0602ca962b23d02500d0228238595 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Jun 2010 13:12:27 +0100 Subject: Removed unenforced redundancy in function args --- src/rabbit_queue_index.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 5123ca21..417401be 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -245,10 +245,10 @@ publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). deliver(SeqIds, State) -> - deliver_or_ack(del, ?DEL_JPREFIX, SeqIds, State). + deliver_or_ack(del, SeqIds, State). ack(SeqIds, State) -> - deliver_or_ack(ack, ?ACK_JPREFIX, SeqIds, State). + deliver_or_ack(ack, SeqIds, State). sync([], State) -> State; @@ -648,9 +648,10 @@ load_journal_entries(State = #qistate { journal_handle = Hdl }) -> _ErrOrEoF -> State end. 
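%% Usage sketch for the list-taking API above: deliver/2 and ack/2 now
%% accept many seq ids per call, so callers such as the tests earlier
%% in this series can write
%%   Qi1 = rabbit_queue_index:deliver([2,3,5,6], Qi0),
%%   Qi2 = rabbit_queue_index:ack([1,2,3], Qi1),
%% and the journal handle is looked up once per batch rather than once
%% per message.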
-deliver_or_ack(_Kind, _JPrefix, [], State) ->
+deliver_or_ack(_Kind, [], State) ->
     State;
-deliver_or_ack(Kind, JPrefix, SeqIds, State) ->
+deliver_or_ack(Kind, SeqIds, State) ->
+    JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end,
     {JournalHdl, State1} = get_journal_handle(State),
     ok = file_handle_cache:append(
            JournalHdl,
-- cgit v1.2.1

From 99aa79f3523271a9268a7bfe602cc98cb5caa25e Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 17 Jun 2010 15:02:29 +0100
Subject: Added test to invoke the queue index walker

---
 src/rabbit_tests.erl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 8e99780d..3597efe3 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -1833,6 +1833,7 @@ test_variable_queue_partial_segments_delta_thing() ->
 test_queue_recover() ->
     Count = 2*rabbit_queue_index:next_segment_boundary(0),
+    TxID = rabbit_guid:guid(),
     #amqqueue { pid = QPid, name = QName } = Q =
         rabbit_amqqueue:declare(test_queue(), true, false, [], none),
     Msg = fun() -> rabbit_basic:message(
                      rabbit_misc:r(<<>>, exchange, <<>>),
                      <<>>, #'P_basic'{delivery_mode = 2}, <<>>) end,
     Delivery = #delivery{mandatory = false, immediate = false,
-                         txn = none,
+                         txn = TxID,
                          sender = self(), message = Msg()},
     [true = rabbit_amqqueue:deliver(QPid, Delivery) || _ <- lists:seq(1, Count)],
-    rabbit_amqqueue:stat(Q),
-    exit(QPid, shutdown),
+    rabbit_amqqueue:commit_all([QPid], TxID, self()),
+    exit(QPid, kill),
     MRef = erlang:monitor(process, QPid),
     receive {'DOWN', MRef, process, QPid, _Info} -> ok
     after 10000 -> exit(timeout_waiting_for_queue_death)
-- cgit v1.2.1

From 5a7dce913f7d153910fd5ff87587f28757cdc1a9 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 17 Jun 2010 16:34:19 +0100
Subject: Expose the msg_store file size limit to configuration. Also some
 suitable derivations in tests.

---
 ebin/rabbit_app.in           |  1 +
 include/rabbit_msg_store.hrl |  2 --
 src/rabbit_msg_file.erl      | 33 +++++++++---------
 src/rabbit_msg_store.erl     | 79 +++++++++++++++++++++++++++-----------------
 src/rabbit_msg_store_gc.erl  | 19 +++++++----
 src/rabbit_tests.erl         | 17 ++++++----
 6 files changed, 89 insertions(+), 62 deletions(-)

diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in
index beeb87bb..ef80efc0 100644
--- a/ebin/rabbit_app.in
+++ b/ebin/rabbit_app.in
@@ -22,6 +22,7 @@
            {backing_queue_module, rabbit_variable_queue},
            {persister_max_wrap_entries, 500},
            {persister_hibernate_after, 10000},
+           {msg_store_file_size_limit, 16777216},
            {default_user, <<"guest">>},
            {default_pass, <<"guest">>},
            {default_vhost, <<"/">>},
diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl
index 36b908a7..d96fa758 100644
--- a/include/rabbit_msg_store.hrl
+++ b/include/rabbit_msg_store.hrl
@@ -31,8 +31,6 @@
 -include("rabbit.hrl").

--define(FILE_SIZE_LIMIT, (16*1024*1024)).

 -ifdef(use_specs).

 -type(msg() :: any()).
diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl
index 3dbf8ead..dd0579e9 100644
--- a/src/rabbit_msg_file.erl
+++ b/src/rabbit_msg_file.erl
@@ -31,7 +31,7 @@

 -module(rabbit_msg_file).

--export([append/3, read/2, scan/2]).
+-export([append/3, read/2, scan/3]).

 %%----------------------------------------------------------------------------

@@ -44,7 +44,7 @@
 -define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)).
 -define(GUID_SIZE_BYTES, 16).
 -define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)).
--define(SCAN_BLOCK_SIZE, ?FILE_SIZE_LIMIT div 4).
+-define(SCAN_BLOCK_SIZE(LIM), (LIM div 4)).
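%% Worked figures (illustrative): with the default
%% msg_store_file_size_limit of 16777216 bytes introduced above,
%% ?SCAN_BLOCK_SIZE(16777216) = 16777216 div 4 = 4194304 bytes, so a
%% full file at the limit is scanned in at most four 4MB reads.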
%%---------------------------------------------------------------------------- @@ -59,7 +59,7 @@ ({'ok', msg_size()} | {'error', any()})). -spec(read/2 :: (io_device(), msg_size()) -> ({'ok', {guid(), msg()}} | {'error', any()})). --spec(scan/2 :: (io_device(), file_size()) -> +-spec(scan/3 :: (io_device(), file_size(), file_size()) -> {'ok', [{guid(), msg_size(), position()}], position()}). -endif. @@ -92,28 +92,29 @@ read(FileHdl, TotalSize) -> KO -> KO end. -scan(FileHdl, FileSize) when FileSize >= 0 -> - scan(FileHdl, FileSize, <<>>, 0, [], 0). +scan(FileHdl, FileSize, FileSizeLim) when FileSize >= 0 -> + scan(FileHdl, FileSize, FileSizeLim, <<>>, 0, [], 0). -scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> +scan(_FileHdl, FileSize, _FileSizeLim, _Data, FileSize, Acc, ScanOffset) -> {ok, Acc, ScanOffset}; -scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> - Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), +scan(FileHdl, FileSize, FileSizeLim, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?SCAN_BLOCK_SIZE(FileSizeLim), (FileSize - ReadOffset)]), case file_handle_cache:read(FileHdl, Read) of {ok, Data1} -> {Data2, Acc1, ScanOffset1} = - scan(<>, Acc, ScanOffset), + scan1(<>, Acc, ScanOffset), ReadOffset1 = ReadOffset + size(Data1), - scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + scan(FileHdl, FileSize, FileSizeLim, Data2, ReadOffset1, Acc1, + ScanOffset1); _KO -> {ok, Acc, ScanOffset} end. -scan(<<>>, Acc, Offset) -> +scan1(<<>>, Acc, Offset) -> {<<>>, Acc, Offset}; -scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> +scan1(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> {<<>>, Acc, Offset}; %% Nothing to do other than stop. -scan(<>, Acc, Offset) -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, case WriteMarker of @@ -126,9 +127,9 @@ scan(<> = <>, <> = <>, - scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + scan1(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); _ -> - scan(Rest, Acc, Offset + TotalSize) + scan1(Rest, Acc, Offset + TotalSize) end; -scan(Data, Acc, Offset) -> +scan1(Data, Acc, Offset) -> {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 51ad2926..c28302dd 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -81,7 +81,8 @@ dedup_cache_ets, %% tid of dedup cache table cur_file_cache_ets, %% tid of current file cache table client_refs, %% set of references of all registered clients - successfully_recovered %% boolean: did we recover state? + successfully_recovered, %% boolean: did we recover state? + file_size_limit %% how big are our files allowed to get? }). -record(client_msstate, @@ -106,6 +107,7 @@ -type(server() :: pid() | atom()). -type(file_num() :: non_neg_integer()). +-type(file_size() :: non_neg_integer()). -type(client_msstate() :: #client_msstate { file_handle_cache :: dict(), index_state :: any(), index_module :: atom(), @@ -139,7 +141,7 @@ -spec(successfully_recovered_state/1 :: (server()) -> boolean()). -spec(gc/3 :: (non_neg_integer(), non_neg_integer(), - {tid(), file_path(), atom(), any()}) -> + {tid(), file_path(), atom(), any(), file_size()}) -> 'concurrent_readers' | non_neg_integer()). -endif. 
@@ -518,6 +520,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> [ordered_set, public]), CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), + {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit), + State = #msstate { dir = Dir, index_module = IndexModule, index_state = IndexState, @@ -536,7 +540,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> dedup_cache_ets = DedupCacheEts, cur_file_cache_ets = CurFileCacheEts, client_refs = ClientRefs1, - successfully_recovered = AllCleanShutdown + successfully_recovered = AllCleanShutdown, + file_size_limit = FileSizeLimit }, ok = case AllCleanShutdown of @@ -548,7 +553,8 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), - ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames), + ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames, + FileSizeLimit), %% There should be no more tmp files now, so go ahead and load the %% whole lot @@ -563,7 +569,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> ok = file_handle_cache:truncate(CurHdl), {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, - FileSummaryEts), + FileSummaryEts, FileSizeLimit), {ok, maybe_compact( State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }), @@ -1207,22 +1213,24 @@ count_msg_refs(Gen, Seed, State) -> count_msg_refs(Gen, Next, State) end. -recover_crashed_compactions(Dir, FileNames, TmpFileNames) -> +recover_crashed_compactions(Dir, FileNames, TmpFileNames, FileSizeLimit) -> lists:foreach( fun (TmpFileName) -> NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFileName, FileNames), ok = recover_crashed_compaction( - Dir, TmpFileName, NonTmpRelatedFileName) + Dir, TmpFileName, NonTmpRelatedFileName, FileSizeLimit) end, TmpFileNames), ok. -recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) -> +recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName, + FileSizeLimit) -> {ok, UncorruptedMessagesTmp, GuidsTmp} = - scan_file_for_valid_messages_and_guids(Dir, TmpFileName), + scan_file_for_valid_messages_and_guids(Dir, TmpFileName, FileSizeLimit), {ok, UncorruptedMessages, Guids} = - scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName), + scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName, + FileSizeLimit), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -1299,7 +1307,7 @@ recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) -> {ok, _MainMessages, GuidsMain} = scan_file_for_valid_messages_and_guids( - Dir, NonTmpRelatedFileName), + Dir, NonTmpRelatedFileName, FileSizeLimit), %% check that everything in Guids1 is in GuidsMain true = is_sublist(Guids1, GuidsMain), %% check that everything in GuidsTmp is in GuidsMain @@ -1313,11 +1321,12 @@ is_sublist(SmallerL, BiggerL) -> is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). 
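%% Since init/1 now pulls the limit from the application environment,
%% it can be overridden per deployment without recompiling. A sketch
%% of standard OTP configuration (not shown in the patch; 33554432 is
%% an arbitrary example value):
%%   %% in the release's config file:
%%   [{rabbit, [{msg_store_file_size_limit, 33554432}]}].
%%   %% or programmatically, before the store starts:
%%   ok = application:set_env(rabbit, msg_store_file_size_limit,
%%                            33554432, infinity).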
-scan_file_for_valid_messages(Dir, FileName) -> +scan_file_for_valid_messages(Dir, FileName, FileSizeLimit) -> case open_file(Dir, FileName, ?READ_MODE) of {ok, Hdl} -> Valid = rabbit_msg_file:scan( Hdl, filelib:file_size( - form_filename(Dir, FileName))), + form_filename(Dir, FileName)), + FileSizeLimit), %% if something really bad has happened, %% the close could fail, but ignore file_handle_cache:close(Hdl), @@ -1326,8 +1335,9 @@ scan_file_for_valid_messages(Dir, FileName) -> {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} end. -scan_file_for_valid_messages_and_guids(Dir, FileName) -> - {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), +scan_file_for_valid_messages_and_guids(Dir, FileName, FileSizeLimit) -> + {ok, Messages, _FileSize} = + scan_file_for_valid_messages(Dir, FileName, FileSizeLimit), {ok, Messages, [Guid || {Guid, _TotalSize, _FileOffset} <- Messages]}. %% Takes the list in *ascending* order (i.e. eldest message @@ -1395,10 +1405,11 @@ build_index(Gatherer, Left, [File|Files], State) -> end), build_index(Gatherer, File, Files, State). -build_index_worker(Gatherer, State = #msstate { dir = Dir }, +build_index_worker(Gatherer, State = #msstate { file_size_limit = FileSizeLimit, + dir = Dir }, Left, File, Files) -> {ok, Messages, FileSize} = - scan_file_for_valid_messages(Dir, filenum_to_name(File)), + scan_file_for_valid_messages(Dir, filenum_to_name(File), FileSizeLimit), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> @@ -1450,8 +1461,9 @@ maybe_roll_to_new_file( current_file_handle = CurHdl, current_file = CurFile, file_summary_ets = FileSummaryEts, - cur_file_cache_ets = CurFileCacheEts }) - when Offset >= ?FILE_SIZE_LIMIT -> + cur_file_cache_ets = CurFileCacheEts, + file_size_limit = FileSizeLimit }) + when Offset >= FileSizeLimit -> State1 = internal_sync(State), ok = file_handle_cache:close(CurHdl), NextFile = CurFile + 1, @@ -1477,8 +1489,9 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, sum_file_size = SumFileSize, gc_active = false, gc_pid = GCPid, - file_summary_ets = FileSummaryEts }) - when (SumFileSize > 2 * ?FILE_SIZE_LIMIT andalso + file_summary_ets = FileSummaryEts, + file_size_limit = FileSizeLimit }) + when (SumFileSize > 2 * FileSizeLimit andalso (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) -> %% TODO: the algorithm here is sub-optimal - it may result in a %% complete traversal of FileSummaryEts. @@ -1486,7 +1499,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, '$end_of_table' -> State; First -> - case find_files_to_gc(FileSummaryEts, + case find_files_to_gc(FileSummaryEts, FileSizeLimit, ets:lookup(FileSummaryEts, First)) of not_found -> State; @@ -1503,7 +1516,7 @@ maybe_compact(State = #msstate { sum_valid_data = SumValid, maybe_compact(State) -> State. -find_files_to_gc(FileSummaryEts, +find_files_to_gc(FileSummaryEts, FileSizeLimit, [#file_summary { file = Dst, valid_total_size = DstValid, right = Src }]) -> @@ -1518,9 +1531,10 @@ find_files_to_gc(FileSummaryEts, ets:lookup(FileSummaryEts, Src), case SrcRight of undefined -> not_found; - _ -> case DstValid + SrcValid =< ?FILE_SIZE_LIMIT of + _ -> case DstValid + SrcValid =< FileSizeLimit of true -> {Src, Dst}; - false -> find_files_to_gc(FileSummaryEts, Next) + false -> find_files_to_gc( + FileSummaryEts, FileSizeLimit, Next) end end end. 
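%% The compaction trigger used by maybe_compact/1 above, pulled out as
%% a standalone predicate for clarity (a sketch; should_compact/4 is
%% not a function in the patch): the store must span more than two
%% files' worth of data and a sufficient fraction of it must be
%% garbage before a GC candidate pair is even searched for.
should_compact(SumValid, SumFileSize, FileSizeLimit, GarbageFraction) ->
    SumFileSize > 2 * FileSizeLimit
        andalso (SumFileSize - SumValid) / SumFileSize > GarbageFraction.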
@@ -1565,7 +1579,8 @@ delete_file_if_empty(File, State = #msstate { %% garbage collection / compaction / aggregation -- external %%---------------------------------------------------------------------------- -gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> +gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState, + _FileSizeLimit}) -> [SrcObj = #file_summary { readers = SrcReaders, left = DstFile, @@ -1597,7 +1612,8 @@ combine_files(#file_summary { file = Source, valid_total_size = DestinationValid, contiguous_top = DestinationContiguousTop, right = Source }, - State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> + State = {_FileSummaryEts, Dir, _Index, _IndexState, + _FileSizeLimit}) -> SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), {ok, SourceHdl} = open_file(Dir, SourceName, @@ -1656,11 +1672,11 @@ combine_files(#file_summary { file = Source, ok = file_handle_cache:delete(SourceHdl), ExpectedSize. -find_unremoved_messages_in_file(File, - {_FileSummaryEts, Dir, Index, IndexState}) -> +find_unremoved_messages_in_file( + File, {_FileSummaryEts, Dir, Index, IndexState, FileSizeLimit}) -> %% Messages here will be end-of-file at start-of-list {ok, Messages, _FileSize} = - scan_file_for_valid_messages(Dir, filenum_to_name(File)), + scan_file_for_valid_messages(Dir, filenum_to_name(File), FileSizeLimit), %% foldl will reverse so will end up with msgs in ascending offset order lists:foldl(fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) -> case Index:lookup(Guid, IndexState) of @@ -1672,7 +1688,8 @@ find_unremoved_messages_in_file(File, end, {[], 0}, Messages). copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, - Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> + Destination, + {_FileSummaryEts, _Dir, Index, IndexState, _FileSizeLimit}) -> Copy = fun ({BlockStart, BlockEnd}) -> BSize = BlockEnd - BlockStart, {ok, BlockStart} = diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 4b80d088..f29bf1a4 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/4, gc/3, no_readers/2, stop/1]). +-export([start_link/5, gc/3, no_readers/2, stop/1]). -export([set_maximum_since_use/2]). @@ -46,6 +46,7 @@ index_module, parent, file_summary_ets, + file_size_limit, scheduled }). @@ -55,7 +56,9 @@ -ifdef(use_specs). --spec(start_link/4 :: (file_path(), any(), atom(), tid()) -> +-type(file_size() :: non_neg_integer()). + +-spec(start_link/5 :: (file_path(), any(), atom(), tid(), file_size()) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). -spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). @@ -66,9 +69,10 @@ %%---------------------------------------------------------------------------- -start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> +start_link(Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit) -> gen_server2:start_link( - ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], + ?MODULE, + [self(), Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit], [{timeout, infinity}]). 
gc(Server, Source, Destination) -> @@ -85,7 +89,7 @@ set_maximum_since_use(Pid, Age) -> %%---------------------------------------------------------------------------- -init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> +init([Parent, Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit]) -> ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), {ok, #gcstate { dir = Dir, @@ -93,6 +97,7 @@ init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> index_module = IndexModule, parent = Parent, file_summary_ets = FileSummaryEts, + file_size_limit = FileSizeLimit, scheduled = undefined }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -131,9 +136,11 @@ attempt_gc(State = #gcstate { dir = Dir, index_module = Index, parent = Parent, file_summary_ets = FileSummaryEts, + file_size_limit = FileSizeLimit, scheduled = {Source, Destination} }) -> case rabbit_msg_store:gc(Source, Destination, - {FileSummaryEts, Dir, Index, IndexState}) of + {FileSummaryEts, Dir, Index, IndexState, + FileSizeLimit}) of concurrent_readers -> State; Reclaimed -> ok = rabbit_msg_store:gc_done( Parent, Reclaimed, Source, Destination), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 3597efe3..36fa855a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1475,10 +1475,12 @@ test_msg_store() -> %% restart empty ok = stop_msg_store(), ok = start_msg_store_empty(), %% now safe to reuse guids - %% push a lot of msgs in... - BigCount = 100000, + %% push a lot of msgs in... at least 100 files worth + {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), + PayloadSizeBits = 65536, + BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], - Payload = << 0:65536 >>, + Payload = << 0:PayloadSizeBits >>, ok = rabbit_msg_store:client_terminate( lists:foldl( fun (Guid, MSCStateN) -> @@ -1569,10 +1571,11 @@ test_queue_init() -> test_queue_index() -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, + MostOfASegment = trunc(SegmentSize*0.75), stop_msg_store(), ok = empty_test_queue(), - SeqIdsA = lists:seq(0,9999), - SeqIdsB = lists:seq(10000,19999), + SeqIdsA = lists:seq(0,MostOfASegment-1), + SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), {0, _Terms, Qi0} = test_queue_init(), {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), @@ -1594,7 +1597,7 @@ test_queue_index() -> _Qi11 = rabbit_queue_index:terminate([], Qi10), ok = stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), - %% should get length back as 10000 + %% should get length back as MostOfASegment LenB = length(SeqIdsB), {LenB, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), @@ -1834,7 +1837,7 @@ test_variable_queue_partial_segments_delta_thing() -> test_queue_recover() -> Count = 2*rabbit_queue_index:next_segment_boundary(0), TxID = rabbit_guid:guid(), - #amqqueue { pid = QPid, name = QName } = Q = + #amqqueue { pid = QPid, name = QName } = rabbit_amqqueue:declare(test_queue(), true, false, [], none), Msg = fun() -> rabbit_basic:message( rabbit_misc:r(<<>>, exchange, <<>>), -- cgit v1.2.1 From e1430890a16591a405019a080b908a419649806c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Jun 2010 17:12:58 +0100 Subject: Fix msg_file scan block size at 4MB --- src/rabbit_msg_file.erl | 32 
++++++++++++++--------------- src/rabbit_msg_store.erl | 50 +++++++++++++++++++-------------------------- src/rabbit_msg_store_gc.erl | 18 ++++++---------- 3 files changed, 43 insertions(+), 57 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index dd0579e9..51d875ac 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -31,7 +31,7 @@ -module(rabbit_msg_file). --export([append/3, read/2, scan/3]). +-export([append/3, read/2, scan/2]). %%---------------------------------------------------------------------------- @@ -44,7 +44,7 @@ -define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). -define(GUID_SIZE_BYTES, 16). -define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). --define(SCAN_BLOCK_SIZE(LIM), (LIM div 4)). +-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB %%---------------------------------------------------------------------------- @@ -59,7 +59,7 @@ ({'ok', msg_size()} | {'error', any()})). -spec(read/2 :: (io_device(), msg_size()) -> ({'ok', {guid(), msg()}} | {'error', any()})). --spec(scan/3 :: (io_device(), file_size(), file_size()) -> +-spec(scan/2 :: (io_device(), file_size()) -> {'ok', [{guid(), msg_size(), position()}], position()}). -endif. @@ -92,29 +92,29 @@ read(FileHdl, TotalSize) -> KO -> KO end. -scan(FileHdl, FileSize, FileSizeLim) when FileSize >= 0 -> - scan(FileHdl, FileSize, FileSizeLim, <<>>, 0, [], 0). +scan(FileHdl, FileSize) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, [], 0). -scan(_FileHdl, FileSize, _FileSizeLim, _Data, FileSize, Acc, ScanOffset) -> +scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> {ok, Acc, ScanOffset}; -scan(FileHdl, FileSize, FileSizeLim, Data, ReadOffset, Acc, ScanOffset) -> - Read = lists:min([?SCAN_BLOCK_SIZE(FileSizeLim), (FileSize - ReadOffset)]), +scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), case file_handle_cache:read(FileHdl, Read) of {ok, Data1} -> {Data2, Acc1, ScanOffset1} = - scan1(<>, Acc, ScanOffset), + scan(<>, Acc, ScanOffset), ReadOffset1 = ReadOffset + size(Data1), - scan(FileHdl, FileSize, FileSizeLim, Data2, ReadOffset1, Acc1, + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); _KO -> {ok, Acc, ScanOffset} end. -scan1(<<>>, Acc, Offset) -> +scan(<<>>, Acc, Offset) -> {<<>>, Acc, Offset}; -scan1(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> +scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> {<<>>, Acc, Offset}; %% Nothing to do other than stop. -scan1(<>, Acc, Offset) -> TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, case WriteMarker of @@ -127,9 +127,9 @@ scan1(<> = <>, <> = <>, - scan1(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); _ -> - scan1(Rest, Acc, Offset + TotalSize) + scan(Rest, Acc, Offset + TotalSize) end; -scan1(Data, Acc, Offset) -> +scan(Data, Acc, Offset) -> {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index c28302dd..0f3f57a0 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -107,7 +107,6 @@ -type(server() :: pid() | atom()). -type(file_num() :: non_neg_integer()). --type(file_size() :: non_neg_integer()). -type(client_msstate() :: #client_msstate { file_handle_cache :: dict(), index_state :: any(), index_module :: atom(), @@ -141,7 +140,7 @@ -spec(successfully_recovered_state/1 :: (server()) -> boolean()). 
-spec(gc/3 :: (non_neg_integer(), non_neg_integer(), - {tid(), file_path(), atom(), any(), file_size()}) -> + {tid(), file_path(), atom(), any()}) -> 'concurrent_readers' | non_neg_integer()). -endif. @@ -553,8 +552,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), TmpFileNames = sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), - ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames, - FileSizeLimit), + ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames), %% There should be no more tmp files now, so go ahead and load the %% whole lot @@ -569,7 +567,7 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> ok = file_handle_cache:truncate(CurHdl), {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, - FileSummaryEts, FileSizeLimit), + FileSummaryEts), {ok, maybe_compact( State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }), @@ -1213,24 +1211,22 @@ count_msg_refs(Gen, Seed, State) -> count_msg_refs(Gen, Next, State) end. -recover_crashed_compactions(Dir, FileNames, TmpFileNames, FileSizeLimit) -> +recover_crashed_compactions(Dir, FileNames, TmpFileNames) -> lists:foreach( fun (TmpFileName) -> NonTmpRelatedFileName = filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, true = lists:member(NonTmpRelatedFileName, FileNames), ok = recover_crashed_compaction( - Dir, TmpFileName, NonTmpRelatedFileName, FileSizeLimit) + Dir, TmpFileName, NonTmpRelatedFileName) end, TmpFileNames), ok. -recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName, - FileSizeLimit) -> +recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) -> {ok, UncorruptedMessagesTmp, GuidsTmp} = - scan_file_for_valid_messages_and_guids(Dir, TmpFileName, FileSizeLimit), + scan_file_for_valid_messages_and_guids(Dir, TmpFileName), {ok, UncorruptedMessages, Guids} = - scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName, - FileSizeLimit), + scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName), %% 1) It's possible that everything in the tmp file is also in the %% main file such that the main file is (prefix ++ %% tmpfile). This means that compaction failed immediately @@ -1307,7 +1303,7 @@ recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName, {ok, _MainMessages, GuidsMain} = scan_file_for_valid_messages_and_guids( - Dir, NonTmpRelatedFileName, FileSizeLimit), + Dir, NonTmpRelatedFileName), %% check that everything in Guids1 is in GuidsMain true = is_sublist(Guids1, GuidsMain), %% check that everything in GuidsTmp is in GuidsMain @@ -1321,12 +1317,11 @@ is_sublist(SmallerL, BiggerL) -> is_disjoint(SmallerL, BiggerL) -> lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). -scan_file_for_valid_messages(Dir, FileName, FileSizeLimit) -> +scan_file_for_valid_messages(Dir, FileName) -> case open_file(Dir, FileName, ?READ_MODE) of {ok, Hdl} -> Valid = rabbit_msg_file:scan( Hdl, filelib:file_size( - form_filename(Dir, FileName)), - FileSizeLimit), + form_filename(Dir, FileName))), %% if something really bad has happened, %% the close could fail, but ignore file_handle_cache:close(Hdl), @@ -1335,9 +1330,9 @@ scan_file_for_valid_messages(Dir, FileName, FileSizeLimit) -> {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} end. 
-scan_file_for_valid_messages_and_guids(Dir, FileName, FileSizeLimit) -> +scan_file_for_valid_messages_and_guids(Dir, FileName) -> {ok, Messages, _FileSize} = - scan_file_for_valid_messages(Dir, FileName, FileSizeLimit), + scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [Guid || {Guid, _TotalSize, _FileOffset} <- Messages]}. %% Takes the list in *ascending* order (i.e. eldest message @@ -1405,11 +1400,10 @@ build_index(Gatherer, Left, [File|Files], State) -> end), build_index(Gatherer, File, Files, State). -build_index_worker(Gatherer, State = #msstate { file_size_limit = FileSizeLimit, - dir = Dir }, +build_index_worker(Gatherer, State = #msstate { dir = Dir }, Left, File, Files) -> {ok, Messages, FileSize} = - scan_file_for_valid_messages(Dir, filenum_to_name(File), FileSizeLimit), + scan_file_for_valid_messages(Dir, filenum_to_name(File)), {ValidMessages, ValidTotalSize} = lists:foldl( fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> @@ -1579,8 +1573,7 @@ delete_file_if_empty(File, State = #msstate { %% garbage collection / compaction / aggregation -- external %%---------------------------------------------------------------------------- -gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState, - _FileSizeLimit}) -> +gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> [SrcObj = #file_summary { readers = SrcReaders, left = DstFile, @@ -1612,8 +1605,7 @@ combine_files(#file_summary { file = Source, valid_total_size = DestinationValid, contiguous_top = DestinationContiguousTop, right = Source }, - State = {_FileSummaryEts, Dir, _Index, _IndexState, - _FileSizeLimit}) -> + State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> SourceName = filenum_to_name(Source), DestinationName = filenum_to_name(Destination), {ok, SourceHdl} = open_file(Dir, SourceName, @@ -1672,11 +1664,11 @@ combine_files(#file_summary { file = Source, ok = file_handle_cache:delete(SourceHdl), ExpectedSize. -find_unremoved_messages_in_file( - File, {_FileSummaryEts, Dir, Index, IndexState, FileSizeLimit}) -> +find_unremoved_messages_in_file(File, + {_FileSummaryEts, Dir, Index, IndexState}) -> %% Messages here will be end-of-file at start-of-list {ok, Messages, _FileSize} = - scan_file_for_valid_messages(Dir, filenum_to_name(File), FileSizeLimit), + scan_file_for_valid_messages(Dir, filenum_to_name(File)), %% foldl will reverse so will end up with msgs in ascending offset order lists:foldl(fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) -> case Index:lookup(Guid, IndexState) of @@ -1689,7 +1681,7 @@ find_unremoved_messages_in_file( copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, Destination, - {_FileSummaryEts, _Dir, Index, IndexState, _FileSizeLimit}) -> + {_FileSummaryEts, _Dir, Index, IndexState}) -> Copy = fun ({BlockStart, BlockEnd}) -> BSize = BlockEnd - BlockStart, {ok, BlockStart} = diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index f29bf1a4..56cd422b 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -33,7 +33,7 @@ -behaviour(gen_server2). --export([start_link/5, gc/3, no_readers/2, stop/1]). +-export([start_link/4, gc/3, no_readers/2, stop/1]). -export([set_maximum_since_use/2]). @@ -46,7 +46,6 @@ index_module, parent, file_summary_ets, - file_size_limit, scheduled }). @@ -56,9 +55,7 @@ -ifdef(use_specs). --type(file_size() :: non_neg_integer()). 
- --spec(start_link/5 :: (file_path(), any(), atom(), tid(), file_size()) -> +-spec(start_link/4 :: (file_path(), any(), atom(), tid()) -> {'ok', pid()} | 'ignore' | {'error', any()}). -spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). -spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). @@ -69,10 +66,10 @@ %%---------------------------------------------------------------------------- -start_link(Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit) -> +start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> gen_server2:start_link( ?MODULE, - [self(), Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit], + [self(), Dir, IndexState, IndexModule, FileSummaryEts], [{timeout, infinity}]). gc(Server, Source, Destination) -> @@ -89,7 +86,7 @@ set_maximum_since_use(Pid, Age) -> %%---------------------------------------------------------------------------- -init([Parent, Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit]) -> +init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), {ok, #gcstate { dir = Dir, @@ -97,7 +94,6 @@ init([Parent, Dir, IndexState, IndexModule, FileSummaryEts, FileSizeLimit]) -> index_module = IndexModule, parent = Parent, file_summary_ets = FileSummaryEts, - file_size_limit = FileSizeLimit, scheduled = undefined }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -136,11 +132,9 @@ attempt_gc(State = #gcstate { dir = Dir, index_module = Index, parent = Parent, file_summary_ets = FileSummaryEts, - file_size_limit = FileSizeLimit, scheduled = {Source, Destination} }) -> case rabbit_msg_store:gc(Source, Destination, - {FileSummaryEts, Dir, Index, IndexState, - FileSizeLimit}) of + {FileSummaryEts, Dir, Index, IndexState}) of concurrent_readers -> State; Reclaimed -> ok = rabbit_msg_store:gc_done( Parent, Reclaimed, Source, Destination), -- cgit v1.2.1 From fa20cf59c7e11c6e4b78cf3f9af0aec58c69baf3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Jun 2010 17:28:34 +0100 Subject: Make the queue_index max_journal_entry_count configurable --- ebin/rabbit_app.in | 1 + src/rabbit_queue_index.erl | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index ef80efc0..0ada27d4 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -23,6 +23,7 @@ {persister_max_wrap_entries, 500}, {persister_hibernate_after, 10000}, {msg_store_file_size_limit, 16777216}, + {queue_index_max_journal_entry_count, 262144}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 417401be..62cbfbf3 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -69,10 +69,10 @@ %% Because of the fact that publishes, delivers and acks can occur all %% over, we wish to avoid lots of seeking. Therefore we have a fixed %% sized journal to which all actions are appended. When the number of -%% entries in this journal reaches ?MAX_JOURNAL_ENTRY_COUNT, the -%% journal entries are scattered out to their relevant files, and the -%% journal is truncated to zero size. 
Note that entries in the journal -%% must carry the full sequence id, thus the format of entries in the +%% entries in this journal reaches max_journal_entries, the journal +%% entries are scattered out to their relevant files, and the journal +%% is truncated to zero size. Note that entries in the journal must +%% carry the full sequence id, thus the format of entries in the %% journal is different to that in the segments. %% %% The journal is also kept fully in memory, pre-segmented: the state @@ -112,7 +112,6 @@ %% ---- Journal details ---- --define(MAX_JOURNAL_ENTRY_COUNT, 262144). -define(JOURNAL_FILENAME, "journal.jif"). -define(PUB_PERSIST_JPREFIX, 2#00). @@ -159,7 +158,8 @@ %%---------------------------------------------------------------------------- --record(qistate, { dir, segments, journal_handle, dirty_count }). +-record(qistate, { dir, segments, journal_handle, dirty_count, + max_journal_entries }). -record(segment, { num, path, journal_entries, unacked }). @@ -178,11 +178,12 @@ })). -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). --type(qistate() :: #qistate { dir :: file_path(), - segments :: 'undefined' | seg_dict(), - journal_handle :: hdl(), - dirty_count :: integer() - }). +-type(qistate() :: #qistate { dir :: file_path(), + segments :: 'undefined' | seg_dict(), + journal_handle :: hdl(), + dirty_count :: integer(), + max_journal_entries :: non_neg_integer() + }). -type(startup_fun_state() :: {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). @@ -548,8 +549,9 @@ add_to_journal(RelSeq, Action, JEntries) -> end, array:set(RelSeq, Val, JEntries). -maybe_flush_journal(State = #qistate { dirty_count = DCount }) - when DCount > ?MAX_JOURNAL_ENTRY_COUNT -> +maybe_flush_journal(State = #qistate { dirty_count = DCount, + max_journal_entries = MaxJournal }) + when DCount > MaxJournal -> flush_journal(State); maybe_flush_journal(State) -> State. 
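%% Worked example (illustrative): with the default of 262144 added to
%% rabbit_app.in above, maybe_flush_journal/1 is a no-op until the
%% dirty count exceeds 262144, at which point flush_journal/1 scatters
%% the journal entries to their segment files and truncates the
%% journal. Presumably the configured value reaches #qistate roughly
%% like this (an assumption; the reading side is not shown in this
%% hunk):
%%   {ok, MaxJournal} =
%%       application:get_env(rabbit, queue_index_max_journal_entry_count),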
-- cgit v1.2.1 From fc8ca6b0fdfa1b849ed6640054c91f45aad0c8e0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Jun 2010 17:46:01 +0100 Subject: Reduce msg store file size limit and queue index journal size temporarily whilst running intensive tests --- ebin/rabbit_app.in | 2 +- src/rabbit_tests.erl | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index 0ada27d4..95d59b43 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -23,7 +23,7 @@ {persister_max_wrap_entries, 500}, {persister_hibernate_after, 10000}, {msg_store_file_size_limit, 16777216}, - {queue_index_max_journal_entry_count, 262144}, + {queue_index_max_journal_entries, 262144}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 36fa855a..dce4d46e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1311,10 +1311,23 @@ extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> test_backing_queue() -> case application:get_env(rabbit, backing_queue_module) of {ok, rabbit_variable_queue} -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 128, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), passed = test_msg_store(), + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), passed = test_queue_index(), passed = test_variable_queue(), - passed = test_queue_recover(); + passed = test_queue_recover(), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + passed; _ -> passed end. -- cgit v1.2.1 From 3cccf1fa0806cfbc2a7a8260ca04c386810caf72 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 17 Jun 2010 18:12:05 +0100 Subject: delete1 is not needed and never called --- src/rabbit_variable_queue.erl | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4a64d14d..a68c428c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -395,20 +395,9 @@ delete_and_terminate(State) -> {_PurgeCount, State1} = purge(State), State2 = #vqstate { index_state = IndexState, msg_store_clients = {{MSCStateP, PRef}, - {MSCStateT, TRef}}, - transient_threshold = TransientThreshold } = + {MSCStateT, TRef}} } = remove_pending_ack(false, State1), - %% flushing here is good because it deletes all full segments, - %% leaving only partial segments around. - IndexState1 = rabbit_queue_index:flush(IndexState), - IndexState2 = - case rabbit_queue_index:bounds(IndexState1) of - {N, N, IndexState3} -> - IndexState3; - {DeltaSeqId, NextSeqId, IndexState3} -> - delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState3) - end, - IndexState5 = rabbit_queue_index:delete_and_terminate(IndexState2), + IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState), case MSCStateP of undefined -> ok; _ -> rabbit_msg_store:delete_client( @@ -417,7 +406,7 @@ delete_and_terminate(State) -> end, rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), rabbit_msg_store:client_terminate(MSCStateT), - a(State2 #vqstate { index_state = IndexState5, + a(State2 #vqstate { index_state = IndexState1, msg_store_clients = undefined }). 
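%% Referring back to test_backing_queue in commit fc8ca6b0 above: the
%% lowered limits are only restored if every test passes. A more
%% defensive shape would restore the old value even on failure, e.g.
%% (a sketch; with_env/4 is invented here, not part of the patch, and
%% assumes the key already has a value):
with_env(App, Key, Value, Fun) ->
    {ok, Old} = application:get_env(App, Key),
    ok = application:set_env(App, Key, Value, infinity),
    try Fun()
    after ok = application:set_env(App, Key, Old, infinity)
    end.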
purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> @@ -963,21 +952,6 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, [ Fun() || Fun <- lists:reverse(SFuns) ], State1 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. -delete1(_TransientThreshold, NextSeqId, DeltaSeqId, IndexState) - when DeltaSeqId >= NextSeqId -> - IndexState; -delete1(TransientThreshold, NextSeqId, DeltaSeqId, IndexState) -> - {List, Next, IndexState1} = - rabbit_queue_index:read(DeltaSeqId, NextSeqId, IndexState), - IndexState2 = - case List of - [] -> IndexState1; - _ -> {Q, IndexState3} = betas_from_index_entries( - List, TransientThreshold, IndexState1), - remove_queue_entries(fun beta_fold/3, Q, IndexState3) - end, - delete1(TransientThreshold, NextSeqId, Next, IndexState2). - purge_betas_and_deltas(State = #vqstate { q3 = Q3, index_state = IndexState }) -> case bpqueue:is_empty(Q3) of -- cgit v1.2.1 From 8a777dd72e31367181663daf8846152048e10b70 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 17 Jun 2010 18:22:00 +0100 Subject: cosmetic --- src/rabbit_msg_file.erl | 3 +-- src/rabbit_msg_store.erl | 6 ++---- src/rabbit_msg_store_gc.erl | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl index 51d875ac..0bf702e4 100644 --- a/src/rabbit_msg_file.erl +++ b/src/rabbit_msg_file.erl @@ -104,8 +104,7 @@ scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> {Data2, Acc1, ScanOffset1} = scan(<>, Acc, ScanOffset), ReadOffset1 = ReadOffset + size(Data1), - scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, - ScanOffset1); + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); _KO -> {ok, Acc, ScanOffset} end. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 0f3f57a0..706a7fae 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1331,8 +1331,7 @@ scan_file_for_valid_messages(Dir, FileName) -> end. scan_file_for_valid_messages_and_guids(Dir, FileName) -> - {ok, Messages, _FileSize} = - scan_file_for_valid_messages(Dir, FileName), + {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), {ok, Messages, [Guid || {Guid, _TotalSize, _FileOffset} <- Messages]}. %% Takes the list in *ascending* order (i.e. eldest message @@ -1680,8 +1679,7 @@ find_unremoved_messages_in_file(File, end, {[], 0}, Messages). copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, - Destination, - {_FileSummaryEts, _Dir, Index, IndexState}) -> + Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> Copy = fun ({BlockStart, BlockEnd}) -> BSize = BlockEnd - BlockStart, {ok, BlockStart} = diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 56cd422b..4b80d088 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -68,8 +68,7 @@ start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> gen_server2:start_link( - ?MODULE, - [self(), Dir, IndexState, IndexModule, FileSummaryEts], + ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], [{timeout, infinity}]). 
gc(Server, Source, Destination) -> -- cgit v1.2.1 From 5ecca41b5997c363bcdead2b8b1635625b271878 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 17 Jun 2010 18:35:22 +0100 Subject: correct comment --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 62cbfbf3..49daaa3d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -304,7 +304,7 @@ next_segment_boundary(SeqId) -> bounds(State = #qistate { segments = Segments }) -> %% This is not particularly efficient, but only gets invoked on - %% queue initialisation and termination. + %% queue initialisation. SegNums = lists:sort(segment_nums(Segments)), %% Don't bother trying to figure out the lowest seq_id, merely the %% seq_id of the start of the lowest segment. That seq_id may not -- cgit v1.2.1 From 4e4d7ffb285e01ded9178fe155a76bbcef8e1e97 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 10:22:58 +0100 Subject: slight tidying up of permitted_ram_index_count --- src/rabbit_variable_queue.erl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a68c428c..34fc8cfd 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -103,8 +103,9 @@ %% entries must be written to disk by the queue_index module. This can %% badly stall the queue. In order to avoid this, the proportion of %% gammas / (betas+gammas) must not be lower than (betas+gammas) / -%% (alphas+betas+gammas). Thus as the queue grows, and the proportion -%% of alphas shrink, the proportion of gammas will grow, thus at the +%% (alphas+betas+gammas). As the queue grows or available memory +%% shrinks, the latter ratio increases, requiring the conversion of +%% more gammas to betas in order to maintain the invariant. At the %% point at which betas and gammas must be converted to deltas, there %% should be very few betas remaining, thus the transition is fast (no %% work needs to be done for the gamma -> delta transition). @@ -835,25 +836,22 @@ beta_fold(Fun, Init, Q) -> bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). permitted_ram_index_count(#vqstate { len = 0 }) -> - undefined; + infinity; permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3, delta = #delta { count = DeltaCount } }) -> AlphaBetaLen = Len - DeltaCount, case AlphaBetaLen == 0 of - true -> undefined; + true -> infinity; false -> BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), - %% the fraction of the alphas+betas that are betas - BetaFrac = BetaLen / AlphaBetaLen, - BetaLen - trunc(BetaFrac * BetaLen) + BetaLen - trunc(BetaLen * BetaLen / AlphaBetaLen) end. - should_force_index_to_disk(State = #vqstate { ram_index_count = RamIndexCount }) -> case permitted_ram_index_count(State) of - undefined -> false; + infinity -> false; Permitted -> RamIndexCount >= Permitted end. 
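%% Worked example for permitted_ram_index_count/1 above (illustrative
%% figures): with len = 1000, no deltas (so AlphaBetaLen = 1000) and
%% 400 betas across q2 and q3,
%%   Permitted = 400 - trunc(400 * 400 / 1000) = 400 - 160 = 240,
%% so should_force_index_to_disk/1 answers true once 240 or more of
%% those betas hold their index entries only in RAM.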
@@ -1204,7 +1202,7 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> Permitted = permitted_ram_index_count(State), - if Permitted =/= undefined andalso RamIndexCount > Permitted -> + if Permitted =/= infinity andalso RamIndexCount > Permitted -> Reduction = lists:min([RamIndexCount - Permitted, ?RAM_INDEX_BATCH_SIZE]), case Reduction < ?RAM_INDEX_BATCH_SIZE of -- cgit v1.2.1 From 1eb7c2541ed34370e54bba8cd346316005c49514 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 18 Jun 2010 12:59:30 +0100 Subject: cosmetic --- src/rabbit_queue_index.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 49daaa3d..9c757864 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -178,12 +178,12 @@ })). -type(seq_id() :: integer()). -type(seg_dict() :: {dict(), [segment()]}). --type(qistate() :: #qistate { dir :: file_path(), - segments :: 'undefined' | seg_dict(), - journal_handle :: hdl(), - dirty_count :: integer(), - max_journal_entries :: non_neg_integer() - }). +-type(qistate() :: #qistate { dir :: file_path(), + segments :: 'undefined' | seg_dict(), + journal_handle :: hdl(), + dirty_count :: integer(), + max_journal_entries :: non_neg_integer() + }). -type(startup_fun_state() :: {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). -- cgit v1.2.1 From 1c67edc2c8f63f4152daf20d43cd8f2a1722340a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 18 Jun 2010 13:29:14 +0100 Subject: sanitise ram_duration and target_duration_count so they don't have to both cope with being all of infinity, undefined and a number. --- src/rabbit_tests.erl | 2 +- src/rabbit_variable_queue.erl | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index dce4d46e..38b01b2c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1797,7 +1797,7 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> end, {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), {_Duration, VQ4} = rabbit_variable_queue:ram_duration(VQ3), - VQ5 = %% /37 otherwise the duration is just to high to stress things + VQ5 = %% /37 otherwise the duration is just too high to stress things rabbit_variable_queue:set_ram_duration_target(N/37, VQ4), io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), test_variable_queue_dynamic_duration_change_f(Len, VQ5) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 34fc8cfd..7c5e527f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -261,8 +261,8 @@ persistent_count :: non_neg_integer(), transient_threshold :: non_neg_integer(), - duration_target :: non_neg_integer(), - target_ram_msg_count :: non_neg_integer(), + duration_target :: number() | 'infinity', + target_ram_msg_count :: non_neg_integer() | 'infinity', ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), ram_index_count :: non_neg_integer(), @@ -357,8 +357,8 @@ init(QueueName, IsDurable, _Recover) -> len = DeltaCount1, persistent_count = DeltaCount1, - duration_target = undefined, - target_ram_msg_count = undefined, + duration_target = infinity, + target_ram_msg_count = infinity, ram_msg_count = 0, ram_msg_count_prev = 0, ram_index_count = 0, @@ -585,13 +585,12 @@ set_ram_duration_target(DurationTarget, Rate = AvgEgressRate + AvgIngressRate, 
TargetRamMsgCount1 = case DurationTarget of - infinity -> undefined; - undefined -> undefined; + infinity -> infinity; _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec end, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, - a(case TargetRamMsgCount1 == undefined orelse + a(case TargetRamMsgCount1 == infinity orelse TargetRamMsgCount1 >= TargetRamMsgCount of true -> State1; false -> reduce_memory_use(State1) @@ -1038,7 +1037,7 @@ fetch_from_q3_to_q4(State = #vqstate { reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> + when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> State; reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> @@ -1054,7 +1053,7 @@ reduce_memory_use(State = #vqstate { msg_storage_type(_SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount }) - when TargetRamMsgCount == undefined orelse TargetRamMsgCount > RamMsgCount -> + when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount > RamMsgCount -> msg; msg_storage_type( SeqId, #vqstate { target_ram_msg_count = 0, q3 = Q3 }) -> case bpqueue:out(Q3) of @@ -1324,7 +1323,7 @@ maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount == undefined orelse TargetRamMsgCount >= RamMsgCount -> + when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> State; maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> case Generator(Q) of -- cgit v1.2.1 From ce72a29fb5a254c98f90162b763ec2a2376da5c6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 18 Jun 2010 13:54:13 +0100 Subject: Guarantee that we're only reading max one segment when we load in from ? to q3(?) --- src/rabbit_variable_queue.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7c5e527f..a3a33377 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1269,8 +1269,11 @@ maybe_deltas_to_betas(State = #vqstate { #delta { start_seq_id = DeltaSeqId, count = DeltaCount, end_seq_id = DeltaSeqIdEnd } = Delta, + DeltaSeqIdEnd1 = + lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), + DeltaSeqIdEnd]), {List, DeltaSeqId1, IndexState1} = - rabbit_queue_index:read(DeltaSeqId, DeltaSeqIdEnd, IndexState), + rabbit_queue_index:read(DeltaSeqId, DeltaSeqIdEnd1, IndexState), {Q3a, IndexState2} = betas_from_index_entries( List, TransientThreshold, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, -- cgit v1.2.1 From 54963e5b1392788a68185de18608dbed447e3bfb Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 14:51:04 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a3a33377..977b1af2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -847,8 +847,8 @@ permitted_ram_index_count(#vqstate { len = Len, BetaLen - trunc(BetaLen * BetaLen / AlphaBetaLen) end. 
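%% Worked example for the maybe_deltas_to_betas change in commit
%% ce72a29 above (the segment size here is a hypothetical figure): if
%% a segment holds 16384 entries, DeltaSeqId = 5 and
%% DeltaSeqIdEnd = 100000, then
%%   rabbit_queue_index:next_segment_boundary(5) = 16384, and
%%   DeltaSeqIdEnd1 = lists:min([16384, 100000]) = 16384,
%% so a single invocation reads at most one segment's worth of index
%% entries instead of the whole delta.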
-should_force_index_to_disk(State = - #vqstate { ram_index_count = RamIndexCount }) -> +should_force_index_to_disk(State = #vqstate { + ram_index_count = RamIndexCount }) -> case permitted_ram_index_count(State) of infinity -> false; Permitted -> RamIndexCount >= Permitted -- cgit v1.2.1 From 6136a577f86d237f77517703b1b2e37a82da29f7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 17:08:55 +0100 Subject: refactor push_betas_to_deltas to make the similarity of the treatment of Q2 and Q3 more obvious --- src/rabbit_variable_queue.erl | 112 +++++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 55 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 977b1af2..b4fd05d9 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -772,6 +772,15 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> persistent_guids(Pubs) -> [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. +beta_bounds(Q) -> + case bpqueue:out(Q) of + {empty, Q} -> empty; + {{value, _IndexOnDisk1, #msg_status { seq_id = SeqIdMin }}, _Qa} -> + {{value, _IndexOnDisk2, #msg_status { seq_id = SeqIdMax }}, _Qb} = + bpqueue:out_r(Q), + {SeqIdMin, SeqIdMax} + end. + betas_from_index_entries(List, TransientThreshold, IndexState) -> {Filtered, Delivers, Acks} = lists:foldr( @@ -1350,74 +1359,67 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, q3 = Q3, ram_index_count = RamIndexCount, index_state = IndexState }) -> - %% HighSeqId is high in the sense that it must be higher than the - %% seq_id in Delta, but it's also the lowest of the betas that we - %% transfer from q2 to delta. - {HighSeqId, Len1, Q2a, RamIndexCount1, IndexState1} = - push_betas_to_deltas( - fun bpqueue:out/1, undefined, Q2, RamIndexCount, IndexState), - true = bpqueue:is_empty(Q2a), %% ASSERTION - EndSeqId = - case bpqueue:out_r(Q2) of - {empty, _Q2} -> - undefined; - {{value, _IndexOnDisk, #msg_status { seq_id = EndSeqId1 }}, _Q2} -> - EndSeqId1 + 1 + {Delta1, Q2a, RamIndexCount1, IndexState1} = + case beta_bounds(Q2) of + empty -> + {?BLANK_DELTA, Q2, RamIndexCount, IndexState}; + {Q2MinSeqId, Q2MaxSeqId} -> + {Q2Count, Q2b, RamIndexCount2, IndexState2} = + push_betas_to_deltas( + fun bpqueue:out/1, undefined, Q2, 0, RamIndexCount, + IndexState), + %% Q2MinSeqId is high in the sense that it must be + %% higher than the seq_id in Delta, but it's also the + %% lowest of the betas that we transfer from q2 to + %% delta. 
+ {#delta { start_seq_id = Q2MinSeqId, + count = Q2Count, + end_seq_id = Q2MaxSeqId + 1 }, + Q2b, RamIndexCount2, IndexState2} end, - Delta1 = #delta { start_seq_id = Delta1SeqId } = - combine_deltas(Delta, #delta { start_seq_id = HighSeqId, - count = Len1, - end_seq_id = EndSeqId }), - State1 = State #vqstate { q2 = bpqueue:new(), - delta = Delta1, + true = bpqueue:is_empty(Q2a), %% ASSERTION + Delta2 = #delta { start_seq_id = Delta2SeqId } = + combine_deltas(Delta, Delta1), + State1 = State #vqstate { q2 = Q2a, + delta = Delta2, index_state = IndexState1, ram_index_count = RamIndexCount1 }, - case bpqueue:out(Q3) of - {empty, _Q3} -> + + case beta_bounds(Q3) of + empty -> State1; - {{value, _IndexOnDisk1, #msg_status { seq_id = SeqId }}, _Q3} -> - {{value, _IndexOnDisk2, #msg_status { seq_id = SeqIdMax }}, _Q3a} = - bpqueue:out_r(Q3), - Limit = rabbit_queue_index:next_segment_boundary(SeqId), + {Q3MinSeqId, Q3MaxSeqId} -> + Limit = rabbit_queue_index:next_segment_boundary(Q3MinSeqId), %% ASSERTION - true = Delta1SeqId == undefined orelse Delta1SeqId > SeqIdMax, - case SeqIdMax < Limit of + true = Delta2SeqId == undefined orelse Delta2SeqId > Q3MaxSeqId, + case Q3MaxSeqId < Limit of true -> %% already only holding LTE one segment indices in q3 State1; false -> - %% SeqIdMax is low in the sense that it must be - %% lower than the seq_id in delta1, in fact either - %% delta1 has undefined as its seq_id or there - %% does not exist a seq_id X s.t. X > SeqIdMax and - %% X < delta1's seq_id (would be +1 if it wasn't - %% for the possibility of gaps in the seq_ids). - %% But because we use queue:out_r, SeqIdMax is - %% actually also the highest seq_id of the betas we - %% transfer from q3 to deltas. - {SeqIdMax, Len2, Q3a, RamIndexCount2, IndexState2} = - push_betas_to_deltas(fun bpqueue:out_r/1, Limit, Q3, - RamIndexCount1, IndexState1), - Delta2 = #delta { start_seq_id = Limit, + %% Q3MaxSeqId is low in the sense that it must be + %% lower than the seq_id in delta2, in fact either + %% delta2 has undefined as its seq_id or there + %% does not exist a seq_id X s.t. X > Q3MaxSeqId + %% and X < delta2's seq_id (would be +1 if it + %% wasn't for the possibility of gaps in the + %% seq_ids). But Q3MaxSeqId is actually also the + %% highest seq_id of the betas we transfer from q3 + %% to deltas. + {Len2, Q3a, RamIndexCount3, IndexState3} = + push_betas_to_deltas( + fun bpqueue:out_r/1, Limit, Q3, 0, RamIndexCount1, + IndexState1), + Delta3 = #delta { start_seq_id = Limit, count = Len2, - end_seq_id = SeqIdMax + 1 }, - Delta3 = combine_deltas(Delta2, Delta1), - State1 #vqstate { delta = Delta3, + end_seq_id = Q3MaxSeqId + 1 }, + Delta4 = combine_deltas(Delta3, Delta2), + State1 #vqstate { delta = Delta4, q3 = Q3a, - index_state = IndexState2, - ram_index_count = RamIndexCount2 } + index_state = IndexState3, + ram_index_count = RamIndexCount3 } end end. -push_betas_to_deltas(Generator, Limit, Q, RamIndexCount, IndexState) -> - case Generator(Q) of - {empty, Qa} -> {undefined, 0, Qa, RamIndexCount, IndexState}; - {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} -> - {Count, Qb, RamIndexCount1, IndexState1} = - push_betas_to_deltas( - Generator, Limit, Q, 0, RamIndexCount, IndexState), - {SeqId, Count, Qb, RamIndexCount1, IndexState1} - end. 
- push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> case Generator(Q) of {empty, Qa} -> -- cgit v1.2.1 From 595e3a56d21c54f8c4e7b4b6ae5dffcba8982e7e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 17:27:50 +0100 Subject: refactor push_betas_to_deltas even further making the handling of Q2 and Q3 even more similar --- src/rabbit_variable_queue.erl | 80 +++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b4fd05d9..02325351 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1364,7 +1364,7 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, empty -> {?BLANK_DELTA, Q2, RamIndexCount, IndexState}; {Q2MinSeqId, Q2MaxSeqId} -> - {Q2Count, Q2b, RamIndexCount2, IndexState2} = + {Len1, Q2b, RamIndexCount2, IndexState2} = push_betas_to_deltas( fun bpqueue:out/1, undefined, Q2, 0, RamIndexCount, IndexState), @@ -1373,52 +1373,50 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, %% lowest of the betas that we transfer from q2 to %% delta. {#delta { start_seq_id = Q2MinSeqId, - count = Q2Count, + count = Len1, end_seq_id = Q2MaxSeqId + 1 }, Q2b, RamIndexCount2, IndexState2} end, true = bpqueue:is_empty(Q2a), %% ASSERTION Delta2 = #delta { start_seq_id = Delta2SeqId } = combine_deltas(Delta, Delta1), - State1 = State #vqstate { q2 = Q2a, - delta = Delta2, - index_state = IndexState1, - ram_index_count = RamIndexCount1 }, - - case beta_bounds(Q3) of - empty -> - State1; - {Q3MinSeqId, Q3MaxSeqId} -> - Limit = rabbit_queue_index:next_segment_boundary(Q3MinSeqId), - %% ASSERTION - true = Delta2SeqId == undefined orelse Delta2SeqId > Q3MaxSeqId, - case Q3MaxSeqId < Limit of - true -> %% already only holding LTE one segment indices in q3 - State1; - false -> - %% Q3MaxSeqId is low in the sense that it must be - %% lower than the seq_id in delta2, in fact either - %% delta2 has undefined as its seq_id or there - %% does not exist a seq_id X s.t. X > Q3MaxSeqId - %% and X < delta2's seq_id (would be +1 if it - %% wasn't for the possibility of gaps in the - %% seq_ids). But Q3MaxSeqId is actually also the - %% highest seq_id of the betas we transfer from q3 - %% to deltas. - {Len2, Q3a, RamIndexCount3, IndexState3} = - push_betas_to_deltas( - fun bpqueue:out_r/1, Limit, Q3, 0, RamIndexCount1, - IndexState1), - Delta3 = #delta { start_seq_id = Limit, - count = Len2, - end_seq_id = Q3MaxSeqId + 1 }, - Delta4 = combine_deltas(Delta3, Delta2), - State1 #vqstate { delta = Delta4, - q3 = Q3a, - index_state = IndexState3, - ram_index_count = RamIndexCount3 } - end - end. + {Delta3, Q3a, RamIndexCount3, IndexState3} = + case beta_bounds(Q3) of + empty -> + {?BLANK_DELTA, Q3, RamIndexCount1, IndexState1}; + {Q3MinSeqId, Q3MaxSeqId} -> + Limit = rabbit_queue_index:next_segment_boundary(Q3MinSeqId), + %% ASSERTION + true = Delta2SeqId == undefined orelse Delta2SeqId > Q3MaxSeqId, + case Q3MaxSeqId < Limit of + true -> + %% already only holding LTE one segment indices in q3 + {?BLANK_DELTA, Q3, RamIndexCount1, IndexState1}; + false -> + %% Q3MaxSeqId is low in the sense that it must + %% be lower than the seq_id in delta2, in fact + %% either delta2 has undefined as its seq_id + %% or there does not exist a seq_id X s.t. X > + %% Q3MaxSeqId and X < delta2's seq_id (would + %% be +1 if it wasn't for the possibility of + %% gaps in the seq_ids). 
But Q3MaxSeqId is + %% actually also the highest seq_id of the + %% betas we transfer from q3 to deltas. + {Len2, Q3b, RamIndexCount4, IndexState4} = + push_betas_to_deltas( + fun bpqueue:out_r/1, Limit, Q3, 0, RamIndexCount1, + IndexState1), + {#delta { start_seq_id = Limit, + count = Len2, + end_seq_id = Q3MaxSeqId + 1 }, + Q3b, RamIndexCount4, IndexState4} + end + end, + State #vqstate { q2 = Q2a, + delta = combine_deltas(Delta3, Delta2), + q3 = Q3a, + index_state = IndexState3, + ram_index_count = RamIndexCount3 }. push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> case Generator(Q) of -- cgit v1.2.1 From 27fb9868bd5fe8d6299f2061784d272aeca7ff4f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 19:30:38 +0100 Subject: yet more refactoring of push_betas_to_deltas finally make the q2 and q3 processing use the same code --- src/rabbit_variable_queue.erl | 99 ++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 63 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 02325351..cfea41f0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1360,81 +1360,54 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, ram_index_count = RamIndexCount, index_state = IndexState }) -> {Delta1, Q2a, RamIndexCount1, IndexState1} = - case beta_bounds(Q2) of - empty -> - {?BLANK_DELTA, Q2, RamIndexCount, IndexState}; - {Q2MinSeqId, Q2MaxSeqId} -> - {Len1, Q2b, RamIndexCount2, IndexState2} = - push_betas_to_deltas( - fun bpqueue:out/1, undefined, Q2, 0, RamIndexCount, - IndexState), - %% Q2MinSeqId is high in the sense that it must be - %% higher than the seq_id in Delta, but it's also the - %% lowest of the betas that we transfer from q2 to - %% delta. - {#delta { start_seq_id = Q2MinSeqId, - count = Len1, - end_seq_id = Q2MaxSeqId + 1 }, - Q2b, RamIndexCount2, IndexState2} - end, - true = bpqueue:is_empty(Q2a), %% ASSERTION - Delta2 = #delta { start_seq_id = Delta2SeqId } = - combine_deltas(Delta, Delta1), - {Delta3, Q3a, RamIndexCount3, IndexState3} = - case beta_bounds(Q3) of - empty -> - {?BLANK_DELTA, Q3, RamIndexCount1, IndexState1}; - {Q3MinSeqId, Q3MaxSeqId} -> - Limit = rabbit_queue_index:next_segment_boundary(Q3MinSeqId), - %% ASSERTION - true = Delta2SeqId == undefined orelse Delta2SeqId > Q3MaxSeqId, - case Q3MaxSeqId < Limit of - true -> - %% already only holding LTE one segment indices in q3 - {?BLANK_DELTA, Q3, RamIndexCount1, IndexState1}; - false -> - %% Q3MaxSeqId is low in the sense that it must - %% be lower than the seq_id in delta2, in fact - %% either delta2 has undefined as its seq_id - %% or there does not exist a seq_id X s.t. X > - %% Q3MaxSeqId and X < delta2's seq_id (would - %% be +1 if it wasn't for the possibility of - %% gaps in the seq_ids). But Q3MaxSeqId is - %% actually also the highest seq_id of the - %% betas we transfer from q3 to deltas. 
- {Len2, Q3b, RamIndexCount4, IndexState4} = - push_betas_to_deltas( - fun bpqueue:out_r/1, Limit, Q3, 0, RamIndexCount1, - IndexState1), - {#delta { start_seq_id = Limit, - count = Len2, - end_seq_id = Q3MaxSeqId + 1 }, - Q3b, RamIndexCount4, IndexState4} - end - end, + push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end, + fun bpqueue:out/1, Q2, + RamIndexCount, IndexState), + {Delta2, Q3a, RamIndexCount3, IndexState3} = + push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1, + fun bpqueue:out_r/1, Q3, + RamIndexCount1, IndexState1), + Delta3 = combine_deltas(Delta2, combine_deltas(Delta, Delta1)), State #vqstate { q2 = Q2a, - delta = combine_deltas(Delta3, Delta2), + delta = Delta3, q3 = Q3a, index_state = IndexState3, ram_index_count = RamIndexCount3 }. +push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> + case beta_bounds(Q) of + empty -> + {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + {MinSeqId, MaxSeqId} -> + Limit = LimitFun(MinSeqId), + case MaxSeqId < Limit of + true -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + false -> {Len, Qb, RamIndexCount1, IndexState1} = + push_betas_to_deltas(Generator, Limit, Q, 0, + RamIndexCount, IndexState), + {#delta { start_seq_id = Limit, + count = Len, + end_seq_id = MaxSeqId + 1 }, + Qb, RamIndexCount1, IndexState1} + end + end. + push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> case Generator(Q) of - {empty, Qa} -> - {Count, Qa, RamIndexCount, IndexState}; + {empty, Q} -> + {Count, Q, RamIndexCount, IndexState}; {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} - when Limit =/= undefined andalso SeqId < Limit -> + when SeqId < Limit -> {Count, Q, RamIndexCount, IndexState}; {{value, IndexOnDisk, MsgStatus}, Qa} -> {RamIndexCount1, IndexState1} = case IndexOnDisk of - true -> - {RamIndexCount, IndexState}; - false -> - {#msg_status { index_on_disk = true }, IndexState2} = - maybe_write_index_to_disk(true, MsgStatus, - IndexState), - {RamIndexCount - 1, IndexState2} + true -> {RamIndexCount, IndexState}; + false -> {#msg_status { index_on_disk = true }, + IndexState2} = + maybe_write_index_to_disk(true, MsgStatus, + IndexState), + {RamIndexCount - 1, IndexState2} end, push_betas_to_deltas( Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1) -- cgit v1.2.1 From 6e85b204cef360b89795efcfb4f6d799bb8ad3af Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 19:43:01 +0100 Subject: a little bit of inlining --- src/rabbit_variable_queue.erl | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cfea41f0..c0c7c69e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -772,15 +772,6 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> persistent_guids(Pubs) -> [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. -beta_bounds(Q) -> - case bpqueue:out(Q) of - {empty, Q} -> empty; - {{value, _IndexOnDisk1, #msg_status { seq_id = SeqIdMin }}, _Qa} -> - {{value, _IndexOnDisk2, #msg_status { seq_id = SeqIdMax }}, _Qb} = - bpqueue:out_r(Q), - {SeqIdMin, SeqIdMax} - end. - betas_from_index_entries(List, TransientThreshold, IndexState) -> {Filtered, Delivers, Acks} = lists:foldr( @@ -1375,20 +1366,22 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, ram_index_count = RamIndexCount3 }. 
push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> - case beta_bounds(Q) of - empty -> + case bpqueue:out(Q) of + {empty, Q} -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; - {MinSeqId, MaxSeqId} -> + {{value, _IndexOnDisk1, #msg_status { seq_id = MinSeqId }}, _Qa} -> + {{value, _IndexOnDisk2, #msg_status { seq_id = MaxSeqId }}, _Qb} = + bpqueue:out_r(Q), Limit = LimitFun(MinSeqId), case MaxSeqId < Limit of true -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; - false -> {Len, Qb, RamIndexCount1, IndexState1} = + false -> {Len, Qc, RamIndexCount1, IndexState1} = push_betas_to_deltas(Generator, Limit, Q, 0, RamIndexCount, IndexState), {#delta { start_seq_id = Limit, count = Len, end_seq_id = MaxSeqId + 1 }, - Qb, RamIndexCount1, IndexState1} + Qc, RamIndexCount1, IndexState1} end end. -- cgit v1.2.1 From 4a4183eb4e15a638e24309dd6c766975fafd9c55 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 18 Jun 2010 19:47:45 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c0c7c69e..f201210b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1235,8 +1235,8 @@ limit_q3_ram_index(Reduction, State) -> {Reduction, State}. limit_ram_index(MapFoldFilterFun, Q, Reduction, - State = #vqstate { ram_index_count = RamIndexCount, - index_state = IndexState }) -> + State = #vqstate { index_state = IndexState, + ram_index_count = RamIndexCount }) -> {Qa, {Reduction1, IndexState1}} = MapFoldFilterFun( fun erlang:'not'/1, @@ -1348,8 +1348,8 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> push_betas_to_deltas(State = #vqstate { q2 = Q2, delta = Delta, q3 = Q3, - ram_index_count = RamIndexCount, - index_state = IndexState }) -> + index_state = IndexState, + ram_index_count = RamIndexCount }) -> {Delta1, Q2a, RamIndexCount1, IndexState1} = push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end, fun bpqueue:out/1, Q2, -- cgit v1.2.1 From b0461ef232244486057e354dc5ecb8d0a54d1439 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sat, 19 Jun 2010 10:46:25 +0100 Subject: don't unify on function results that just happen to be side-effect free --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f201210b..bc49865b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1367,7 +1367,7 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> case bpqueue:out(Q) of - {empty, Q} -> + {empty, _Q} -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; {{value, _IndexOnDisk1, #msg_status { seq_id = MinSeqId }}, _Qa} -> {{value, _IndexOnDisk2, #msg_status { seq_id = MaxSeqId }}, _Qb} = @@ -1387,7 +1387,7 @@ push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> case Generator(Q) of - {empty, Q} -> + {empty, _Q} -> {Count, Q, RamIndexCount, IndexState}; {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} when SeqId < Limit -> -- cgit v1.2.1 From c2d4dee458d577914915eea99f8dec6d5b1a3a97 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 21 Jun 2010 15:36:32 +0100 Subject: Renumbering of variables --- src/rabbit_variable_queue.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index bc49865b..244c9ba1 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1350,17 +1350,17 @@ push_betas_to_deltas(State = #vqstate { q2 = Q2, q3 = Q3, index_state = IndexState, ram_index_count = RamIndexCount }) -> - {Delta1, Q2a, RamIndexCount1, IndexState1} = + {Delta2, Q2a, RamIndexCount2, IndexState2} = push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end, fun bpqueue:out/1, Q2, RamIndexCount, IndexState), - {Delta2, Q3a, RamIndexCount3, IndexState3} = + {Delta3, Q3a, RamIndexCount3, IndexState3} = push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1, fun bpqueue:out_r/1, Q3, - RamIndexCount1, IndexState1), - Delta3 = combine_deltas(Delta2, combine_deltas(Delta, Delta1)), + RamIndexCount2, IndexState2), + Delta4 = combine_deltas(Delta3, combine_deltas(Delta, Delta2)), State #vqstate { q2 = Q2a, - delta = Delta3, + delta = Delta4, q3 = Q3a, index_state = IndexState3, ram_index_count = RamIndexCount3 }. -- cgit v1.2.1 From 061d64386d642687e76b701a517bfa4d9ac44665 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 21 Jun 2010 15:40:33 +0100 Subject: change queue_index:read API ...to read exactly the messages within the specified bounds, rather than just up to next segment boundary. This API is cleaner and means we no longer rely on a hidden invariant in variable_queue. --- src/rabbit_queue_index.erl | 47 +++++++++++++++++++++---------------------- src/rabbit_tests.erl | 12 +++++------ src/rabbit_variable_queue.erl | 6 +++--- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 9c757864..c1054d85 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -197,7 +197,7 @@ -spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). -spec(flush/1 :: (qistate()) -> qistate()). -spec(read/3 :: (seq_id(), seq_id(), qistate()) -> - {[{guid(), seq_id(), boolean(), boolean()}], seq_id(), qistate()}). + {[{guid(), seq_id(), boolean(), boolean()}], qistate()}). -spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). -spec(bounds/1 :: (qistate()) -> {non_neg_integer(), non_neg_integer(), qistate()}). @@ -271,32 +271,17 @@ flush(State = #qistate { dirty_count = 0 }) -> State; flush(State) -> flush_journal(State). read(StartEnd, StartEnd, State) -> - {[], StartEnd, State}; + {[], State}; read(Start, End, State = #qistate { segments = Segments, dir = Dir }) when Start =< End -> %% Start is inclusive, End is exclusive. - {StartSeg, StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), - {EndSeg, EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End), - Start1 = reconstruct_seq_id(StartSeg + 1, 0), - Next = case End =< Start1 of - true -> End; - false -> Start1 - end, - MaxRelSeq = case StartSeg =:= EndSeg of - true -> EndRelSeq; - false -> ?SEGMENT_ENTRY_COUNT - end, - Segment = segment_find_or_new(StartSeg, Dir, Segments), - Messages = segment_entries_foldr( - fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) - when StartRelSeq =< RelSeq andalso RelSeq < MaxRelSeq -> - [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), - IsPersistent, IsDelivered == del} | Acc ]; - (_RelSeq, _Value, Acc) -> - Acc - end, [], Segment), - Segments1 = segment_store(Segment, Segments), - {Messages, Next, State #qistate { segments = Segments1 }}. 
+ LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), + UpperB = {EndSeg, _EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End - 1), + {Messages, Segments1} = + lists:foldr(fun (Seg, Acc) -> + read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir) + end, {[], Segments}, lists:seq(StartSeg, EndSeg)), + {Messages, State #qistate { segments = Segments1 }}. next_segment_boundary(SeqId) -> {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), @@ -758,6 +743,20 @@ write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> end, Hdl. +read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, + {Messages, Segments}, Dir) -> + Segment = segment_find_or_new(Seg, Dir, Segments), + {segment_entries_foldr( + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) + when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso + (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> + [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ]; + (_RelSeq, _Value, Acc) -> + Acc + end, Messages, Segment), + segment_store(Segment, Segments)}. + segment_entries_foldr(Fun, Init, Segment = #segment { journal_entries = JEntries }) -> {SegEntries, _UnackedCount} = load_segment(false, Segment), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 38b01b2c..c6c62c06 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1593,7 +1593,7 @@ test_queue_index() -> {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), - {ReadA, SegmentSize, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsGuidsA)), _Qi5 = rabbit_queue_index:terminate([], Qi4), @@ -1604,7 +1604,7 @@ test_queue_index() -> {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), - {ReadB, SegmentSize, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsGuidsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), @@ -1615,7 +1615,7 @@ test_queue_index() -> {LenB, _Terms2, Qi12} = test_queue_init(), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), - {ReadC, SegmentSize, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), ok = verify_read_with_published(true, true, ReadC, lists:reverse(SeqIdsGuidsB)), Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), @@ -1678,10 +1678,10 @@ test_queue_index() -> {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), Qi48 = rabbit_queue_index:deliver([2,3,5,6], Qi47), Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), - {[], 4, Qi50} = rabbit_queue_index:read(0, 4, Qi49), - {ReadD, 7, Qi51} = rabbit_queue_index:read(4, 7, Qi50), + {[], Qi50} = rabbit_queue_index:read(0, 4, Qi49), + {ReadD, Qi51} = rabbit_queue_index:read(4, 7, Qi50), ok = verify_read_with_published(true, false, ReadD, [Four, Five, Six]), - {ReadE, 9, Qi52} = rabbit_queue_index:read(7, 9, Qi51), + {ReadE, Qi52} = rabbit_queue_index:read(7, 9, Qi51), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), _Qi53 = rabbit_queue_index:delete_and_terminate(Qi52), ok = 
stop_msg_store(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 244c9ba1..0f0a14c6 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1269,11 +1269,11 @@ maybe_deltas_to_betas(State = #vqstate { #delta { start_seq_id = DeltaSeqId, count = DeltaCount, end_seq_id = DeltaSeqIdEnd } = Delta, - DeltaSeqIdEnd1 = + DeltaSeqId1 = lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), DeltaSeqIdEnd]), - {List, DeltaSeqId1, IndexState1} = - rabbit_queue_index:read(DeltaSeqId, DeltaSeqIdEnd1, IndexState), + {List, IndexState1} = + rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState), {Q3a, IndexState2} = betas_from_index_entries( List, TransientThreshold, IndexState1), State1 = State #vqstate { index_state = IndexState2 }, -- cgit v1.2.1 From 58c35b57cb5ff1fe6584ec67dbaec50ccaebae0e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 21 Jun 2010 16:38:15 +0100 Subject: Ensure that this macro doesn't accidentally cause unification with variables in scope --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index c1054d85..9e970e28 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -152,7 +152,7 @@ %% ---- misc ---- --define(PUB, {_Guid, _IsPersistent}). +-define(PUB, {__Guid, __IsPersistent}). -define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]). -- cgit v1.2.1 From 6b1f2548de2ae2666f7852a658c3520537fd86d1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 21 Jun 2010 16:40:11 +0100 Subject: Apparently this is better --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 9e970e28..1f61111c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -152,7 +152,7 @@ %% ---- misc ---- --define(PUB, {__Guid, __IsPersistent}). +-define(PUB, {_, _}). %% {Guid, IsPersistent} -define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]). -- cgit v1.2.1 From 575719990e19002a348ea02d10289fedc69cf4be Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 21 Jun 2010 17:16:37 +0100 Subject: remove redundant conditions and assertions ...already covered by state invariant --- src/rabbit_variable_queue.erl | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 0f0a14c6..fca0e80a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1123,16 +1123,9 @@ publish(neither, MsgStatus, State) -> end_seq_id = SeqId + 1 }, State1 #vqstate { delta = combine_deltas(Delta, Delta1) }. -store_alpha_entry(MsgStatus, State = #vqstate { - q1 = Q1, - q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3, - q4 = Q4 }) -> - case bpqueue:is_empty(Q2) andalso 0 == DeltaCount andalso - bpqueue:is_empty(Q3) of - true -> true = queue:is_empty(Q1), %% ASSERTION - State #vqstate { q4 = queue:in(MsgStatus, Q4) }; +store_alpha_entry(MsgStatus, State = #vqstate {q1 = Q1, q3 = Q3, q4 = Q4 }) -> + case bpqueue:is_empty(Q3) of + true -> State #vqstate { q4 = queue:in(MsgStatus, Q4) }; false -> maybe_push_q1_to_betas( State #vqstate { q1 = queue:in(MsgStatus, Q1) }) end. 
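A note on the invariant the commit above relies on: messages flow q1 -> q2 -> delta -> q3 -> q4 in seq_id order, so q3 can only be empty when delta and q2 are empty as well, and testing q3 alone suffices. A minimal sketch of that routing decision, using an illustrative record and plain stdlib queues in place of the real #vqstate and bpqueue:

-module(alpha_routing_sketch).
-export([store_alpha/2]).

-record(state, {q1, q3, q4}).

%% If q3 is empty, the invariant guarantees delta and q2 are empty too,
%% so a new in-RAM message can go straight to q4; otherwise it has to
%% wait in q1 behind the older messages.
store_alpha(MsgStatus, State = #state { q1 = Q1, q3 = Q3, q4 = Q4 }) ->
    case queue:is_empty(Q3) of
        true  -> State #state { q4 = queue:in(MsgStatus, Q4) };
        false -> State #state { q1 = queue:in(MsgStatus, Q1) }
    end.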
-- cgit v1.2.1 From 88faef8ccf0dcce7208eff3b68089aa594f9b4e6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 21 Jun 2010 18:05:49 +0100 Subject: Improving test coverage of VQ --- src/rabbit_tests.erl | 16 +++++++++++++--- src/rabbit_variable_queue.erl | 8 ++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c6c62c06..13aeb851 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1871,7 +1871,17 @@ test_queue_recover() -> ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), ok = rabbit_amqqueue:start(), - {ok, Count} = rabbit_amqqueue:with_or_die( - QName, - fun (Q1) -> rabbit_amqqueue:delete(Q1, false, false) end), + rabbit_amqqueue:with_or_die( + QName, + fun (Q1 = #amqqueue { pid = QPid1 }) -> + CountMinusOne = Count - 1, + {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = + rabbit_amqqueue:basic_get(Q1, self(), false), + exit(QPid1, shutdown), + VQ1 = rabbit_variable_queue:init(QName, true, true), + {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = + rabbit_variable_queue:fetch(true, VQ1), + _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + rabbit_amqqueue:internal_delete(QName) + end), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index fca0e80a..ff2d57af 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -840,12 +840,8 @@ permitted_ram_index_count(#vqstate { len = Len, q2 = Q2, q3 = Q3, delta = #delta { count = DeltaCount } }) -> - AlphaBetaLen = Len - DeltaCount, - case AlphaBetaLen == 0 of - true -> infinity; - false -> BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), - BetaLen - trunc(BetaLen * BetaLen / AlphaBetaLen) - end. + BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). should_force_index_to_disk(State = #vqstate { ram_index_count = RamIndexCount }) -> -- cgit v1.2.1 From 11e07da14dfd023f818dda56a38bb949da201110 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Jun 2010 09:08:47 +0100 Subject: add some more variable_queue state invariant checks --- src/rabbit_variable_queue.erl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ff2d57af..25dc5910 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -662,7 +662,11 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, %%---------------------------------------------------------------------------- a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, - len = Len, target_ram_msg_count = TargetRamMsgCount }) -> + len = Len, + persistent_count = PersistentCount, + target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }) -> E1 = queue:is_empty(Q1), E2 = bpqueue:is_empty(Q2), ED = Delta#delta.count == 0, @@ -677,6 +681,11 @@ a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, true = (E1 and E2 and E4) or not TZ, true = LZ == (E3 and E4), + true = Len >= 0, + true = PersistentCount >= 0, + true = RamMsgCount >= 0, + true = RamIndexCount >= 0, + State. 
one_if(true ) -> 1; -- cgit v1.2.1 From 1581045c57bd8f9794342ffac1e506a9f30f07d1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Jun 2010 11:53:18 +0100 Subject: refactor: simplify limit_ram_index --- src/rabbit_variable_queue.erl | 69 ++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 25dc5910..23879df7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1204,52 +1204,39 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> ?RAM_INDEX_BATCH_SIZE]), case Reduction < ?RAM_INDEX_BATCH_SIZE of true -> State; - false -> {Reduction1, State1} = - limit_q2_ram_index(Reduction, State), - {_Red2, State2} = - limit_q3_ram_index(Reduction1, State1), - State2 + false -> #vqstate { q2 = Q2, q3 = Q3, + index_state = IndexState } = State, + {Q2a, {Reduction1, IndexState1}} = + limit_ram_index(fun bpqueue:map_fold_filter_l/4, + Q2, {Reduction, IndexState}), + {Q3a, {Reduction2, IndexState2}} = + limit_ram_index(fun bpqueue:map_fold_filter_r/4, + Q3, {Reduction1, IndexState1}), + RamIndexCount1 = RamIndexCount - + (Reduction - Reduction2), + State #vqstate { q2 = Q2a, q3 = Q3a, + index_state = IndexState2, + ram_index_count = RamIndexCount1 } end; true -> State end. -limit_q2_ram_index(Reduction, State = #vqstate { q2 = Q2 }) - when Reduction > 0 -> - {Q2a, Reduction1, State1} = limit_ram_index(fun bpqueue:map_fold_filter_l/4, - Q2, Reduction, State), - {Reduction1, State1 #vqstate { q2 = Q2a }}; -limit_q2_ram_index(Reduction, State) -> - {Reduction, State}. - -limit_q3_ram_index(Reduction, State = #vqstate { q3 = Q3 }) - when Reduction > 0 -> - %% use the _r version so that we prioritise the msgs closest to - %% delta, and least soon to be delivered - {Q3a, Reduction1, State1} = limit_ram_index(fun bpqueue:map_fold_filter_r/4, - Q3, Reduction, State), - {Reduction1, State1 #vqstate { q3 = Q3a }}; -limit_q3_ram_index(Reduction, State) -> - {Reduction, State}. - -limit_ram_index(MapFoldFilterFun, Q, Reduction, - State = #vqstate { index_state = IndexState, - ram_index_count = RamIndexCount }) -> - {Qa, {Reduction1, IndexState1}} = - MapFoldFilterFun( - fun erlang:'not'/1, - fun (MsgStatus, {0, _IndexStateN}) -> - false = MsgStatus #msg_status.index_on_disk, %% ASSERTION - stop; - (MsgStatus, {N, IndexStateN}) when N > 0 -> - false = MsgStatus #msg_status.index_on_disk, %% ASSERTION - {MsgStatus1, IndexStateN1} = - maybe_write_index_to_disk(true, MsgStatus, IndexStateN), - {true, MsgStatus1, {N-1, IndexStateN1}} - end, {Reduction, IndexState}, Q), - RamIndexCount1 = RamIndexCount - (Reduction - Reduction1), - {Qa, Reduction1, State #vqstate { index_state = IndexState1, - ram_index_count = RamIndexCount1 }}. +limit_ram_index(_MapFoldFilterFun, Q, {Reduction, IndexState}) + when Reduction == 0 -> + {Q, {Reduction, IndexState}}; +limit_ram_index(MapFoldFilterFun, Q, {Reduction, IndexState}) -> + MapFoldFilterFun( + fun erlang:'not'/1, + fun (MsgStatus, {0, _IndexStateN}) -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + stop; + (MsgStatus, {N, IndexStateN}) when N > 0 -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + {MsgStatus1, IndexStateN1} = + maybe_write_index_to_disk(true, MsgStatus, IndexStateN), + {true, MsgStatus1, {N-1, IndexStateN1}} + end, {Reduction, IndexState}, Q). 
maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) ->
    State;
-- cgit v1.2.1


From 6bcff045a25aa69afc105aec9768f7f00f93e2ca Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Tue, 22 Jun 2010 13:32:06 +0100
Subject: massively simplify variable_queue:publish

Rather than trying to figure out which of q{1-4} or delta a message
should go into, we make a straightforward decision to stuff it into
either q1 or q4 and then use the existing logic for dealing with memory
pressure to shuffle it to the right place.

---
 src/rabbit_variable_queue.erl | 119 +++++++++++------------------------------
 1 file changed, 31 insertions(+), 88 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 23879df7..bd912837 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -426,9 +426,8 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->
                            persistent_count = 0 })}.

 publish(Msg, State) ->
-    State1 = limit_ram_index(State),
-    {_SeqId, State2} = publish(Msg, false, false, State1),
-    a(State2).
+    {_SeqId, State1} = publish(Msg, false, false, State),
+    a(limit_ram_index(State1)).

 publish_delivered(false, _Msg, State = #vqstate { len = 0 }) ->
     {blank_ack, a(State)};
@@ -1056,97 +1055,30 @@ reduce_memory_use(State = #vqstate {
 %% Internal gubbins for publishing
 %%----------------------------------------------------------------------------

-msg_storage_type(_SeqId, #vqstate { target_ram_msg_count = TargetRamMsgCount,
-                                    ram_msg_count = RamMsgCount })
-  when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount > RamMsgCount ->
-    msg;
-msg_storage_type( SeqId, #vqstate { target_ram_msg_count = 0, q3 = Q3 }) ->
-    case bpqueue:out(Q3) of
-        {empty, _Q3} ->
-            %% if TargetRamMsgCount == 0, we know we have no
-            %% alphas. If q3 is empty then delta must be empty too, so
-            %% create a beta, which should end up in q3
-            index;
-        {{value, _IndexOnDisk, #msg_status { seq_id = OldSeqId }}, _Q3a} ->
-            %% Don't look at the current delta as it may be empty. If
-            %% the SeqId is still within the current segment, it'll be
-            %% a beta, else it'll go into delta
-            case SeqId >= rabbit_queue_index:next_segment_boundary(OldSeqId) of
-                true  -> neither;
-                false -> index
-            end
-    end;
-msg_storage_type(_SeqId, #vqstate { q1 = Q1 }) ->
-    case queue:is_empty(Q1) of
-        true  -> index;
-        %% Can push out elders (in q1) to disk. This may also result
-        %% in the msg itself going to disk and q2/q3.
-        false -> msg
-    end.
-
 publish(Msg = #basic_message { is_persistent = IsPersistent },
         IsDelivered, MsgOnDisk,
-        State = #vqstate { next_seq_id = SeqId,
+        State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
+                           next_seq_id = SeqId,
                            len = Len,
                            in_counter = InCount,
                            persistent_count = PCount,
-                           durable = IsDurable }) ->
+                           durable = IsDurable,
+                           ram_msg_count = RamMsgCount }) ->
     IsPersistent1 = IsDurable andalso IsPersistent,
     MsgStatus = (msg_status(IsPersistent1, SeqId, Msg))
        #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk },
+    {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
+    State2 = case bpqueue:is_empty(Q3) of
+                 false -> State1 #vqstate { q1 = queue:in(MsgStatus1, Q1) };
+                 true  -> State1 #vqstate { q4 = queue:in(MsgStatus1, Q4) }
+             end,
     PCount1 = PCount + one_if(IsPersistent1),
-    {SeqId, publish(msg_storage_type(SeqId, State), MsgStatus,
-                    State #vqstate { next_seq_id = SeqId + 1,
-                                     len = Len + 1,
-                                     in_counter = InCount + 1,
-                                     persistent_count = PCount1 })}.
- -publish(msg, MsgStatus, State) -> - {MsgStatus1, State1 = #vqstate { ram_msg_count = RamMsgCount }} = - maybe_write_to_disk(false, false, MsgStatus, State), - State2 = State1 # vqstate {ram_msg_count = RamMsgCount + 1 }, - store_alpha_entry(MsgStatus1, State2); - -publish(index, MsgStatus, State) -> - ForceIndex = should_force_index_to_disk(State), - {MsgStatus1 = #msg_status { msg_on_disk = true, - index_on_disk = IndexOnDisk }, - State1 = #vqstate { ram_index_count = RamIndexCount, q1 = Q1 }} = - maybe_write_to_disk(true, ForceIndex, MsgStatus, State), - RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), - State2 = State1 #vqstate { ram_index_count = RamIndexCount1 }, - true = queue:is_empty(Q1), %% ASSERTION - store_beta_entry(MsgStatus1, State2); - -publish(neither, MsgStatus, State) -> - {#msg_status { msg_on_disk = true, index_on_disk = true, seq_id = SeqId }, - State1 = #vqstate { q1 = Q1, q2 = Q2, delta = Delta }} = - maybe_write_to_disk(true, true, MsgStatus, State), - true = queue:is_empty(Q1) andalso bpqueue:is_empty(Q2), %% ASSERTION - Delta1 = #delta { start_seq_id = SeqId, - count = 1, - end_seq_id = SeqId + 1 }, - State1 #vqstate { delta = combine_deltas(Delta, Delta1) }. - -store_alpha_entry(MsgStatus, State = #vqstate {q1 = Q1, q3 = Q3, q4 = Q4 }) -> - case bpqueue:is_empty(Q3) of - true -> State #vqstate { q4 = queue:in(MsgStatus, Q4) }; - false -> maybe_push_q1_to_betas( - State #vqstate { q1 = queue:in(MsgStatus, Q1) }) - end. - -store_beta_entry(MsgStatus = #msg_status { msg_on_disk = true, - index_on_disk = IndexOnDisk }, - State = #vqstate { q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3 }) -> - MsgStatus1 = MsgStatus #msg_status { msg = undefined }, - case DeltaCount == 0 of - true -> State #vqstate { q3 = bpqueue:in(IndexOnDisk, MsgStatus1, - Q3) }; - false -> State #vqstate { q2 = bpqueue:in(IndexOnDisk, MsgStatus1, - Q2) } - end. + {SeqId, reduce_memory_use( + State2 #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + ram_msg_count = RamMsgCount + 1})}. maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { msg_on_disk = true }, MSCState) -> @@ -1291,9 +1223,20 @@ maybe_deltas_to_betas(State = #vqstate { maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> maybe_push_alphas_to_betas( fun queue:out/1, - fun (MsgStatus, Q1a, State1) -> - %% these could legally go to q3 if delta and q2 are empty - store_beta_entry(MsgStatus, State1 #vqstate { q1 = Q1a }) + fun (MsgStatus = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3 }) -> + MsgStatus1 = MsgStatus #msg_status { msg = undefined }, + case DeltaCount == 0 of + true -> State1 #vqstate { + q1 = Q1a, + q3 = bpqueue:in(IndexOnDisk, MsgStatus1, Q3) }; + false -> State1 #vqstate { + q1 = Q1a, + q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) } + end end, Q1, State). 
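The maybe_push_q1_to_betas / maybe_push_q4_to_betas pair being reshaped above share one traversal: a Generator pops entries from one end of a queue and a Consumer files each demoted entry somewhere cheaper. A stripped-down sketch of that loop, with illustrative names rather than the real maybe_push_alphas_to_betas:

-module(drain_sketch).
-export([drain/4]).

%% Generator is e.g. fun queue:out/1 (pop from the front) or
%% fun queue:out_r/1 (pop from the rear); Consumer gets the popped item,
%% the remaining queue and the state, and returns the updated state.
drain(Generator, Consumer, Q, State) ->
    case Generator(Q) of
        {empty, _Q}         -> State;
        {{value, Item}, Qa} -> drain(Generator, Consumer, Qa,
                                     Consumer(Item, Qa, State))
    end.

Passing fun queue:out_r/1 walks the queue from the rear, which is how q4 entries end up pushed towards q3 in the code above.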
maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> -- cgit v1.2.1 From 3d712da85d9ec5b8981d1f363fc6ba8f0ac22f51 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Jun 2010 14:31:30 +0100 Subject: refactor: simplify maybe_push_q{1,4}_to_betas --- src/rabbit_variable_queue.erl | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index bd912837..fcebdddc 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1223,20 +1223,14 @@ maybe_deltas_to_betas(State = #vqstate { maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> maybe_push_alphas_to_betas( fun queue:out/1, - fun (MsgStatus = #msg_status { msg_on_disk = true, - index_on_disk = IndexOnDisk }, - Q1a, State1 = #vqstate { q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3 }) -> - MsgStatus1 = MsgStatus #msg_status { msg = undefined }, - case DeltaCount == 0 of - true -> State1 #vqstate { - q1 = Q1a, - q3 = bpqueue:in(IndexOnDisk, MsgStatus1, Q3) }; - false -> State1 #vqstate { - q1 = Q1a, - q2 = bpqueue:in(IndexOnDisk, MsgStatus1, Q2) } - end + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q3 = Q3, delta = #delta { count = 0 } }) -> + State1 #vqstate { q1 = Q1a, + q3 = bpqueue:in(IndexOnDisk, MsgStatus, Q3) }; + (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q2 = Q2, delta = #delta {} }) -> + State1 #vqstate { q1 = Q1a, + q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } end, Q1, State). maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> @@ -1244,9 +1238,7 @@ maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> fun queue:out_r/1, fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, Q4a, State1 = #vqstate { q3 = Q3 }) -> - MsgStatus1 = MsgStatus #msg_status { msg = undefined }, - %% these must go to q3 - State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus1, Q3), + State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), q4 = Q4a } end, Q4, State). @@ -1266,11 +1258,12 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> State1 = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount }} = maybe_write_to_disk(true, ForceIndex, MsgStatus, State), + MsgStatus2 = MsgStatus1 #msg_status { msg = undefined }, RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, ram_index_count = RamIndexCount1 }, maybe_push_alphas_to_betas(Generator, Consumer, Qa, - Consumer(MsgStatus1, Qa, State2)) + Consumer(MsgStatus2, Qa, State2)) end. push_betas_to_deltas(State = #vqstate { q2 = Q2, -- cgit v1.2.1 From e7023cf7cf9d983be76b6ffd8f7595ba54715f37 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 22 Jun 2010 15:31:11 +0100 Subject: Further code coverage improvements. 
Up to 96.52% on VQ --- src/rabbit_tests.erl | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 13aeb851..27ff51de 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1874,14 +1874,22 @@ test_queue_recover() -> rabbit_amqqueue:with_or_die( QName, fun (Q1 = #amqqueue { pid = QPid1 }) -> - CountMinusOne = Count - 1, - {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = + Count1 = Count - 1, + {ok, Count1, {QName, QPid1, _AckTag, true, _Msg}} = rabbit_amqqueue:basic_get(Q1, self(), false), exit(QPid1, shutdown), VQ1 = rabbit_variable_queue:init(QName, true, true), - {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = - rabbit_variable_queue:fetch(true, VQ1), - _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + VQ2 = variable_queue_publish(false, Count, VQ1), + VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), + {VQ4, _AckTags} = variable_queue_fetch(Count, true, true, Count + Count, VQ3), + {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(QName, true, true), + {{_Msg1, true, _AckTag1, Count1}, VQ8} = + rabbit_variable_queue:fetch(true, VQ7), + VQ9 = variable_queue_publish(false, 1, VQ8), + VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), + _VQ11 = rabbit_variable_queue:delete_and_terminate(VQ10), rabbit_amqqueue:internal_delete(QName) end), passed. -- cgit v1.2.1 From fdca38fcd5214a9bbf37b2929f7d154cf10e543e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 22 Jun 2010 15:34:11 +0100 Subject: Remove meaningless assertion --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index fcebdddc..452e0276 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1228,7 +1228,7 @@ maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> State1 #vqstate { q1 = Q1a, q3 = bpqueue:in(IndexOnDisk, MsgStatus, Q3) }; (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, - Q1a, State1 = #vqstate { q2 = Q2, delta = #delta {} }) -> + Q1a, State1 = #vqstate { q2 = Q2 }) -> State1 #vqstate { q1 = Q1a, q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } end, Q1, State). -- cgit v1.2.1 From 95b30665fba2bb6af7d8e4af6ad16be1a855a462 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 22 Jun 2010 16:04:59 +0100 Subject: Still at 95.5% but less overloaded --- src/rabbit_tests.erl | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 27ff51de..ce0860d9 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1752,6 +1752,7 @@ fresh_variable_queue() -> test_variable_queue() -> passed = test_variable_queue_dynamic_duration_change(), passed = test_variable_queue_partial_segments_delta_thing(), + passed = test_variable_queue_all_the_bits_not_covered_elsewhere(), passed. 
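The test registered above (and defined later in this commit) sizes its workload with rabbit_queue_index:next_segment_boundary/1 so that entries straddle index segments. A sketch of the arithmetic behind that call, with an illustrative segment size standing in for the constant defined in rabbit_queue_index:

-module(segment_sketch).
-export([next_segment_boundary/1]).

%% Illustrative value only; the real entry count per segment lives in
%% rabbit_queue_index.
-define(SEGMENT_ENTRY_COUNT, 16384).

%% Seq ids are partitioned into fixed-size segments, so the boundary
%% after SeqId is the first seq id of the following segment.
next_segment_boundary(SeqId) ->
    Seg = SeqId div ?SEGMENT_ENTRY_COUNT,
    (Seg + 1) * ?SEGMENT_ENTRY_COUNT.

Hence next_segment_boundary(0) is one whole segment, and the test's 2*next_segment_boundary(0) messages are guaranteed to span at least two segments.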
test_variable_queue_dynamic_duration_change() ->
@@ -1874,22 +1875,32 @@ test_queue_recover() ->
     rabbit_amqqueue:with_or_die(
       QName,
       fun (Q1 = #amqqueue { pid = QPid1 }) ->
-              Count1 = Count - 1,
-              {ok, Count1, {QName, QPid1, _AckTag, true, _Msg}} =
+              CountMinusOne = Count - 1,
+              {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} =
                   rabbit_amqqueue:basic_get(Q1, self(), false),
               exit(QPid1, shutdown),
               VQ1 = rabbit_variable_queue:init(QName, true, true),
-              VQ2 = variable_queue_publish(false, Count, VQ1),
-              VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2),
-              {VQ4, _AckTags} = variable_queue_fetch(Count, true, true, Count + Count, VQ3),
-              {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4),
-              _VQ6 = rabbit_variable_queue:terminate(VQ5),
-              VQ7 = rabbit_variable_queue:init(QName, true, true),
-              {{_Msg1, true, _AckTag1, Count1}, VQ8} =
-                  rabbit_variable_queue:fetch(true, VQ7),
-              VQ9 = variable_queue_publish(false, 1, VQ8),
-              VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9),
-              _VQ11 = rabbit_variable_queue:delete_and_terminate(VQ10),
+              {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} =
+                  rabbit_variable_queue:fetch(true, VQ1),
+              _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2),
               rabbit_amqqueue:internal_delete(QName)
       end),
     passed.
+
+test_variable_queue_all_the_bits_not_covered_elsewhere() ->
+    Count = 2*rabbit_queue_index:next_segment_boundary(0),
+    VQ0 = fresh_variable_queue(),
+    VQ1 = variable_queue_publish(true, Count, VQ0),
+    VQ2 = variable_queue_publish(false, Count, VQ1),
+    VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2),
+    {VQ4, _AckTags} = variable_queue_fetch(Count, true, false, Count + Count, VQ3),
+    {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4),
+    _VQ6 = rabbit_variable_queue:terminate(VQ5),
+    VQ7 = rabbit_variable_queue:init(test_queue(), true, true),
+    {{_Msg1, true, _AckTag1, Count1}, VQ8} = rabbit_variable_queue:fetch(true, VQ7),
+    VQ9 = variable_queue_publish(false, 1, VQ8),
+    VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9),
+    {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10),
+    {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11),
+    _VQ13 = rabbit_variable_queue:delete_and_terminate(VQ12),
+    passed.
-- cgit v1.2.1


From 4dbd911a7c5ca7e00ec38dc59f60a70f8aad976f Mon Sep 17 00:00:00 2001
From: Matthias Radestock
Date: Tue, 22 Jun 2010 16:41:10 +0100
Subject: optimisation: don't call reduce_memory_use quite so often

Specifically, on requeue and tx_commit_index only call it once at the
end rather than for every single message.

---
 src/rabbit_variable_queue.erl | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index 452e0276..50fa0e26 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -427,7 +427,7 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->

 publish(Msg, State) ->
     {_SeqId, State1} = publish(Msg, false, false, State),
-    a(limit_ram_index(State1)).
+    a(limit_ram_index(reduce_memory_use(State1))).

@@ -560,17 +560,18 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) ->
         end)}.
requeue(AckTags, State) -> - a(ack(fun (#msg_status { msg = Msg }, State1) -> - {_SeqId, State2} = publish(Msg, true, false, State1), - State2; - ({IsPersistent, Guid}, State1) -> - #vqstate { msg_store_clients = MSCState } = State1, - {{ok, Msg = #basic_message{}}, MSCState1} = - read_from_msg_store(MSCState, IsPersistent, Guid), - State2 = State1 #vqstate { msg_store_clients = MSCState1 }, - {_SeqId, State3} = publish(Msg, true, true, State2), - State3 - end, AckTags, State)). + a(reduce_memory_use( + ack(fun (#msg_status { msg = Msg }, State1) -> + {_SeqId, State2} = publish(Msg, true, false, State1), + State2; + ({IsPersistent, Guid}, State1) -> + #vqstate { msg_store_clients = MSCState } = State1, + {{ok, Msg = #basic_message{}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + {_SeqId, State3} = publish(Msg, true, true, State2), + State3 + end, AckTags, State))). len(#vqstate { len = Len }) -> Len. @@ -951,7 +952,8 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, end, {Acks, ack(Acks, State)}, Pubs), IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), [ Fun() || Fun <- lists:reverse(SFuns) ], - State1 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }. + reduce_memory_use( + State1 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }). purge_betas_and_deltas(State = #vqstate { q3 = Q3, index_state = IndexState }) -> @@ -1073,12 +1075,11 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, true -> State1 #vqstate { q4 = queue:in(MsgStatus1, Q4) } end, PCount1 = PCount + one_if(IsPersistent1), - {SeqId, reduce_memory_use( - State2 #vqstate { next_seq_id = SeqId + 1, - len = Len + 1, - in_counter = InCount + 1, - persistent_count = PCount1, - ram_msg_count = RamMsgCount + 1})}. + {SeqId, State2 #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + ram_msg_count = RamMsgCount + 1}}. maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { msg_on_disk = true }, MSCState) -> -- cgit v1.2.1 From 865ac7fda947d937d6366e77b7a6bdb2b0f76316 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 22 Jun 2010 17:10:40 +0100 Subject: Improve code coverage further --- src/rabbit_tests.erl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ce0860d9..73b4a9be 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1903,4 +1903,15 @@ test_variable_queue_all_the_bits_not_covered_elsewhere() -> {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), _VQ13 = rabbit_variable_queue:delete_and_terminate(VQ12), + + VQa0 = fresh_variable_queue(), + VQa1 = rabbit_variable_queue:set_ram_duration_target(0, VQa0), + VQa2 = variable_queue_publish(false, 4, VQa1), + {VQa3, AckTags} = variable_queue_fetch(2, false, false, 4, VQa2), + VQa4 = rabbit_variable_queue:requeue(AckTags, VQa3), + VQa5 = rabbit_variable_queue:sync(VQa4), + _VQa6 = rabbit_variable_queue:terminate(VQa5), + VQa7 = rabbit_variable_queue:init(test_queue(), true, true), + {empty, VQa8} = rabbit_variable_queue:fetch(false, VQa7), + _VQa9 = rabbit_variable_queue:delete_and_terminate(VQa8), passed. 
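The requeue rewritten above folds the whole batch of acks through its per-message republish step and pays the reduce_memory_use cost once at the end. The shape of that optimisation in a toy, self-contained form (the list-based state and both helpers are stand-ins, not the real #vqstate API):

-module(batch_sketch).
-export([requeue/2]).

%% Hypothetical per-message step: put one requeued tag back on the state.
republish(AckTag, State) -> [AckTag | State].

%% Stand-in for the expensive whole-state pass that used to run once per
%% message and now runs once per batch.
reduce_memory_use(State) -> lists:reverse(State).

requeue(AckTags, State) ->
    reduce_memory_use(lists:foldl(fun republish/2, State, AckTags)).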
-- cgit v1.2.1 From 68f6eb998b7f91c05af258a00426d7f052a631be Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Jun 2010 18:03:11 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 58 +++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 50fa0e26..01fc114f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -843,22 +843,6 @@ combine_deltas(#delta { start_seq_id = StartLow, beta_fold(Fun, Init, Q) -> bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). -permitted_ram_index_count(#vqstate { len = 0 }) -> - infinity; -permitted_ram_index_count(#vqstate { len = Len, - q2 = Q2, - q3 = Q3, - delta = #delta { count = DeltaCount } }) -> - BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), - BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). - -should_force_index_to_disk(State = #vqstate { - ram_index_count = RamIndexCount }) -> - case permitted_ram_index_count(State) of - infinity -> false; - Permitted -> RamIndexCount >= Permitted - end. - %%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- @@ -1040,19 +1024,6 @@ fetch_from_q3_to_q4(State = #vqstate { {loaded, State2} end. -reduce_memory_use(State = #vqstate { - ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> - State; -reduce_memory_use(State = #vqstate { - target_ram_msg_count = TargetRamMsgCount }) -> - State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), - case TargetRamMsgCount of - 0 -> push_betas_to_deltas(State1); - _ -> State1 - end. - %%---------------------------------------------------------------------------- %% Internal gubbins for publishing %%---------------------------------------------------------------------------- @@ -1171,6 +1142,35 @@ limit_ram_index(MapFoldFilterFun, Q, {Reduction, IndexState}) -> {true, MsgStatus1, {N-1, IndexStateN1}} end, {Reduction, IndexState}, Q). +should_force_index_to_disk(State = #vqstate { + ram_index_count = RamIndexCount }) -> + case permitted_ram_index_count(State) of + infinity -> false; + Permitted -> RamIndexCount >= Permitted + end. + +permitted_ram_index_count(#vqstate { len = 0 }) -> + infinity; +permitted_ram_index_count(#vqstate { len = Len, + q2 = Q2, + q3 = Q3, + delta = #delta { count = DeltaCount } }) -> + BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). + +reduce_memory_use(State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> + State; +reduce_memory_use(State = #vqstate { + target_ram_msg_count = TargetRamMsgCount }) -> + State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), + case TargetRamMsgCount of + 0 -> push_betas_to_deltas(State1); + _ -> State1 + end. 
+ maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; maybe_deltas_to_betas(State = #vqstate { -- cgit v1.2.1 From 94a4dc455dbcc400e9baad696f74729206f05da4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Jun 2010 18:55:07 +0100 Subject: make beta->gamma conversion part of general memory reduction scheme While it doesn't reduce memory itself, it is still part of the overall memory reduction logic. This allows us to cleanly separate the beta->gamma conversion from the major phase changes. --- src/rabbit_variable_queue.erl | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 01fc114f..113bd0f5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -427,7 +427,7 @@ purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> publish(Msg, State) -> {_SeqId, State1} = publish(Msg, false, false, State), - a(limit_ram_index(reduce_memory_use(State1))). + a(reduce_memory_use(State1)). publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> {blank_ack, a(State)}; @@ -1142,13 +1142,6 @@ limit_ram_index(MapFoldFilterFun, Q, {Reduction, IndexState}) -> {true, MsgStatus1, {N-1, IndexStateN1}} end, {Reduction, IndexState}, Q). -should_force_index_to_disk(State = #vqstate { - ram_index_count = RamIndexCount }) -> - case permitted_ram_index_count(State) of - infinity -> false; - Permitted -> RamIndexCount >= Permitted - end. - permitted_ram_index_count(#vqstate { len = 0 }) -> infinity; permitted_ram_index_count(#vqstate { len = Len, @@ -1158,17 +1151,20 @@ permitted_ram_index_count(#vqstate { len = Len, BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). +reduce_memory_use(State = #vqstate { + target_ram_msg_count = infinity }) -> + State; reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> - State; + when TargetRamMsgCount >= RamMsgCount -> + limit_ram_index(State); reduce_memory_use(State = #vqstate { target_ram_msg_count = TargetRamMsgCount }) -> State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), case TargetRamMsgCount of 0 -> push_betas_to_deltas(State1); - _ -> State1 + _ -> limit_ram_index(State1) end. 
maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; maybe_deltas_to_betas(State = #vqstate { -- cgit v1.2.1 From 1726042ba98e84d7aaa55a9f2da9793f9df72a28 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 22 Jun 2010 18:56:00 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 113bd0f5..958a2903 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1101,6 +1101,22 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, %% Phase changes %%---------------------------------------------------------------------------- +reduce_memory_use(State = #vqstate { + target_ram_msg_count = infinity }) -> + State; +reduce_memory_use(State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when TargetRamMsgCount >= RamMsgCount -> + limit_ram_index(State); +reduce_memory_use(State = #vqstate { + target_ram_msg_count = TargetRamMsgCount }) -> + State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), + case TargetRamMsgCount of + 0 -> push_betas_to_deltas(State1); + _ -> limit_ram_index(State1) + end. + limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> Permitted = permitted_ram_index_count(State), if Permitted =/= infinity andalso RamIndexCount > Permitted -> @@ -1151,22 +1167,6 @@ permitted_ram_index_count(#vqstate { len = Len, BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). -reduce_memory_use(State = #vqstate { - target_ram_msg_count = infinity }) -> - State; -reduce_memory_use(State = #vqstate { - ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount >= RamMsgCount -> - limit_ram_index(State); -reduce_memory_use(State = #vqstate { - target_ram_msg_count = TargetRamMsgCount }) -> - State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), - case TargetRamMsgCount of - 0 -> push_betas_to_deltas(State1); - _ -> limit_ram_index(State1) - end. - maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; maybe_deltas_to_betas(State = #vqstate { -- cgit v1.2.1 From da925e34a1b378aaf99f9af3c39c927d8b9c9d5c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 23 Jun 2010 19:01:59 +0100 Subject: Chunk up work to do when converting alphas to betas. Unfortunately this violates one of the invariants (Q1 or Q2 or Q4) -> (TargetRamMsgCount > 0) because it might be ==0 but the work is still in progress. I prefer relaxing the invariant over requiring a "big push" once we get to 0. Some of the tests now don't pass as they assume certain values for the betas - this should be fixable.
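A minimal sketch of the quota pattern this commit introduces (illustrative only: vq_chunk_sketch, push_chunk and the Consume callback are invented names, not part of the patch). The worker drains at most Quota items per pass and hands back the unused quota, so the caller can resume on a later pass instead of stalling the queue with one big push:

-module(vq_chunk_sketch).
-export([push_chunk/4]).

%% Drain at most Quota items from queue Q, threading State through the
%% Consume callback; return the leftover quota together with the
%% remaining queue so a later pass can pick up where this one stopped.
push_chunk(0, _Consume, Q, State) ->
    {0, Q, State};
push_chunk(Quota, Consume, Q, State) ->
    case queue:out(Q) of
        {empty, _Q1}        -> {Quota, Q, State};
        {{value, Item}, Q1} -> push_chunk(Quota - 1, Consume, Q1,
                                          Consume(Item, State))
    end.

For example, push_chunk(64, fun (_Item, S) -> S end, queue:from_list(lists:seq(1, 100)), ok) consumes exactly 64 items and leaves 36 queued for a later invocation, which mirrors how the ?IO_BATCH_SIZE quota is threaded through maybe_push_q1_to_betas and maybe_push_q4_to_betas in the diff below.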
--- include/rabbit_backing_queue_spec.hrl | 4 +-- src/rabbit_backing_queue.erl | 17 ++++++----- src/rabbit_invariable_queue.erl | 8 ++--- src/rabbit_variable_queue.erl | 56 +++++++++++++++++++++-------------- 4 files changed, 49 insertions(+), 36 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 55cd126e..47748bdb 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -57,7 +57,7 @@ -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(needs_sync/1 :: (state()) -> boolean()). --spec(sync/1 :: (state()) -> state()). +-spec(needs_idle_timeout/1 :: (state()) -> boolean()). +-spec(idle_timeout/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 432d6290..b76ae11e 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -113,14 +113,15 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Should 'sync' be called as soon as the queue process can - %% manage (either on an empty mailbox, or when a timer fires)? - {needs_sync, 1}, - - %% Called (eventually) after needs_sync returns 'true'. Note this - %% may be called more than once for each 'true' returned from - %% needs_sync. - {sync, 1}, + %% Should 'idle_timeout' be called as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires)? + {needs_idle_timeout, 1}, + + %% Called (eventually) after needs_idle_timeout returns + %% 'true'. Note this may be called more than once for each 'true' + %% returned from needs_idle_timeout. + {idle_timeout, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index a7ca20c8..e6bd11e3 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -34,8 +34,8 @@ -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1, - handle_pre_hibernate/1, status/1]). + set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, + idle_timeout/1, handle_pre_hibernate/1, status/1]). -export([start/1]). @@ -197,9 +197,9 @@ set_ram_duration_target(_DurationTarget, State) -> State. ram_duration(State) -> {0, State}. -needs_sync(_State) -> false. +needs_idle_timeout(_State) -> false. -sync(State) -> State. +idle_timeout(State) -> State. handle_pre_hibernate(State) -> State. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 958a2903..d5d48e58 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -36,7 +36,8 @@ tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, set_ram_duration_target/2, ram_duration/1, - needs_sync/1, sync/1, handle_pre_hibernate/1, status/1]). + needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + status/1]). -export([start/1]). @@ -224,7 +225,7 @@ %% fewer than RAM_INDEX_BATCH_SIZE indices out in one go, and we don't %% write more - we can always come back on the next publish to do %% more. --define(RAM_INDEX_BATCH_SIZE, 64). 
+-define(IO_BATCH_SIZE, 64). -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). @@ -626,10 +627,14 @@ ram_duration(State = #vqstate { egress_rate = Egress, out_counter = 0, ram_msg_count_prev = RamMsgCount })}. -needs_sync(#vqstate { on_sync = {_, _, []} }) -> false; -needs_sync(_) -> true. +needs_idle_timeout(State = #vqstate { on_sync = {_, _, []}, + ram_index_count = RamIndexCount }) -> + Permitted = permitted_ram_index_count(State), + Permitted =:= infinity orelse RamIndexCount =< Permitted; +needs_idle_timeout(_) -> + true. -sync(State) -> a(tx_commit_index(State)). +idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. @@ -672,13 +677,11 @@ a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, ED = Delta#delta.count == 0, E3 = bpqueue:is_empty(Q3), E4 = queue:is_empty(Q4), - TZ = TargetRamMsgCount == 0, LZ = Len == 0, true = E1 or not E3, true = E2 or not ED, true = ED or not E3, - true = (E1 and E2 and E4) or not TZ, true = LZ == (E3 and E4), true = Len >= 0, @@ -1110,19 +1113,22 @@ reduce_memory_use(State = #vqstate { when TargetRamMsgCount >= RamMsgCount -> limit_ram_index(State); reduce_memory_use(State = #vqstate { + ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) -> - State1 = maybe_push_q4_to_betas(maybe_push_q1_to_betas(State)), + Reduction = lists:min([RamMsgCount - TargetRamMsgCount, ?IO_BATCH_SIZE]), + {Reduction1, State1} = maybe_push_q1_to_betas(Reduction, State), + {_Reduction2, State2} = maybe_push_q4_to_betas(Reduction1, State1), case TargetRamMsgCount of - 0 -> push_betas_to_deltas(State1); - _ -> limit_ram_index(State1) + 0 -> push_betas_to_deltas(State2); + _ -> limit_ram_index(State2) end. limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> Permitted = permitted_ram_index_count(State), if Permitted =/= infinity andalso RamIndexCount > Permitted -> Reduction = lists:min([RamIndexCount - Permitted, - ?RAM_INDEX_BATCH_SIZE]), - case Reduction < ?RAM_INDEX_BATCH_SIZE of + ?IO_BATCH_SIZE]), + case Reduction < ?IO_BATCH_SIZE of true -> State; false -> #vqstate { q2 = Q2, q3 = Q3, index_state = IndexState } = State, @@ -1217,7 +1223,9 @@ maybe_deltas_to_betas(State = #vqstate { end end. -maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> +maybe_push_q1_to_betas(0, State) -> + {0, State}; +maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> maybe_push_alphas_to_betas( fun queue:out/1, fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, @@ -1228,26 +1236,30 @@ maybe_push_q1_to_betas(State = #vqstate { q1 = Q1 }) -> Q1a, State1 = #vqstate { q2 = Q2 }) -> State1 #vqstate { q1 = Q1a, q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } - end, Q1, State). + end, Quota, Q1, State). -maybe_push_q4_to_betas(State = #vqstate { q4 = Q4 }) -> +maybe_push_q4_to_betas(0, State) -> + {0, State}; +maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) -> maybe_push_alphas_to_betas( fun queue:out_r/1, fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, Q4a, State1 = #vqstate { q3 = Q3 }) -> State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), q4 = Q4a } - end, Q4, State). + end, Quota, Q4, State). 
-maybe_push_alphas_to_betas(_Generator, _Consumer, _Q, +maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q, State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> - State; -maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> + when Quota =:= 0 orelse TargetRamMsgCount =:= infinity orelse + TargetRamMsgCount >= RamMsgCount -> + {Quota, State}; +maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> case Generator(Q) of - {empty, _Q} -> State; + {empty, _Q} -> + {Quota, State}; {{value, MsgStatus}, Qa} -> {MsgStatus1 = #msg_status { msg_on_disk = true, index_on_disk = IndexOnDisk }, @@ -1258,7 +1270,7 @@ maybe_push_alphas_to_betas(Generator, Consumer, Q, State) -> RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, ram_index_count = RamIndexCount1 }, - maybe_push_alphas_to_betas(Generator, Consumer, Qa, + maybe_push_alphas_to_betas(Generator, Consumer, Quota - 1, Qa, Consumer(MsgStatus2, Qa, State2)) end. -- cgit v1.2.1 From 696c30251819924016333aa6fd2fe3854a6c053f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 24 Jun 2010 15:02:36 +0100 Subject: Whoops, missed one --- src/rabbit_amqqueue_process.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5fdf0ffa..6f29442b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -179,7 +179,7 @@ noreply(NewState) -> next_state(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = ensure_rate_timer(State), - case BQ:needs_sync(BQS)of + case BQ:needs_idle_timeout(BQS)of true -> {ensure_sync_timer(State1), 0}; false -> {stop_sync_timer(State1), hibernate} end. -- cgit v1.2.1 From 28f2e7cac7e977e800188bfe0f6e23ea0011386a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 24 Jun 2010 15:14:34 +0100 Subject: Yup, missed a few more too --- src/rabbit_amqqueue_process.erl | 4 ++-- src/rabbit_tests.erl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6f29442b..6ba4f298 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -188,7 +188,7 @@ ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, maybe_run_queue_via_backing_queue, - [self(), fun (BQS) -> BQ:sync(BQS) end]), + [self(), fun (BQS) -> BQ:idle_timeout(BQS) end]), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. 
@@ -823,7 +823,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - fun (BQS) -> BQ:sync(BQS) end, State)); + fun (BQS) -> BQ:idle_timeout(BQS) end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 73b4a9be..540ea2f4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1909,7 +1909,7 @@ test_variable_queue_all_the_bits_not_covered_elsewhere() -> VQa2 = variable_queue_publish(false, 4, VQa1), {VQa3, AckTags} = variable_queue_fetch(2, false, false, 4, VQa2), VQa4 = rabbit_variable_queue:requeue(AckTags, VQa3), - VQa5 = rabbit_variable_queue:sync(VQa4), + VQa5 = rabbit_variable_queue:idle_timeout(VQa4), _VQa6 = rabbit_variable_queue:terminate(VQa5), VQa7 = rabbit_variable_queue:init(test_queue(), true, true), {empty, VQa8} = rabbit_variable_queue:fetch(false, VQa7), -- cgit v1.2.1 From c15a3e05baba778fbd25caf706c825b20e5a7fcb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 24 Jun 2010 17:11:20 +0100 Subject: Corrections to tests, and actually make the needs_idle_timeout logic do something which might reflect what happens when the idle timeout gets called. All the tests pass. --- src/rabbit_tests.erl | 13 +++++++++++-- src/rabbit_variable_queue.erl | 13 +++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 540ea2f4..c684484d 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1812,7 +1812,8 @@ test_variable_queue_partial_segments_delta_thing() -> VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), - VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), + VQ3 = variable_queue_wait_for_shuffling_end( + rabbit_variable_queue:set_ram_duration_target(0, VQ2)), %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), io:format("~p~n", [S3]), @@ -1821,7 +1822,8 @@ test_variable_queue_partial_segments_delta_thing() -> assert_prop(S3, q3, SegmentSize), assert_prop(S3, len, SegmentSize + HalfSegment), VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), - VQ5 = variable_queue_publish(true, 1, VQ4), + VQ5 = variable_queue_wait_for_shuffling_end( + variable_queue_publish(true, 1, VQ4)), %% should have 1 alpha, but it's in the same segment as the deltas S5 = rabbit_variable_queue:status(VQ5), io:format("~p~n", [S5]), @@ -1848,6 +1850,13 @@ test_variable_queue_partial_segments_delta_thing() -> passed. +variable_queue_wait_for_shuffling_end(VQ) -> + case rabbit_variable_queue:needs_idle_timeout(VQ) of + true -> variable_queue_wait_for_shuffling_end( + rabbit_variable_queue:idle_timeout(VQ)); + false -> VQ + end. + test_queue_recover() -> Count = 2*rabbit_queue_index:next_segment_boundary(0), TxID = rabbit_guid:guid(), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d5d48e58..1a9301c0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -627,12 +627,14 @@ ram_duration(State = #vqstate { egress_rate = Egress, out_counter = 0, ram_msg_count_prev = RamMsgCount })}. 
-needs_idle_timeout(State = #vqstate { on_sync = {_, _, []}, - ram_index_count = RamIndexCount }) -> +needs_idle_timeout(#vqstate { on_sync = {_, _, SFuns}, + target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount }) + when SFuns =/= [] orelse RamMsgCount > TargetRamMsgCount -> + true; +needs_idle_timeout(State = #vqstate { ram_index_count = RamIndexCount }) -> Permitted = permitted_ram_index_count(State), - Permitted =:= infinity orelse RamIndexCount =< Permitted; -needs_idle_timeout(_) -> - true. + Permitted =/= infinity andalso RamIndexCount > Permitted. idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). @@ -669,7 +671,6 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, len = Len, persistent_count = PersistentCount, - target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount }) -> E1 = queue:is_empty(Q1), -- cgit v1.2.1 From ea7ca9c0d32ed23df7296db66a1f8e6da7f99c3d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 24 Jun 2010 17:53:15 +0100 Subject: Glad to see I'm not the only one who's made this mistake: release and remove are two different things --- src/rabbit_variable_queue.erl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 958a2903..e4a81311 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -509,7 +509,7 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, end. ack(AckTags, State) -> - a(ack(fun (_AckEntry, State1) -> State1 end, AckTags, State)). + a(ack(remove, fun (_AckEntry, State1) -> State1 end, AckTags, State)). tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, State = #vqstate { durable = IsDurable, @@ -561,7 +561,8 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> requeue(AckTags, State) -> a(reduce_memory_use( - ack(fun (#msg_status { msg = Msg }, State1) -> + ack(release, + fun (#msg_status { msg = Msg }, State1) -> {_SeqId, State2} = publish(Msg, true, false, State1), State2; ({IsPersistent, Guid}, State1) -> @@ -847,9 +848,10 @@ beta_fold(Fun, Init, Q) -> %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -ack(_Fun, [], State) -> +ack(_ReleaseOrRemove, _Fun, [], State) -> State; -ack(Fun, AckTags, State) -> +ack(ReleaseOrRemove, Fun, AckTags, State) when ReleaseOrRemove =:= remove orelse + ReleaseOrRemove =:= release -> {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( @@ -867,7 +869,7 @@ ack(Fun, AckTags, State) -> end, {{[], dict:new()}, State}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:release(MsgStore, Guids) + rabbit_msg_store:ReleaseOrRemove(MsgStore, Guids) end, ok, GuidsByStore), PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of error -> 0; -- cgit v1.2.1 From 7db13cdc2f37dcae0d09855f859b3d64b3421f41 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 24 Jun 2010 19:53:53 +0100 Subject: refactor: don't use module:Fun --- src/rabbit_variable_queue.erl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index e4a81311..712ebf84 100644 --- a/src/rabbit_variable_queue.erl +++ 
b/src/rabbit_variable_queue.erl @@ -509,7 +509,9 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, end. ack(AckTags, State) -> - a(ack(remove, fun (_AckEntry, State1) -> State1 end, AckTags, State)). + a(ack(fun rabbit_msg_store:remove/2, + fun (_AckEntry, State1) -> State1 end, + AckTags, State)). tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, State = #vqstate { durable = IsDurable, @@ -561,7 +563,7 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> requeue(AckTags, State) -> a(reduce_memory_use( - ack(release, + ack(fun rabbit_msg_store:release/2, fun (#msg_status { msg = Msg }, State1) -> {_SeqId, State2} = publish(Msg, true, false, State1), State2; @@ -572,7 +574,8 @@ requeue(AckTags, State) -> State2 = State1 #vqstate { msg_store_clients = MSCState1 }, {_SeqId, State3} = publish(Msg, true, true, State2), State3 - end, AckTags, State))). + end, + AckTags, State))). len(#vqstate { len = Len }) -> Len. @@ -848,10 +851,9 @@ beta_fold(Fun, Init, Q) -> %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -ack(_ReleaseOrRemove, _Fun, [], State) -> +ack(_MsgStoreFun, _Fun, [], State) -> State; -ack(ReleaseOrRemove, Fun, AckTags, State) when ReleaseOrRemove =:= remove orelse - ReleaseOrRemove =:= release -> +ack(MsgStoreFun, Fun, AckTags, State) -> {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, persistent_count = PCount }} = lists:foldl( @@ -869,7 +871,7 @@ ack(ReleaseOrRemove, Fun, AckTags, State) when ReleaseOrRemove =:= remove orelse end, {{[], dict:new()}, State}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:ReleaseOrRemove(MsgStore, Guids) + MsgStoreFun(MsgStore, Guids) end, ok, GuidsByStore), PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of error -> 0; -- cgit v1.2.1 From e6ffd26755e7f90ff1e93e48354be96fb5b6fbe6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 30 Jun 2010 11:56:54 +0100 Subject: tid() -> ets:tid() == able to compile on R14A --- src/rabbit_msg_store.erl | 10 +++++----- src/rabbit_msg_store_gc.erl | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 706a7fae..60c13372 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -112,10 +112,10 @@ index_module :: atom(), dir :: file_path(), gc_pid :: pid(), - file_handles_ets :: tid(), - file_summary_ets :: tid(), - dedup_cache_ets :: tid(), - cur_file_cache_ets :: tid() }). + file_handles_ets :: ets:tid(), + file_summary_ets :: ets:tid(), + dedup_cache_ets :: ets:tid(), + cur_file_cache_ets :: ets:tid() }). -type(startup_fun_state() :: {(fun ((A) -> 'finished' | {guid(), non_neg_integer(), A})), A}). @@ -140,7 +140,7 @@ -spec(successfully_recovered_state/1 :: (server()) -> boolean()). -spec(gc/3 :: (non_neg_integer(), non_neg_integer(), - {tid(), file_path(), atom(), any()}) -> + {ets:tid(), file_path(), atom(), any()}) -> 'concurrent_readers' | non_neg_integer()). -endif. diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl index 4b80d088..a02d1375 100644 --- a/src/rabbit_msg_store_gc.erl +++ b/src/rabbit_msg_store_gc.erl @@ -55,7 +55,7 @@ -ifdef(use_specs). --spec(start_link/4 :: (file_path(), any(), atom(), tid()) -> +-spec(start_link/4 :: (file_path(), any(), atom(), ets:tid()) -> {'ok', pid()} | 'ignore' | {'error', any()}). 
-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). -spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). -- cgit v1.2.1 From 68fc459e43c53caca2fba4661df026acb64474d1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 6 Jul 2010 10:55:13 +0100 Subject: Forgot to fully initialise qi state. --- src/rabbit_queue_index.erl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 1f61111c..459c0fb6 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -350,10 +350,13 @@ blank_state(QueueName) -> StrName = queue_name_to_dir_name(QueueName), Dir = filename:join(queues_dir(), StrName), ok = filelib:ensure_dir(filename:join(Dir, "nothing")), - #qistate { dir = Dir, - segments = segments_new(), - journal_handle = undefined, - dirty_count = 0 }. + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + #qistate { dir = Dir, + segments = segments_new(), + journal_handle = undefined, + dirty_count = 0, + max_journal_entries = MaxJournal }. detect_clean_shutdown(Dir) -> case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of -- cgit v1.2.1 From ab24c65accc6304e01fdfa95f6f24696ad07feff Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 12:01:04 +0100 Subject: cosmetic(ish) - avoid unnecessary thunking and some layout fixes --- src/rabbit_tests.erl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 8fd3fa07..4b9c736a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1886,15 +1886,12 @@ test_queue_recover() -> TxID = rabbit_guid:guid(), {new, #amqqueue { pid = QPid, name = QName }} = rabbit_amqqueue:declare(test_queue(), true, false, [], none), - Msg = fun() -> rabbit_basic:message( - rabbit_misc:r(<<>>, exchange, <<>>), - <<>>, #'P_basic'{delivery_mode = 2}, <<>>) end, - Delivery = #delivery{mandatory = false, - immediate = false, - txn = TxID, - sender = self(), - message = Msg()}, - [true = rabbit_amqqueue:deliver(QPid, Delivery) || _ <- lists:seq(1, Count)], + Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = 2}, <<>>), + Delivery = #delivery{mandatory = false, immediate = false, txn = TxID, + sender = self(), message = Msg}, + [true = rabbit_amqqueue:deliver(QPid, Delivery) || + _ <- lists:seq(1, Count)], rabbit_amqqueue:commit_all([QPid], TxID, self()), exit(QPid, kill), MRef = erlang:monitor(process, QPid), -- cgit v1.2.1 From 331a54ac214be6f6c96765051f53c6385614b500 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 12:11:24 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4b9c736a..b28dd839 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1620,7 +1620,7 @@ test_queue_index() -> MostOfASegment = trunc(SegmentSize*0.75), stop_msg_store(), ok = empty_test_queue(), - SeqIdsA = lists:seq(0,MostOfASegment-1), + SeqIdsA = lists:seq(0, MostOfASegment-1), SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), {0, _Terms, Qi0} = test_queue_init(), {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), -- cgit v1.2.1 From c8e59dadcc63416e11b1f14f8d414032d65a5c2b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 13:27:01 +0100 Subject: add some comments and TODOs --- src/rabbit_variable_queue.erl | 37 
++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 712ebf84..c1a67ddb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -110,21 +110,18 @@ %% should be very few betas remaining, thus the transition is fast (no %% work needs to be done for the gamma -> delta transition). %% -%% The conversion of betas to gammas is done on publish, in batches of -%% exactly ?RAM_INDEX_BATCH_SIZE. This value should not be too small, -%% otherwise the frequent operations on the queues of q2 and q3 will -%% not be effectively amortised, nor should it be too big, otherwise a -%% publish will take too long as it attempts to do too much work and -%% thus stalls the queue. Therefore, it must be just right. This -%% approach is preferable to doing work on a new queue-duration -%% because converting all the indicated betas to gammas at that point -%% can be far too expensive, thus requiring batching and segmented -%% work anyway, and furthermore, if we're not getting any publishes -%% anyway then the queue is either being drained or has no -%% consumers. In the latter case, an expensive beta to delta -%% transition doesn't matter, and in the former case the queue's -%% shrinking length makes it unlikely (though not impossible) that the -%% duration will become 0. +%% The conversion of betas to gammas is done on all actions that can +%% increase the message count, such as publish and requeue, and when +%% the queue is asked to reduce its memory usage. The conversion is +%% done in batches of exactly ?RAM_INDEX_BATCH_SIZE. This value should +%% not be too small, otherwise the frequent operations on the queues +%% of q2 and q3 will not be effectively amortised (switching the +%% direction of queue access defeats amortisation), nor should it be +%% too big, otherwise converting a batch stalls the queue for too +%% long. Therefore, it must be just right. This approach is preferable +%% to doing work on a new queue-duration because converting all the +%% indicated betas to gammas at that point can be far too expensive, +%% thus requiring batching and segmented work anyway. %% %% In the queue we only keep track of messages that are pending %% delivery. This is fine for queue purging, but can be expensive for @@ -393,6 +390,9 @@ terminate(State) -> %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. delete_and_terminate(State) -> + %% TODO: there is no need to interact with qi at all - which we do + %% as part of 'purge' and 'remove_pending_ack', other than + %% deleting it. {_PurgeCount, State1} = purge(State), State2 = #vqstate { index_state = IndexState, msg_store_clients = {{MSCStateP, PRef}, @@ -411,6 +411,9 @@ delete_and_terminate(State) -> msg_store_clients = undefined }). purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> + %% TODO: when there are no pending acks, which is a common case, + %% we could simply wipe the qi instead of issuing delivers and + %% acks for all the messages. 
IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, IndexState), State1 = #vqstate { q1 = Q1, index_state = IndexState2 } = @@ -1133,6 +1136,10 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> {Q2a, {Reduction1, IndexState1}} = limit_ram_index(fun bpqueue:map_fold_filter_l/4, Q2, {Reduction, IndexState}), + %% TODO: we shouldn't be writing index + %% entries for messages that can never end up + %% in delta due them residing in the only + %% segment held by q3. {Q3a, {Reduction2, IndexState2}} = limit_ram_index(fun bpqueue:map_fold_filter_r/4, Q3, {Reduction1, IndexState1}), -- cgit v1.2.1 From f1f8931bc439be518d3318f9c340af86f969637c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 16:05:02 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ef63ff8e..2ca1a639 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1258,8 +1258,8 @@ maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q, State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) - when Quota =:= 0 orelse TargetRamMsgCount =:= infinity orelse - TargetRamMsgCount >= RamMsgCount -> + when Quota =:= 0 orelse + TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> {Quota, State}; maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> case Generator(Q) of -- cgit v1.2.1 From 53c18464e9f0355f1b7e96ca9ecbfb5edf51cf96 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 16:19:36 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 2ca1a639..b7d2460b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1121,7 +1121,7 @@ reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) -> Reduction = lists:min([RamMsgCount - TargetRamMsgCount, ?IO_BATCH_SIZE]), - {Reduction1, State1} = maybe_push_q1_to_betas(Reduction, State), + { Reduction1, State1} = maybe_push_q1_to_betas(Reduction, State), {_Reduction2, State2} = maybe_push_q4_to_betas(Reduction1, State1), case TargetRamMsgCount of 0 -> push_betas_to_deltas(State2); @@ -1131,8 +1131,7 @@ reduce_memory_use(State = #vqstate { limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> Permitted = permitted_ram_index_count(State), if Permitted =/= infinity andalso RamIndexCount > Permitted -> - Reduction = lists:min([RamIndexCount - Permitted, - ?IO_BATCH_SIZE]), + Reduction = lists:min([RamIndexCount - Permitted, ?IO_BATCH_SIZE]), case Reduction < ?IO_BATCH_SIZE of true -> State; false -> #vqstate { q2 = Q2, q3 = Q3, -- cgit v1.2.1 From 7dab5238a12bffca25dcc6d6e932876de8127a28 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 17:28:06 +0100 Subject: tiny refactor --- src/rabbit_variable_queue.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c1a67ddb..8b9d17e7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1153,9 +1153,8 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> State end. 
-limit_ram_index(_MapFoldFilterFun, Q, {Reduction, IndexState}) - when Reduction == 0 -> - {Q, {Reduction, IndexState}}; +limit_ram_index(_MapFoldFilterFun, Q, {0, IndexState}) -> + {Q, {0, IndexState}}; limit_ram_index(MapFoldFilterFun, Q, {Reduction, IndexState}) -> MapFoldFilterFun( fun erlang:'not'/1, -- cgit v1.2.1 From 863747ad95e9bb458bf2db0fff764273f3883bf2 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 19:18:25 +0100 Subject: simplify memory reduction decisions --- src/rabbit_variable_queue.erl | 78 ++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 5893385a..6f6f3d92 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -634,14 +634,15 @@ ram_duration(State = #vqstate { egress_rate = Egress, out_counter = 0, ram_msg_count_prev = RamMsgCount })}. -needs_idle_timeout(#vqstate { on_sync = {_, _, SFuns}, - target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount }) - when SFuns =/= [] orelse RamMsgCount > TargetRamMsgCount -> +needs_idle_timeout(#vqstate { on_sync = {_, _, SFuns}}) when SFuns =/= [] -> true; -needs_idle_timeout(State = #vqstate { ram_index_count = RamIndexCount }) -> - Permitted = permitted_ram_index_count(State), - Permitted =/= infinity andalso RamIndexCount > Permitted. +needs_idle_timeout(State = #vqstate { target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount}) -> + case reduction(RamMsgCount, TargetRamMsgCount) of + 0 -> reduction(RamIndexCount, State) == ?IO_BATCH_SIZE; + _ -> true + end. idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). @@ -1112,50 +1113,37 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, %% Phase changes %%---------------------------------------------------------------------------- -reduce_memory_use(State = #vqstate { - target_ram_msg_count = infinity }) -> - State; -reduce_memory_use(State = #vqstate { - ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) - when TargetRamMsgCount >= RamMsgCount -> - limit_ram_index(State); reduce_memory_use(State = #vqstate { ram_msg_count = RamMsgCount, target_ram_msg_count = TargetRamMsgCount }) -> - Reduction = lists:min([RamMsgCount - TargetRamMsgCount, ?IO_BATCH_SIZE]), + Reduction = reduction(RamMsgCount, TargetRamMsgCount), { Reduction1, State1} = maybe_push_q1_to_betas(Reduction, State), {_Reduction2, State2} = maybe_push_q4_to_betas(Reduction1, State1), case TargetRamMsgCount of - 0 -> push_betas_to_deltas(State2); - _ -> limit_ram_index(State2) + infinity -> State2; + 0 -> push_betas_to_deltas(State2); + _ -> limit_ram_index(State2) end. limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> - Permitted = permitted_ram_index_count(State), - if Permitted =/= infinity andalso RamIndexCount > Permitted -> - Reduction = lists:min([RamIndexCount - Permitted, ?IO_BATCH_SIZE]), - case Reduction < ?IO_BATCH_SIZE of - true -> State; - false -> #vqstate { q2 = Q2, q3 = Q3, - index_state = IndexState } = State, - {Q2a, {Reduction1, IndexState1}} = - limit_ram_index(fun bpqueue:map_fold_filter_l/4, - Q2, {Reduction, IndexState}), - %% TODO: we shouldn't be writing index - %% entries for messages that can never end up - %% in delta due them residing in the only - %% segment held by q3. 
- {Q3a, {Reduction2, IndexState2}} = - limit_ram_index(fun bpqueue:map_fold_filter_r/4, - Q3, {Reduction1, IndexState1}), - RamIndexCount1 = RamIndexCount - - (Reduction - Reduction2), - State #vqstate { q2 = Q2a, q3 = Q3a, - index_state = IndexState2, - ram_index_count = RamIndexCount1 } - end; - true -> + Reduction = reduction(RamIndexCount, permitted_ram_index_count(State)), + case Reduction of + ?IO_BATCH_SIZE -> + #vqstate { q2 = Q2, q3 = Q3, index_state = IndexState } = State, + {Q2a, {Reduction1, IndexState1}} = + limit_ram_index(fun bpqueue:map_fold_filter_l/4, + Q2, {Reduction, IndexState}), + %% TODO: we shouldn't be writing index entries for + %% messages that can never end up in delta due them + %% residing in the only segment held by q3. + {Q3a, {Reduction2, IndexState2}} = + limit_ram_index(fun bpqueue:map_fold_filter_r/4, + Q3, {Reduction1, IndexState1}), + RamIndexCount1 = RamIndexCount - (Reduction - Reduction2), + State #vqstate { q2 = Q2a, q3 = Q3a, + index_state = IndexState2, + ram_index_count = RamIndexCount1 }; + _ -> State end. @@ -1183,6 +1171,12 @@ permitted_ram_index_count(#vqstate { len = Len, BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). +reduction(Current, Permitted) + when Permitted =:= infinity orelse Permitted >= Current -> + 0; +reduction(Current, Permitted) -> + lists:min([Current - Permitted, ?IO_BATCH_SIZE]). + maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; maybe_deltas_to_betas(State = #vqstate { -- cgit v1.2.1 From 659da033a93a18a74429ab5d49af3b587bd949f4 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 23:09:47 +0100 Subject: oops --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6f6f3d92..a558b022 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -640,7 +640,8 @@ needs_idle_timeout(State = #vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount}) -> case reduction(RamMsgCount, TargetRamMsgCount) of - 0 -> reduction(RamIndexCount, State) == ?IO_BATCH_SIZE; + 0 -> Permitted = permitted_ram_index_count(State), + reduction(RamIndexCount, Permitted) == ?IO_BATCH_SIZE; _ -> true end. -- cgit v1.2.1 From 93c4df4ab973b4b2e9a808a41e89d2a5680ecfd0 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 6 Jul 2010 23:35:07 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a558b022..d9f22c5a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -634,7 +634,7 @@ ram_duration(State = #vqstate { egress_rate = Egress, out_counter = 0, ram_msg_count_prev = RamMsgCount })}. -needs_idle_timeout(#vqstate { on_sync = {_, _, SFuns}}) when SFuns =/= [] -> +needs_idle_timeout(#vqstate { on_sync = {_, _, [_|_]}}) -> true; needs_idle_timeout(State = #vqstate { target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, -- cgit v1.2.1 From 0ed077c885c4fba48a7486b3071184d247f847db Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 7 Jul 2010 11:40:24 +0100 Subject: unify the phase change predicate and phase change operation so that the logic for determining the necessity of phase changes is kept in one place and cannot diverge. 
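The shape of that unification can be sketched independently of #vqstate (a simplified illustration: phase_sketch, maybe_reduce, needs_work, do_work and the {Current, Target} state are invented for the sketch, not taken from the patch). One function both decides whether work is due and applies a supplied worker; handing it a no-op worker turns the very same decision logic into a side-effect-free predicate:

-module(phase_sketch).
-export([needs_work/1, do_work/1]).

%% Single decision point: Work is applied only when Current exceeds
%% Target, and the boolean reports whether anything was (or would
%% have been) done.
maybe_reduce(Work, {Current, Target}) when Current > Target ->
    {true, Work({Current, Target})};
maybe_reduce(_Work, State) ->
    {false, State}.

%% Predicate form: a dummy worker, so "is work needed?" can never
%% diverge from the logic used when real work is performed.
needs_work(State) ->
    {Res, _State1} = maybe_reduce(fun (S) -> S end, State),
    Res.

%% Operation form: a real worker that brings Current down to Target.
do_work(State) ->
    {_Res, State1} = maybe_reduce(fun ({_Current, Target}) ->
                                          {Target, Target}
                                  end, State),
    State1.

This mirrors how needs_idle_timeout in the diff below obtains its boolean by passing identity functions to reduce_memory_use/4.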
--- src/rabbit_variable_queue.erl | 113 ++++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 43 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d9f22c5a..f19fcee5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -636,14 +636,12 @@ ram_duration(State = #vqstate { egress_rate = Egress, needs_idle_timeout(#vqstate { on_sync = {_, _, [_|_]}}) -> true; -needs_idle_timeout(State = #vqstate { target_ram_msg_count = TargetRamMsgCount, - ram_msg_count = RamMsgCount, - ram_index_count = RamIndexCount}) -> - case reduction(RamMsgCount, TargetRamMsgCount) of - 0 -> Permitted = permitted_ram_index_count(State), - reduction(RamIndexCount, Permitted) == ?IO_BATCH_SIZE; - _ -> true - end. +needs_idle_timeout(State) -> + {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end, + fun (_Quota, State1) -> State1 end, + fun (State1) -> State1 end, + State), + Res. idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). @@ -1114,43 +1112,67 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, %% Phase changes %%---------------------------------------------------------------------------- -reduce_memory_use(State = #vqstate { - ram_msg_count = RamMsgCount, - target_ram_msg_count = TargetRamMsgCount }) -> - Reduction = reduction(RamMsgCount, TargetRamMsgCount), - { Reduction1, State1} = maybe_push_q1_to_betas(Reduction, State), - {_Reduction2, State2} = maybe_push_q4_to_betas(Reduction1, State1), - case TargetRamMsgCount of - infinity -> State2; - 0 -> push_betas_to_deltas(State2); - _ -> limit_ram_index(State2) +%% Determine whether a reduction in memory use is necessary, and call +%% functions to perform the required phase changes. The function can +%% also be used to just do the former, by passing in dummy phase +%% change functions. +%% +%% The function does not report on any needed beta->delta conversions, +%% though the conversion function for that is called as necessary. The +%% reason is twofold. Firstly, this is safe because the conversion is +%% only ever necessary just after a transition to a +%% target_ram_msg_count of zero or after an incremental alpha->beta +%% conversion. In the former case the conversion is performed straight +%% away (i.e. any betas present at the time are converted to deltas), +%% and in the latter case the need for a conversion is flagged up +%% anyway. Secondly, this is necessary because we do not have a +%% precise and cheap predicate for determining whether a beta->delta +%% conversion is necessary - due to the complexities of retaining up +%% one segment's worth of messages in q3 - and thus would risk +%% perpetually reporting the need for a conversion when no such +%% conversion is needed. That in turn could cause an infinite loop. +reduce_memory_use(AlphaBetaFun, BetaGammaFun, BetaDeltaFun, State) -> + {Reduce, State1} = case chunk_size(State #vqstate.ram_msg_count, + State #vqstate.target_ram_msg_count) of + 0 -> {false, State}; + S1 -> {true, AlphaBetaFun(S1, State)} + end, + case State1 #vqstate.target_ram_msg_count of + infinity -> {Reduce, State1}; + 0 -> {Reduce, BetaDeltaFun(State1)}; + _ -> case chunk_size(State1 #vqstate.ram_index_count, + permitted_ram_index_count(State1)) of + ?IO_BATCH_SIZE = S2 -> {true, BetaGammaFun(S2, State1)}; + _ -> {Reduce, State1} + end end. 
-limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> - Reduction = reduction(RamIndexCount, permitted_ram_index_count(State)), - case Reduction of - ?IO_BATCH_SIZE -> - #vqstate { q2 = Q2, q3 = Q3, index_state = IndexState } = State, - {Q2a, {Reduction1, IndexState1}} = - limit_ram_index(fun bpqueue:map_fold_filter_l/4, - Q2, {Reduction, IndexState}), - %% TODO: we shouldn't be writing index entries for - %% messages that can never end up in delta due them - %% residing in the only segment held by q3. - {Q3a, {Reduction2, IndexState2}} = - limit_ram_index(fun bpqueue:map_fold_filter_r/4, - Q3, {Reduction1, IndexState1}), - RamIndexCount1 = RamIndexCount - (Reduction - Reduction2), - State #vqstate { q2 = Q2a, q3 = Q3a, - index_state = IndexState2, - ram_index_count = RamIndexCount1 }; - _ -> - State - end. +reduce_memory_use(State) -> + {_, State1} = reduce_memory_use(fun push_alphas_to_betas/2, + fun limit_ram_index/2, + fun push_betas_to_deltas/1, + State), + State1. + +limit_ram_index(Quota, State = #vqstate { q2 = Q2, q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Q2a, {Quota1, IndexState1}} = limit_ram_index( + fun bpqueue:map_fold_filter_l/4, + Q2, {Quota, IndexState}), + %% TODO: we shouldn't be writing index entries for messages that + %% can never end up in delta due them residing in the only segment + %% held by q3. + {Q3a, {Quota2, IndexState2}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q3, {Quota1, IndexState1}), + State #vqstate { q2 = Q2a, q3 = Q3a, + index_state = IndexState2, + ram_index_count = RamIndexCount - (Quota - Quota2) }. limit_ram_index(_MapFoldFilterFun, Q, {0, IndexState}) -> {Q, {0, IndexState}}; -limit_ram_index(MapFoldFilterFun, Q, {Reduction, IndexState}) -> +limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) -> MapFoldFilterFun( fun erlang:'not'/1, fun (MsgStatus, {0, _IndexStateN}) -> @@ -1161,7 +1183,7 @@ limit_ram_index(MapFoldFilterFun, Q, {Reduction, IndexState}) -> {MsgStatus1, IndexStateN1} = maybe_write_index_to_disk(true, MsgStatus, IndexStateN), {true, MsgStatus1, {N-1, IndexStateN1}} - end, {Reduction, IndexState}, Q). + end, {Quota, IndexState}, Q). permitted_ram_index_count(#vqstate { len = 0 }) -> infinity; @@ -1172,10 +1194,10 @@ permitted_ram_index_count(#vqstate { len = Len, BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). -reduction(Current, Permitted) +chunk_size(Current, Permitted) when Permitted =:= infinity orelse Permitted >= Current -> 0; -reduction(Current, Permitted) -> +chunk_size(Current, Permitted) -> lists:min([Current - Permitted, ?IO_BATCH_SIZE]). maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> @@ -1228,6 +1250,11 @@ maybe_deltas_to_betas(State = #vqstate { end end. +push_alphas_to_betas(Quota, State) -> + { Quota1, State1} = maybe_push_q1_to_betas(Quota, State), + {_Quota2, State2} = maybe_push_q4_to_betas(Quota1, State1), + State2. 
+ maybe_push_q1_to_betas(0, State) -> {0, State}; maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> -- cgit v1.2.1 From 9bb6bcb286a75def21c749cb25f1a5abccd7d615 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Jul 2010 13:27:11 +0100 Subject: Correct documentation for variable queue and the chunking of transitions --- src/rabbit_variable_queue.erl | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ff74dd83..87caf487 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -111,18 +111,22 @@ %% should be very few betas remaining, thus the transition is fast (no %% work needs to be done for the gamma -> delta transition). %% -%% The conversion of betas to gammas is done on all actions that can -%% increase the message count, such as publish and requeue, and when -%% the queue is asked to reduce its memory usage. The conversion is -%% done in batches of exactly ?RAM_INDEX_BATCH_SIZE. This value should -%% not be too small, otherwise the frequent operations on the queues -%% of q2 and q3 will not be effectively amortised (switching the -%% direction of queue access defeats amortisation), nor should it be -%% too big, otherwise converting a batch stalls the queue for too -%% long. Therefore, it must be just right. This approach is preferable -%% to doing work on a new queue-duration because converting all the -%% indicated betas to gammas at that point can be far too expensive, -%% thus requiring batching and segmented work anyway. +%% The conversion of betas to gammas is done in batches of exactly +%% ?IO_BATCH_SIZE. This value should not be too small, otherwise the +%% frequent operations on the queues of q2 and q3 will not be +%% effectively amortised (switching the direction of queue access +%% defeats amortisation), nor should it be too big, otherwise +%% converting a batch stalls the queue for too long. Therefore, it +%% must be just right. +%% +%% The conversion from alphas to betas is also chunked, but only to +%% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at +%% any one time. This further smooths the effects of changes to the +%% target_ram_index_count and ensures the queue remains responsive +%% even when there is a large amount of IO work to do. The +%% idle_timeout callback is utilised to ensure that conversions are +%% done as promptly as possible whilst ensuring the queue remains +%% responsive. %% %% In the queue we only keep track of messages that are pending %% delivery. This is fine for queue purging, but can be expensive for -- cgit v1.2.1 From e851dec199696c95b3e16be42e62bc5c5fe79cdb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Jul 2010 13:45:39 +0100 Subject: Minor typeo --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 87caf487..cbe21cdb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -122,7 +122,7 @@ %% The conversion from alphas to betas is also chunked, but only to %% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at %% any one time. This further smooths the effects of changes to the -%% target_ram_index_count and ensures the queue remains responsive +%% target_ram_msg_count and ensures the queue remains responsive %% even when there is a large amount of IO work to do. 
The %% idle_timeout callback is utilised to ensure that conversions are %% done as promptly as possible whilst ensuring the queue remains %% responsive. -- cgit v1.2.1 From e851dec199696c95b3e16be42e62bc5c5fe79cdb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Jul 2010 13:45:39 +0100 Subject: Minor typo --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 87caf487..cbe21cdb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -122,7 +122,7 @@ %% The conversion from alphas to betas is also chunked, but only to %% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at %% any one time. This further smooths the effects of changes to the -%% target_ram_index_count and ensures the queue remains responsive +%% target_ram_msg_count and ensures the queue remains responsive %% even when there is a large amount of IO work to do. -- cgit v1.2.1 From 7d4bd3fcda27c84fc1f22d1a65b6a3000fe96b94 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 7 Jul 2010 14:01:32 +0100 Subject: remove redundant heads --- src/rabbit_variable_queue.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index cbe21cdb..0a170e72 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1258,8 +1258,6 @@ push_alphas_to_betas(Quota, State) -> {_Quota2, State2} = maybe_push_q4_to_betas(Quota1, State1), State2. -maybe_push_q1_to_betas(0, State) -> - {0, State}; maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> maybe_push_alphas_to_betas( fun queue:out/1, fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, @@ -1273,8 +1271,6 @@ maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } end, Quota, Q1, State). -maybe_push_q4_to_betas(0, State) -> - {0, State}; maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) -> maybe_push_alphas_to_betas( fun queue:out_r/1, fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, Q4a, State1 = #vqstate { q3 = Q3 }) -> State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), q4 = Q4a } end, Quota, Q4, State). -- cgit v1.2.1 From 2824543b7ef4162c0767560ea7e4833c8c3a8db0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Jul 2010 14:09:47 +0100 Subject: When pushing betas to gammas, *always* do the youngest betas first as they will stay around the longest and thus have the greatest chance of reducing the number of subsequent betas pushed to gammas --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a9e7ccc7..e5665448 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1133,7 +1133,7 @@ limit_ram_index(State = #vqstate { ram_index_count = RamIndexCount }) -> false -> #vqstate { q2 = Q2, q3 = Q3, index_state = IndexState } = State, {Q2a, {Reduction1, IndexState1}} = - limit_ram_index(fun bpqueue:map_fold_filter_l/4, + limit_ram_index(fun bpqueue:map_fold_filter_r/4, Q2, {Reduction, IndexState}), %% TODO: we shouldn't be writing index %% entries for messages that can never end up -- cgit v1.2.1 From fa5e61b9c3d146ed3249783c1f86c0e69c8e3543 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 7 Jul 2010 14:22:17 +0100 Subject: Bump the msg_store file size limit when testing from 128 bytes to 512 bytes (normal operational limit is 16MB).
This ensures that the GC code is hit --- src/rabbit_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b28dd839..2dd2974a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1346,7 +1346,7 @@ test_backing_queue() -> {ok, rabbit_variable_queue} -> {ok, FileSizeLimit} = application:get_env(rabbit, msg_store_file_size_limit), - application:set_env(rabbit, msg_store_file_size_limit, 128, + application:set_env(rabbit, msg_store_file_size_limit, 512, infinity), {ok, MaxJournal} = application:get_env(rabbit, queue_index_max_journal_entries), -- cgit v1.2.1 From ac665472a4959df2ba95d3aa7e337b91963e2e26 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 7 Jul 2010 23:16:14 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 52845f63..eb711b42 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -911,14 +911,14 @@ tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, on_sync = OnSync = {SAcks, SPubs, SFuns}, pending_ack = PA, durable = IsDurable }) -> - %% If we are a non-durable queue, or (no persisent pubs, and no + %% If we are a non-durable queue, or (no persistent pubs, and no %% persistent acks) then we can skip the queue_index loop. case (not IsDurable) orelse (IsTransientPubs andalso lists:foldl( fun (AckTag, true ) -> case dict:find(AckTag, PA) of - {ok, #msg_status {}} -> true; + {ok, #msg_status {}} -> true; {ok, {IsPersistent, _Guid}} -> not IsPersistent end; (_AckTag, false) -> false -- cgit v1.2.1 From 7404df895b1cb696d6643a79c2a35cf251db55a3 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 7 Jul 2010 23:44:36 +0100 Subject: minor refactor --- src/rabbit_variable_queue.erl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index eb711b42..04ec9628 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -915,14 +915,12 @@ tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, %% persistent acks) then we can skip the queue_index loop. case (not IsDurable) orelse (IsTransientPubs andalso - lists:foldl( - fun (AckTag, true ) -> - case dict:find(AckTag, PA) of - {ok, #msg_status {}} -> true; - {ok, {IsPersistent, _Guid}} -> not IsPersistent - end; - (_AckTag, false) -> false - end, true, AckTags)) of + lists:all(fun (AckTag) -> + case dict:find(AckTag, PA) of + {ok, #msg_status {}} -> true; + {ok, {IsPersistent, _Guid}} -> not IsPersistent + end + end, AckTags)) of true -> State1 = tx_commit_index(State #vqstate { on_sync = {[], [Pubs], [Fun]} }), State1 #vqstate { on_sync = OnSync }; -- cgit v1.2.1 From 2d8684361c477f6e7e053f41b2c9be1602be82d7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 8 Jul 2010 15:30:53 +0100 Subject: don't force storing of binary properties see also bug 22957. --- src/rabbit_variable_queue.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 04ec9628..9d830cc0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -823,11 +823,6 @@ betas_from_index_entries(List, TransientThreshold, IndexState) -> rabbit_queue_index:ack(Acks, rabbit_queue_index:deliver(Delivers, IndexState))}. 
-ensure_binary_properties(Msg = #basic_message { content = Content }) -> - Msg #basic_message { - content = rabbit_binary_parser:clear_decoded_content( - rabbit_binary_generator:ensure_content_encoded(Content)) }. - %% the first arg is the older delta combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) -> ?BLANK_DELTA; @@ -1075,8 +1070,11 @@ maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { with_msg_store_state( MSCState, IsPersistent, fun (MsgStore, MSCState2) -> - rabbit_msg_store:write( - MsgStore, Guid, ensure_binary_properties(Msg), MSCState2) + Msg1 = Msg #basic_message { + %% don't persist any recoverable decoded properties + content = rabbit_binary_parser:clear_decoded_content( + Msg #basic_message.content)}, + rabbit_msg_store:write(MsgStore, Guid, Msg1, MSCState2) end), {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> -- cgit v1.2.1 From a395b71e52f34bc5f2dc5fdd0ee4f3189154ba3b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Jul 2010 16:48:58 +0100 Subject: Document ram_index_count --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9d830cc0..c176f682 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -117,7 +117,8 @@ %% effectively amortised (switching the direction of queue access %% defeats amortisation), nor should it be too big, otherwise %% converting a batch stalls the queue for too long. Therefore, it -%% must be just right. +%% must be just right. ram_index_count is used here and is the number +%% of betas. %% %% The conversion from alphas to betas is also chunked, but only to %% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at -- cgit v1.2.1 From 7d4bb85a9914dae30216cf053a49f12ea0308bb6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 8 Jul 2010 16:58:29 +0100 Subject: Expose the persistent count in the vq status --- src/rabbit_variable_queue.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c176f682..625c3451 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -660,7 +660,8 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, ram_index_count = RamIndexCount, avg_egress_rate = AvgEgressRate, avg_ingress_rate = AvgIngressRate, - next_seq_id = NextSeqId }) -> + next_seq_id = NextSeqId, + persistent_count = PersistentCount }) -> [ {q1 , queue:len(Q1)}, {q2 , bpqueue:len(Q2)}, {delta , Delta}, @@ -673,7 +674,8 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, {ram_index_count , RamIndexCount}, {avg_egress_rate , AvgEgressRate}, {avg_ingress_rate , AvgIngressRate}, - {next_seq_id , NextSeqId} ]. + {next_seq_id , NextSeqId}, + {persistent_count , PersistentCount} ]. 
%%---------------------------------------------------------------------------- %% Minor helpers -- cgit v1.2.1 From 1fd35617d32d20fbecab18a45488eb56ab1f64c8 Mon Sep 17 00:00:00 2001 From: David Wragg Date: Tue, 13 Jul 2010 03:00:33 +0100 Subject: Adjust the CONFIG_FILE default with the macports prefix --- packaging/macports/Portfile.in | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in index 188a81c0..1856f1b1 100644 --- a/packaging/macports/Portfile.in +++ b/packaging/macports/Portfile.in @@ -75,22 +75,12 @@ post-destroot { reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \ ${realsbin}/rabbitmq-env - reinplace -E "s:(CLUSTER_CONFIG_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(LOG_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(MNESIA_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(PIDS_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl + foreach var {CONFIG_FILE CLUSTER_CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} { + reinplace -E "s:^($var)=/:\\1=${prefix}/:" \ + ${realsbin}/rabbitmq-multi \ + ${realsbin}/rabbitmq-server \ + ${realsbin}/rabbitmqctl + } xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \ ${wrappersbin}/rabbitmq-multi -- cgit v1.2.1 From 697805d5d73c1193ecce6df1ac393a9f44ce255e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 14 Jul 2010 15:56:28 +0100 Subject: cosmetic: more sensible order --- src/rabbit_variable_queue.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 625c3451..8799fff3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -488,10 +488,10 @@ fetch(AckRequired, State = #vqstate { q4 = Q4, Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end, Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, IndexState2 = - case {MsgOnDisk, IndexOnDisk, AckRequired, IsPersistent} of - {true, false, false, _} -> Rem(), IndexState1; - {true, true, false, _} -> Rem(), Ack(); - {true, true, true, false} -> Ack(); + case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of + {false, true, false, _} -> Rem(), IndexState1; + {false, true, true, _} -> Rem(), Ack(); + { true, true, true, false} -> Ack(); _ -> IndexState1 end, -- cgit v1.2.1 From ce910415643d379b6e5cacc90eb46ae186609220 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 07:54:25 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 152 ++++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 74 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8799fff3..84db3246 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -735,51 +735,6 @@ maybe_write_delivered(false, _SeqId, IndexState) -> maybe_write_delivered(true, SeqId, IndexState) -> rabbit_queue_index:deliver([SeqId], IndexState). -accumulate_ack(SeqId, IsPersistent, Guid, {SeqIdsAcc, Dict}) -> - {case IsPersistent of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, Dict)}. 
- -record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, - is_persistent = IsPersistent, - msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> - AckEntry = case MsgOnDisk of - true -> {IsPersistent, Guid}; - false -> MsgStatus - end, - dict:store(SeqId, AckEntry, PA). - -remove_pending_ack(KeepPersistent, - State = #vqstate { pending_ack = PA, - index_state = IndexState }) -> - {{SeqIds, GuidsByStore}, PA1} = - dict:fold( - fun (SeqId, {IsPersistent, Guid}, {Acc, PA2}) -> - {accumulate_ack(SeqId, IsPersistent, Guid, Acc), - case KeepPersistent andalso IsPersistent of - true -> PA2; - false -> dict:erase(SeqId, PA2) - end}; - (SeqId, #msg_status {}, {Acc, PA2}) -> - {Acc, dict:erase(SeqId, PA2)} - end, {{[], dict:new()}, PA}, PA), - case KeepPersistent of - true -> State1 = State #vqstate { pending_ack = PA1 }, - case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of - error -> State1; - {ok, Guids} -> ok = rabbit_msg_store:remove( - ?TRANSIENT_MSG_STORE, Guids), - State1 - end; - false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:remove(MsgStore, Guids) - end, ok, GuidsByStore), - State #vqstate { pending_ack = dict:new(), - index_state = IndexState1 } - end. - lookup_tx(Txn) -> case get({txn, Txn}) of undefined -> #tx { pending_messages = [], pending_acks = [] }; @@ -859,35 +814,6 @@ beta_fold(Fun, Init, Q) -> %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -ack(_MsgStoreFun, _Fun, [], State) -> - State; -ack(MsgStoreFun, Fun, AckTags, State) -> - {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, - persistent_count = PCount }} = - lists:foldl( - fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> - {ok, AckEntry} = dict:find(SeqId, PA), - {case AckEntry of - #msg_status { index_on_disk = false, %% ASSERTIONS - msg_on_disk = false, - is_persistent = false } -> - Acc; - {IsPersistent, Guid} -> - accumulate_ack(SeqId, IsPersistent, Guid, Acc) - end, Fun(AckEntry, State2 #vqstate { - pending_ack = dict:erase(SeqId, PA) })} - end, {{[], dict:new()}, State}, AckTags), - IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - MsgStoreFun(MsgStore, Guids) - end, ok, GuidsByStore), - PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of - error -> 0; - {ok, Guids} -> length(Guids) - end, - State1 #vqstate { index_state = IndexState1, - persistent_count = PCount1 }. - msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) -> Self = self(), F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( @@ -1110,6 +1036,84 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, {MsgStatus2, State #vqstate { index_state = IndexState1, msg_store_clients = MSCState1 }}. +%%---------------------------------------------------------------------------- +%% Internal gubbins for acks +%%---------------------------------------------------------------------------- + +record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> + AckEntry = case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus + end, + dict:store(SeqId, AckEntry, PA). 
+ +remove_pending_ack(KeepPersistent, + State = #vqstate { pending_ack = PA, + index_state = IndexState }) -> + {{SeqIds, GuidsByStore}, PA1} = + dict:fold( + fun (SeqId, {IsPersistent, Guid}, {Acc, PA2}) -> + {accumulate_ack(SeqId, IsPersistent, Guid, Acc), + case KeepPersistent andalso IsPersistent of + true -> PA2; + false -> dict:erase(SeqId, PA2) + end}; + (SeqId, #msg_status {}, {Acc, PA2}) -> + {Acc, dict:erase(SeqId, PA2)} + end, {{[], dict:new()}, PA}, PA), + case KeepPersistent of + true -> State1 = State #vqstate { pending_ack = PA1 }, + case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + error -> State1; + {ok, Guids} -> ok = rabbit_msg_store:remove( + ?TRANSIENT_MSG_STORE, Guids), + State1 + end; + false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = dict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + State #vqstate { pending_ack = dict:new(), + index_state = IndexState1 } + end. + +ack(_MsgStoreFun, _Fun, [], State) -> + State; +ack(MsgStoreFun, Fun, AckTags, State) -> + {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, + persistent_count = PCount }} = + lists:foldl( + fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> + {ok, AckEntry} = dict:find(SeqId, PA), + {case AckEntry of + #msg_status { index_on_disk = false, %% ASSERTIONS + msg_on_disk = false, + is_persistent = false } -> + Acc; + {IsPersistent, Guid} -> + accumulate_ack(SeqId, IsPersistent, Guid, Acc) + end, Fun(AckEntry, State2 #vqstate { + pending_ack = dict:erase(SeqId, PA) })} + end, {{[], dict:new()}, State}, AckTags), + IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = dict:fold(fun (MsgStore, Guids, ok) -> + MsgStoreFun(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of + error -> 0; + {ok, Guids} -> length(Guids) + end, + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1 }. + +accumulate_ack(SeqId, IsPersistent, Guid, {SeqIdsAcc, Dict}) -> + {case IsPersistent of + true -> [SeqId | SeqIdsAcc]; + false -> SeqIdsAcc + end, rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, Dict)}. + %%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 815d4c2d45bc80cd42d4e54b269a4efd53de272c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 08:21:54 +0100 Subject: simplify remove_pending_ack The pending_ack dict should always be empty at the end. Previously we kept the ack records of persistent messages when KeepPersistent=true. 
--- src/rabbit_variable_queue.erl | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 84db3246..04dcf88d 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1052,20 +1052,15 @@ record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState }) -> - {{SeqIds, GuidsByStore}, PA1} = - dict:fold( - fun (SeqId, {IsPersistent, Guid}, {Acc, PA2}) -> - {accumulate_ack(SeqId, IsPersistent, Guid, Acc), - case KeepPersistent andalso IsPersistent of - true -> PA2; - false -> dict:erase(SeqId, PA2) - end}; - (SeqId, #msg_status {}, {Acc, PA2}) -> - {Acc, dict:erase(SeqId, PA2)} - end, {{[], dict:new()}, PA}, PA), + {SeqIds, GuidsByStore} = + dict:fold(fun (SeqId, {IsPersistent, Guid}, Acc) -> + accumulate_ack(SeqId, IsPersistent, Guid, Acc); + (_SeqId, #msg_status {}, Acc) -> + Acc + end, {[], dict:new()}, PA), + State1 = State #vqstate { pending_ack = dict:new() }, case KeepPersistent of - true -> State1 = State #vqstate { pending_ack = PA1 }, - case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + true -> case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of error -> State1; {ok, Guids} -> ok = rabbit_msg_store:remove( ?TRANSIENT_MSG_STORE, Guids), @@ -1075,8 +1070,7 @@ remove_pending_ack(KeepPersistent, ok = dict:fold(fun (MsgStore, Guids, ok) -> rabbit_msg_store:remove(MsgStore, Guids) end, ok, GuidsByStore), - State #vqstate { pending_ack = dict:new(), - index_state = IndexState1 } + State1 #vqstate { index_state = IndexState1 } end. ack(_MsgStoreFun, _Fun, [], State) -> -- cgit v1.2.1 From 98eaebfed19e7dda8a1ca981fd8136d2b2d33b93 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 08:53:04 +0100 Subject: refactor --- src/rabbit_variable_queue.erl | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 04dcf88d..74168773 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1052,12 +1052,8 @@ record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState }) -> - {SeqIds, GuidsByStore} = - dict:fold(fun (SeqId, {IsPersistent, Guid}, Acc) -> - accumulate_ack(SeqId, IsPersistent, Guid, Acc); - (_SeqId, #msg_status {}, Acc) -> - Acc - end, {[], dict:new()}, PA), + {SeqIds, GuidsByStore} = dict:fold(fun accumulate_ack/3, + {[], dict:new()}, PA), State1 = State #vqstate { pending_ack = dict:new() }, case KeepPersistent of true -> case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of @@ -1081,15 +1077,9 @@ ack(MsgStoreFun, Fun, AckTags, State) -> lists:foldl( fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> {ok, AckEntry} = dict:find(SeqId, PA), - {case AckEntry of - #msg_status { index_on_disk = false, %% ASSERTIONS - msg_on_disk = false, - is_persistent = false } -> - Acc; - {IsPersistent, Guid} -> - accumulate_ack(SeqId, IsPersistent, Guid, Acc) - end, Fun(AckEntry, State2 #vqstate { - pending_ack = dict:erase(SeqId, PA) })} + {accumulate_ack(SeqId, AckEntry, Acc), + Fun(AckEntry, State2 #vqstate { + pending_ack = dict:erase(SeqId, PA) })} end, {{[], dict:new()}, State}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), ok = dict:fold(fun (MsgStore, Guids, ok) -> @@ -1102,7 +1092,11 @@ 
ack(MsgStoreFun, Fun, AckTags, State) -> State1 #vqstate { index_state = IndexState1, persistent_count = PCount1 }. -accumulate_ack(SeqId, IsPersistent, Guid, {SeqIdsAcc, Dict}) -> +accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS + msg_on_disk = false, + index_on_disk = false }, Acc) -> + Acc; +accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> {case IsPersistent of true -> [SeqId | SeqIdsAcc]; false -> SeqIdsAcc -- cgit v1.2.1 From afb9ab06460eb122e07a4d7f857e8b62a336f16c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 08:53:21 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 74168773..c9aec2ff 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -1086,7 +1086,7 @@ ack(MsgStoreFun, Fun, AckTags, State) -> MsgStoreFun(MsgStore, Guids) end, ok, GuidsByStore), PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of - error -> 0; + error -> 0; {ok, Guids} -> length(Guids) end, State1 #vqstate { index_state = IndexState1, -- cgit v1.2.1 From 82d254d06cbe0b61fdc42f54a106609601c54584 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 12:59:59 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 90 +++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c9aec2ff..46e1aad8 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -915,51 +915,6 @@ remove_queue_entries1( false -> Acks end}. -fetch_from_q3_to_q4(State = #vqstate { - q1 = Q1, - q2 = Q2, - delta = #delta { count = DeltaCount }, - q3 = Q3, - q4 = Q4, - ram_msg_count = RamMsgCount, - ram_index_count = RamIndexCount, - msg_store_clients = MSCState }) -> - case bpqueue:out(Q3) of - {empty, _Q3} -> - {empty, State}; - {{value, IndexOnDisk, MsgStatus = #msg_status { - msg = undefined, guid = Guid, - is_persistent = IsPersistent }}, Q3a} -> - {{ok, Msg = #basic_message {}}, MSCState1} = - read_from_msg_store(MSCState, IsPersistent, Guid), - Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), - RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), - true = RamIndexCount1 >= 0, %% ASSERTION - State1 = State #vqstate { q3 = Q3a, - q4 = Q4a, - ram_msg_count = RamMsgCount + 1, - ram_index_count = RamIndexCount1, - msg_store_clients = MSCState1 }, - State2 = - case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of - {true, true} -> - %% q3 is now empty, it wasn't before; delta is - %% still empty. So q2 must be empty, and q1 - %% can now be joined onto q4 - true = bpqueue:is_empty(Q2), %% ASSERTION - State1 #vqstate { q1 = queue:new(), - q4 = queue:join(Q4a, Q1) }; - {true, false} -> - maybe_deltas_to_betas(State1); - {false, _} -> - %% q3 still isn't empty, we've not touched - %% delta, so the invariants between q1, q2, - %% delta and q3 are maintained - State1 - end, - {loaded, State2} - end. - %%---------------------------------------------------------------------------- %% Internal gubbins for publishing %%---------------------------------------------------------------------------- @@ -1194,6 +1149,51 @@ chunk_size(Current, Permitted) chunk_size(Current, Permitted) -> lists:min([Current - Permitted, ?IO_BATCH_SIZE]). 
+fetch_from_q3_to_q4(State = #vqstate { + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + msg_store_clients = MSCState }) -> + case bpqueue:out(Q3) of + {empty, _Q3} -> + {empty, State}; + {{value, IndexOnDisk, MsgStatus = #msg_status { + msg = undefined, guid = Guid, + is_persistent = IsPersistent }}, Q3a} -> + {{ok, Msg = #basic_message {}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), + RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), + true = RamIndexCount1 >= 0, %% ASSERTION + State1 = State #vqstate { q3 = Q3a, + q4 = Q4a, + ram_msg_count = RamMsgCount + 1, + ram_index_count = RamIndexCount1, + msg_store_clients = MSCState1 }, + State2 = + case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of + {true, true} -> + %% q3 is now empty, it wasn't before; delta is + %% still empty. So q2 must be empty, and q1 + %% can now be joined onto q4 + true = bpqueue:is_empty(Q2), %% ASSERTION + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + {true, false} -> + maybe_deltas_to_betas(State1); + {false, _} -> + %% q3 still isn't empty, we've not touched + %% delta, so the invariants between q1, q2, + %% delta and q3 are maintained + State1 + end, + {loaded, State2} + end. + maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> State; maybe_deltas_to_betas(State = #vqstate { -- cgit v1.2.1 From b37158792c075bd47e312556f5332cf393ee032c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 13:37:54 +0100 Subject: add some msg_status invariants these are quite important for understanding much of the code --- src/rabbit_variable_queue.erl | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 46e1aad8..4d57304c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -451,7 +451,7 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) #msg_status { is_delivered = true }, {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), - PA1 = record_pending_ack(MsgStatus1, PA), + PA1 = record_pending_ack(m(MsgStatus1), PA), PCount1 = PCount + one_if(IsPersistent1), {SeqId, a(State1 #vqstate { next_seq_id = SeqId + 1, out_counter = OutCount + 1, @@ -705,6 +705,16 @@ a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, State. +m(MsgStatus = #msg_status { msg = Msg, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }) -> + true = (not IsPersistent) or IndexOnDisk, + true = (not IndexOnDisk) or MsgOnDisk, + true = (Msg =/= undefined) or MsgOnDisk, + + MsgStatus. + one_if(true ) -> 1; one_if(false) -> 0. 
@@ -765,14 +775,14 @@ betas_from_index_entries(List, TransientThreshold, IndexState) -> false -> [SeqId | Delivers1] end, [SeqId | Acks1]}; - false -> {[#msg_status { msg = undefined, - guid = Guid, - seq_id = SeqId, - is_persistent = IsPersistent, - is_delivered = IsDelivered, - msg_on_disk = true, - index_on_disk = true - } | Filtered1], + false -> {[m(#msg_status { msg = undefined, + guid = Guid, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + }) | Filtered1], Delivers1, Acks1} end @@ -933,8 +943,8 @@ publish(Msg = #basic_message { is_persistent = IsPersistent }, #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), State2 = case bpqueue:is_empty(Q3) of - false -> State1 #vqstate { q1 = queue:in(MsgStatus1, Q1) }; - true -> State1 #vqstate { q4 = queue:in(MsgStatus1, Q4) } + false -> State1 #vqstate { q1 = queue:in(m(MsgStatus1), Q1) }; + true -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) } end, PCount1 = PCount + one_if(IsPersistent1), {SeqId, State2 #vqstate { next_seq_id = SeqId + 1, @@ -1131,7 +1141,7 @@ limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) -> false = MsgStatus #msg_status.index_on_disk, %% ASSERTION {MsgStatus1, IndexStateN1} = maybe_write_index_to_disk(true, MsgStatus, IndexStateN), - {true, MsgStatus1, {N-1, IndexStateN1}} + {true, m(MsgStatus1), {N-1, IndexStateN1}} end, {Quota, IndexState}, Q). permitted_ram_index_count(#vqstate { len = 0 }) -> @@ -1166,7 +1176,7 @@ fetch_from_q3_to_q4(State = #vqstate { is_persistent = IsPersistent }}, Q3a} -> {{ok, Msg = #basic_message {}}, MSCState1} = read_from_msg_store(MSCState, IsPersistent, Guid), - Q4a = queue:in(MsgStatus #msg_status { msg = Msg }, Q4), + Q4a = queue:in(m(MsgStatus #msg_status { msg = Msg }), Q4), RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), true = RamIndexCount1 >= 0, %% ASSERTION State1 = State #vqstate { q3 = Q3a, @@ -1288,7 +1298,7 @@ maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> State1 = #vqstate { ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount }} = maybe_write_to_disk(true, false, MsgStatus, State), - MsgStatus2 = MsgStatus1 #msg_status { msg = undefined }, + MsgStatus2 = m(MsgStatus1 #msg_status { msg = undefined }), RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, ram_index_count = RamIndexCount1 }, -- cgit v1.2.1 From 65e5356a731a9b1bb2d4c1e835116f270f8560a1 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 13:54:08 +0100 Subject: cosmetic --- src/rabbit_variable_queue.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4d57304c..4e120a94 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -755,11 +755,6 @@ store_tx(Txn, Tx) -> put({txn, Txn}, Tx). erase_tx(Txn) -> erase({txn, Txn}). -update_rate(Now, Then, Count, {OThen, OCount}) -> - %% form the avg over the current period and the previous - Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), - {Avg, {Then, Count}}. - persistent_guids(Pubs) -> [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. @@ -820,6 +815,11 @@ combine_deltas(#delta { start_seq_id = StartLow, beta_fold(Fun, Init, Q) -> bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). 
+update_rate(Now, Then, Count, {OThen, OCount}) -> + %% form the avg over the current period and the previous + Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), + {Avg, {Then, Count}}. + %%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 54638fa8c82d9e0ce1c8782bb71e7eb68d5982a2 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 14:06:31 +0100 Subject: improve type specs --- src/rabbit_variable_queue.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4e120a94..89144a83 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -237,6 +237,7 @@ -ifdef(use_specs). +-type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}). -type(seq_id() :: non_neg_integer()). -type(ack() :: seq_id() | 'blank_ack'). @@ -270,13 +271,11 @@ ram_index_count :: non_neg_integer(), out_counter :: non_neg_integer(), in_counter :: non_neg_integer(), - egress_rate :: {{integer(), integer(), integer()}, - non_neg_integer()}, + egress_rate :: {timestamp(), non_neg_integer()}, avg_egress_rate :: float(), - ingress_rate :: {{integer(), integer(), integer()}, - non_neg_integer()}, + ingress_rate :: {timestamp(), non_neg_integer()}, avg_ingress_rate :: float(), - rate_timestamp :: {integer(), integer(), integer()} + rate_timestamp :: timestamp() }). -include("rabbit_backing_queue_spec.hrl"). -- cgit v1.2.1 From 4b16c7c6b1c16b1bf46cc1dcea2d875f5c442fb8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 14:11:28 +0100 Subject: avoid >= between a number and an atom --- src/rabbit_variable_queue.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 89144a83..4fc11206 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -602,7 +602,8 @@ set_ram_duration_target(DurationTarget, State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, duration_target = DurationTarget }, a(case TargetRamMsgCount1 == infinity orelse - TargetRamMsgCount1 >= TargetRamMsgCount of + (TargetRamMsgCount =/= infinity andalso + TargetRamMsgCount1 >= TargetRamMsgCount) of true -> State1; false -> reduce_memory_use(State1) end). -- cgit v1.2.1 From e70244a4f02ec2070411c36603009556934ec01a Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 14:13:00 +0100 Subject: rate averages are meant to be floats --- src/rabbit_variable_queue.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4fc11206..89ba342d 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -366,9 +366,9 @@ init(QueueName, IsDurable, _Recover) -> out_counter = 0, in_counter = 0, egress_rate = {Now, 0}, - avg_egress_rate = 0, + avg_egress_rate = 0.0, ingress_rate = {Now, DeltaCount1}, - avg_ingress_rate = 0, + avg_ingress_rate = 0.0, rate_timestamp = Now }, a(maybe_deltas_to_betas(State)). 
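[Editorial aside on the two fixes above, illustrated with a shell session rather than code from the tree. Erlang's term ordering sorts every number before every atom, so comparing a count with the atom 'infinity' cannot raise an error — it just silently picks a branch:

    1> 1000000 >= infinity.
    false
    2> infinity >= 1000000.
    true

hence the explicit TargetRamMsgCount =/= infinity test before the numeric comparison. Similarly, seeding avg_egress_rate and avg_ingress_rate with 0.0 rather than 0 keeps those fields float-typed from the first sample onwards, matching their float() type specs.]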
-- cgit v1.2.1 From 4e39ae382302832d9353fd28d78944f543fe083f Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 15 Jul 2010 14:56:41 +0100 Subject: another place where averages could end up being integers instead of floats --- src/rabbit_variable_queue.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 89ba342d..f32c98ba 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -816,9 +816,8 @@ beta_fold(Fun, Init, Q) -> bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). update_rate(Now, Then, Count, {OThen, OCount}) -> - %% form the avg over the current period and the previous - Avg = 1000000 * ((Count + OCount) / timer:now_diff(Now, OThen)), - {Avg, {Then, Count}}. + %% avg over the current period and the previous + {1000000.0 * (Count + OCount) / timer:now_diff(Now, OThen), {Then, Count}}. -- cgit v1.2.1 From 073f7f4b99b8a99611eab6b429445e54939faadd Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 16 Jul 2010 08:04:12 +0100 Subject: refactor: trim args for msg_store_callback we only ever need a msg_store_callback when we have persisted pubs --- src/rabbit_variable_queue.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f32c98ba..5344b72a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -563,7 +563,7 @@ tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> IsTransientPubs, PubsOrdered, AckTags1, Fun, State); false -> ok = rabbit_msg_store:sync( ?PERSISTENT_MSG_STORE, PersistentGuids, - msg_store_callback(PersistentGuids, IsTransientPubs, + msg_store_callback(PersistentGuids, PubsOrdered, AckTags1, Fun)), State end)}. @@ -823,12 +823,11 @@ update_rate(Now, Then, Count, {OThen, OCount}) -> %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -msg_store_callback(PersistentGuids, IsTransientPubs, Pubs, AckTags, Fun) -> +msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> Self = self(), F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( Self, fun (StateN) -> tx_commit_post_msg_store( - IsTransientPubs, Pubs, - AckTags, Fun, StateN) + false, Pubs, AckTags, Fun, StateN) end) end, fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( -- cgit v1.2.1 From 5038152cd62834600a0ec87d272c014522a55bbc Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 16 Jul 2010 08:44:13 +0100 Subject: refactor: invert some conditionals for better readability --- src/rabbit_variable_queue.erl | 40 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 5344b72a..3f73ac0b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -549,23 +549,21 @@ tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> {lists:flatten(AckTags), a(State)}. tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> - %% If we are a non-durable queue, or we have no persistent pubs, - %% we can skip the msg_store loop.
#tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), erase_tx(Txn), PubsOrdered = lists:reverse(Pubs), AckTags1 = lists:flatten(AckTags), PersistentGuids = persistent_guids(PubsOrdered), - IsTransientPubs = [] == PersistentGuids, + HasPersistentPubs = PersistentGuids =/= [], {AckTags1, - a(case (not IsDurable) orelse IsTransientPubs of - true -> tx_commit_post_msg_store( - IsTransientPubs, PubsOrdered, AckTags1, Fun, State); - false -> ok = rabbit_msg_store:sync( + a(case IsDurable andalso HasPersistentPubs of + true -> ok = rabbit_msg_store:sync( ?PERSISTENT_MSG_STORE, PersistentGuids, msg_store_callback(PersistentGuids, PubsOrdered, AckTags1, Fun)), - State + State; + false -> tx_commit_post_msg_store( + HasPersistentPubs, PubsOrdered, AckTags1, Fun, State) end)}. requeue(AckTags, State) -> @@ -827,7 +825,7 @@ msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> Self = self(), F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( Self, fun (StateN) -> tx_commit_post_msg_store( - false, Pubs, AckTags, Fun, StateN) + true, Pubs, AckTags, Fun, StateN) end) end, fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( @@ -838,27 +836,25 @@ msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> end) end. -tx_commit_post_msg_store(IsTransientPubs, Pubs, AckTags, Fun, +tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, State = #vqstate { on_sync = OnSync = {SAcks, SPubs, SFuns}, pending_ack = PA, durable = IsDurable }) -> - %% If we are a non-durable queue, or (no persistent pubs, and no - %% persistent acks) then we can skip the queue_index loop. - case (not IsDurable) orelse - (IsTransientPubs andalso - lists:all(fun (AckTag) -> + case IsDurable andalso + (HasPersistentPubs orelse + lists:any(fun (AckTag) -> case dict:find(AckTag, PA) of - {ok, #msg_status {}} -> true; - {ok, {IsPersistent, _Guid}} -> not IsPersistent + {ok, #msg_status {}} -> false; + {ok, {IsPersistent, _Guid}} -> IsPersistent end end, AckTags)) of - true -> State1 = tx_commit_index(State #vqstate { - on_sync = {[], [Pubs], [Fun]} }), - State1 #vqstate { on_sync = OnSync }; - false -> State #vqstate { on_sync = { [AckTags | SAcks], + true -> State #vqstate { on_sync = { [AckTags | SAcks], [Pubs | SPubs], - [Fun | SFuns] }} + [Fun | SFuns] }}; + false -> State1 = tx_commit_index(State #vqstate { + on_sync = {[], [Pubs], [Fun]} }), + State1 #vqstate { on_sync = OnSync } end. 
tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> -- cgit v1.2.1 From 7faa5cd9af3ffed04e7e9b5fb05d6d2d088eb732 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 16 Jul 2010 11:30:08 +0100 Subject: add pending_acks to bq state report it turns out to be useful --- src/rabbit_variable_queue.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 3f73ac0b..15509b3c 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -652,6 +652,7 @@ handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, len = Len, + pending_ack = PA, on_sync = {_, _, From}, target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, @@ -666,6 +667,7 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, {q3 , bpqueue:len(Q3)}, {q4 , queue:len(Q4)}, {len , Len}, + {pending_acks , dict:size(PA)}, {outstanding_txns , length(From)}, {target_ram_msg_count , TargetRamMsgCount}, {ram_msg_count , RamMsgCount}, -- cgit v1.2.1 From 5507dbf1418dec3d61acfe82d74f3dcf7469be44 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 16 Jul 2010 11:51:30 +0100 Subject: don't forget to process transactional acks ...when the tx doesn't involve any persistent messages, thus plugging a leak in pending_ack and qi. To reproduce: rr(amqp_connection). Conn = amqp_connection:start_network(), Ch = amqp_connection:open_channel(Conn), Q = <<"Q">>, amqp_channel:call(Ch, #'queue.declare'{queue = Q, durable = true, exclusive = true}), amqp_channel:call(Ch, #'basic.publish'{routing_key = Q}, #amqp_msg{}), amqp_channel:call(Ch, #'basic.get'{queue = Q}), amqp_channel:call(Ch, #'tx.select'{}), amqp_channel:call(Ch, #'basic.ack'{delivery_tag = 1}), amqp_channel:call(Ch, #'tx.commit'{}), ok. and then $ ./scripts/rabbitmqctl list_queues name messages_unacknowledged backing_queue_status Listing queues ... Q 0 [{q1,0},{q2,0},{delta,{delta,undefined,0,undefined}},{q3,0},{q4,0},{len,0},{pending_acks,1},{outstanding_txns,0},{target_ram_msg_count,infinity},{ram_msg_count,0},{ram_index_count,0},{avg_egress_rate,0.0},{avg_ingress_rate,0.0},{next_seq_id,1},{persistent_count,0}] ...done. Note the pending_acks of 1. --- src/rabbit_variable_queue.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 15509b3c..3378c435 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -854,8 +854,10 @@ tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, true -> State #vqstate { on_sync = { [AckTags | SAcks], [Pubs | SPubs], [Fun | SFuns] }}; - false -> State1 = tx_commit_index(State #vqstate { - on_sync = {[], [Pubs], [Fun]} }), + false -> State1 = tx_commit_index( + State #vqstate { on_sync = { [AckTags], + [Pubs], + [Fun]} }), State1 #vqstate { on_sync = OnSync } end. 
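[Editorial note, a condensed view of the fix rather than additional patch content. The immediate-commit branch used to discard the transaction's acks:

    %% before: the transaction's acks were dropped on the floor
    tx_commit_index(State #vqstate { on_sync = {[], [Pubs], [Fun]} })
    %% after: the acks travel with the commit
    tx_commit_index(State #vqstate { on_sync = {[AckTags], [Pubs], [Fun]} })

With the change, tx_commit_index runs ack(Acks, State) over those tags, so re-running the reproduction above should report {pending_acks,0} rather than 1, the entry having been erased from pending_ack.]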
-- cgit v1.2.1 From 2bf91694ddb11c2d54ae1cb23c5e000dcb67bb92 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 18 Jul 2010 13:32:09 +0100 Subject: don't log warnings on clean startup of a msg_store --- src/rabbit_msg_store.erl | 16 +++++++--------- src/rabbit_tests.erl | 26 +++++++++++--------------- src/rabbit_variable_queue.erl | 1 - 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 23526a4b..7dea2f94 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -34,7 +34,7 @@ -behaviour(gen_server2). -export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, - sync/3, client_init/2, client_terminate/1, delete_client/2, clean/2, + sync/3, client_init/2, client_terminate/1, delete_client/2, successfully_recovered_state/1]). -export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal @@ -139,7 +139,6 @@ -spec(client_init/2 :: (server(), binary()) -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). -spec(delete_client/2 :: (server(), binary()) -> 'ok'). --spec(clean/2 :: (atom(), file:filename()) -> 'ok'). -spec(successfully_recovered_state/1 :: (server()) -> boolean()). -spec(gc/3 :: (non_neg_integer(), non_neg_integer(), @@ -384,10 +383,6 @@ delete_client(Server, Ref) -> successfully_recovered_state(Server) -> gen_server2:call(Server, successfully_recovered_state, infinity). -clean(Server, BaseDir) -> - Dir = filename:join(BaseDir, atom_to_list(Server)), - ok = rabbit_misc:recursive_delete([Dir]). - %%---------------------------------------------------------------------------- %% Client-side-only helpers %%---------------------------------------------------------------------------- @@ -506,7 +501,6 @@ init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> [self()]), Dir = filename:join(BaseDir, atom_to_list(Server)), - ok = filelib:ensure_dir(filename:join(Dir, "nothing")), {ok, IndexModule} = application:get_env(msg_store_index_module), rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), @@ -1125,7 +1119,12 @@ index_delete_by_file(File, #msstate { index_module = Index, %% shutdown and recovery %%---------------------------------------------------------------------------- +recover_index_and_client_refs(IndexModule, undefined, Dir, _Server) -> + ok = rabbit_misc:recursive_delete([Dir]), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + {false, IndexModule:new(Dir), sets:new()}; recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server) -> + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), Fresh = fun (ErrorMsg, ErrorArgs) -> rabbit_log:warning("~w: " ++ ErrorMsg ++ "~nrebuilding indices from scratch~n", @@ -1138,8 +1137,7 @@ recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server) -> {true, Terms} -> RecClientRefs = proplists:get_value(client_refs, Terms, []), RecIndexModule = proplists:get_value(index_module, Terms), - case (ClientRefs =/= undefined andalso - lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) + case (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) andalso IndexModule =:= RecIndexModule) of true -> case IndexModule:recover(Dir) of {ok, IndexState1} -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ff7df11b..630483da 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1374,17 +1374,13 @@ test_backing_queue() -> end. start_msg_store_empty() -> - start_msg_store(fun (ok) -> finished end, ok). 
+ start_msg_store(undefined, {fun (ok) -> finished end, ok}). -start_msg_store(MsgRefDeltaGen, MsgRefDeltaGenInit) -> +start_msg_store(ClientRefs, StartupFunState) -> ok = rabbit_sup:start_child( ?PERSISTENT_MSG_STORE, rabbit_msg_store, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), undefined, - {MsgRefDeltaGen, MsgRefDeltaGenInit}]), - start_transient_msg_store(). - -start_transient_msg_store() -> - ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), ClientRefs, + StartupFunState]), ok = rabbit_sup:start_child( ?TRANSIENT_MSG_STORE, rabbit_msg_store, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, @@ -1502,13 +1498,13 @@ test_msg_store() -> ok = rabbit_msg_store:client_terminate(MSCState7), %% stop and restart, preserving every other msg in 2nd half ok = stop_msg_store(), - ok = start_msg_store(fun ([]) -> finished; - ([Guid|GuidsTail]) - when length(GuidsTail) rem 2 == 0 -> - {Guid, 1, GuidsTail}; - ([Guid|GuidsTail]) -> - {Guid, 0, GuidsTail} - end, Guids2ndHalf), + ok = start_msg_store([], {fun ([]) -> finished; + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf}), %% check we have the right msgs left lists:foldl( fun (Guid, Bool) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 3378c435..d7116ba0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -294,7 +294,6 @@ %%---------------------------------------------------------------------------- start(DurableQueues) -> - ok = rabbit_msg_store:clean(?TRANSIENT_MSG_STORE, rabbit_mnesia:dir()), {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues), Refs = [Ref || Terms <- AllTerms, begin -- cgit v1.2.1 From 52286b38abf010232d3ed27952150efaeb5d7045 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 18 Jul 2010 14:20:46 +0100 Subject: add a 'todo' --- src/rabbit_queue_index.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 261a4968..91b19976 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -126,6 +126,11 @@ -define(SEGMENT_EXTENSION, ".idx"). +%% TODO: The segment size would be configurable, but deriving all the +%% other values is quite hairy and quite possibly noticably less +%% efficient, depending on how clever the compiler is when it comes to +%% binary generation/matching with constant vs variable lengths. + -define(REL_SEQ_BITS, 14). -define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). -- cgit v1.2.1 From c07d7710b6d716fa41306994fc266f11bc931b86 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 18 Jul 2010 14:29:29 +0100 Subject: tiny refactor --- src/rabbit_variable_queue.erl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d7116ba0..35a6ff78 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -308,14 +308,13 @@ start(DurableQueues) -> Refs, StartFunState]). 
init(QueueName, IsDurable, _Recover) -> - MsgStoreRecovered = - rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), - ContainsCheckFun = - fun (Guid) -> - rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) - end, {DeltaCount, Terms, IndexState} = - rabbit_queue_index:init(QueueName, MsgStoreRecovered, ContainsCheckFun), + rabbit_queue_index:init( + QueueName, + rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end), {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), {PRef, TRef, Terms1} = -- cgit v1.2.1 From 53f65ded1fc6d9ba3bf7ccc37b787eac59f3c01c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Sun, 18 Jul 2010 17:45:54 +0100 Subject: tweak msg_store API to make it safer ...by not allowing clients to be deleted w/o terminating them too --- src/rabbit_msg_store.erl | 10 ++++++---- src/rabbit_tests.erl | 4 ++-- src/rabbit_variable_queue.erl | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 7dea2f94..63100571 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -34,8 +34,8 @@ -behaviour(gen_server2). -export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, - sync/3, client_init/2, client_terminate/1, delete_client/2, - successfully_recovered_state/1]). + sync/3, client_init/2, client_terminate/1, + client_delete_and_terminate/3, successfully_recovered_state/1]). -export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal @@ -138,7 +138,8 @@ -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). -spec(client_init/2 :: (server(), binary()) -> client_msstate()). -spec(client_terminate/1 :: (client_msstate()) -> 'ok'). --spec(delete_client/2 :: (server(), binary()) -> 'ok'). +-spec(client_delete_and_terminate/3 :: + (client_msstate(), server(), binary()) -> 'ok'). -spec(successfully_recovered_state/1 :: (server()) -> boolean()). -spec(gc/3 :: (non_neg_integer(), non_neg_integer(), @@ -377,7 +378,8 @@ client_terminate(CState) -> close_all_handles(CState), ok. -delete_client(Server, Ref) -> +client_delete_and_terminate(CState, Server, Ref) -> + ok = client_terminate(CState), ok = gen_server2:call(Server, {delete_client, Ref}, infinity). successfully_recovered_state(Server) -> diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 630483da..ec0387c6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1598,8 +1598,8 @@ queue_index_publish(SeqIds, Persistent, Qi) -> Guid, MSCStateN), {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), - ok = rabbit_msg_store:delete_client(MsgStore, Ref), - ok = rabbit_msg_store:client_terminate(MSCStateEnd), + ok = rabbit_msg_store:client_delete_and_terminate( + MSCStateEnd, MsgStore, Ref), {A, B}. 
verify_read_with_published(_Delivered, _Persistent, [], _) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 35a6ff78..b3ba3f17 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -403,12 +403,12 @@ delete_and_terminate(State) -> IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState), case MSCStateP of undefined -> ok; - _ -> rabbit_msg_store:delete_client( - ?PERSISTENT_MSG_STORE, PRef), + _ -> rabbit_msg_store:client_delete_and_terminate( + MSCStateP, ?PERSISTENT_MSG_STORE, PRef), rabbit_msg_store:client_terminate(MSCStateP) end, - rabbit_msg_store:delete_client(?TRANSIENT_MSG_STORE, TRef), - rabbit_msg_store:client_terminate(MSCStateT), + rabbit_msg_store:client_delete_and_terminate( + MSCStateT, ?TRANSIENT_MSG_STORE, TRef), a(State2 #vqstate { index_state = IndexState1, msg_store_clients = undefined }). -- cgit v1.2.1 From 299896e4a89930cd030a93bd70a0cfd70361559d Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 08:08:48 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ec0387c6..becb9a83 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -205,6 +205,7 @@ priority_queue_out_all(Q) -> {empty, _} -> []; {{value, V}, Q1} -> [V | priority_queue_out_all(Q1)] end. + test_priority_queue(Q) -> {priority_queue:is_queue(Q), priority_queue:is_empty(Q), -- cgit v1.2.1 From 838f8d48c1838f288517b63cf2351fc48ea8e371 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 09:01:28 +0100 Subject: refactor: 'cons_if' helper function --- src/rabbit_variable_queue.erl | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b3ba3f17..f1510da3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -716,6 +716,9 @@ m(MsgStatus = #msg_status { msg = Msg, one_if(true ) -> 1; one_if(false) -> 0. +cons_if(true, E, L) -> [E | L]; +cons_if(false, _E, L) -> L. + msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, is_persistent = IsPersistent, is_delivered = false, @@ -763,10 +766,7 @@ betas_from_index_entries(List, TransientThreshold, IndexState) -> {Filtered1, Delivers1, Acks1}) -> case SeqId < TransientThreshold andalso not IsPersistent of true -> {Filtered1, - case IsDelivered of - true -> Delivers1; - false -> [SeqId | Delivers1] - end, + cons_if(not IsDelivered, SeqId, Delivers1), [SeqId | Acks1]}; false -> {[m(#msg_status { msg = undefined, guid = Guid, @@ -871,10 +871,7 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, {SeqIdsAcc, State2}) -> IsPersistent1 = IsDurable andalso IsPersistent, {SeqId, State3} = publish(Msg, false, IsPersistent1, State2), - {case IsPersistent1 of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, State3} + {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3} end, {Acks, ack(Acks, State)}, Pubs), IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), [ Fun() || Fun <- lists:reverse(SFuns) ], @@ -912,14 +909,8 @@ remove_queue_entries1( GuidsByStore); false -> GuidsByStore end, - case IndexOnDisk andalso not IsDelivered of - true -> [SeqId | Delivers]; - false -> Delivers - end, - case IndexOnDisk of - true -> [SeqId | Acks]; - false -> Acks - end}. 
+ cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers), + cons_if(IndexOnDisk, SeqId, Acks)}. %%---------------------------------------------------------------------------- %% Internal gubbins for publishing @@ -1058,10 +1049,8 @@ accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS index_on_disk = false }, Acc) -> Acc; accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> - {case IsPersistent of - true -> [SeqId | SeqIdsAcc]; - false -> SeqIdsAcc - end, rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, Dict)}. + {cons_if(IsPersistent, SeqId, SeqIdsAcc), + rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, Dict)}. %%---------------------------------------------------------------------------- %% Phase changes -- cgit v1.2.1 From 602dde6b8bbbd505c11ef1ce036b5f420c589884 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 11:10:34 +0100 Subject: optimise qi:sync call in tx commit call it with acks for persistent & durable messages only --- src/rabbit_variable_queue.erl | 84 ++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f1510da3..37f8e037 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -220,6 +220,8 @@ -record(tx, { pending_messages, pending_acks }). +-record(sync, { persistent_acks, acks, pubs, funs }). + %% When we discover, on publish, that we should write some indices to %% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of %% betas that we must be due to write indices for before we do any @@ -245,6 +247,11 @@ count :: non_neg_integer (), end_seq_id :: non_neg_integer() }). +-type(sync() :: #sync { persistent_acks :: [[seq_id()]], + acks :: [[seq_id()]], + pubs :: [[rabbit_guid:guid()]], + funs :: [fun (() -> any())] }). + -type(state() :: #vqstate { q1 :: queue(), q2 :: bpqueue:bpqueue(), @@ -256,8 +263,7 @@ index_state :: any(), msg_store_clients :: 'undefined' | {{any(), binary()}, {any(), binary()}}, - on_sync :: {[[ack()]], [[rabbit_guid:guid()]], - [fun (() -> any())]}, + on_sync :: sync(), durable :: boolean(), len :: non_neg_integer(), @@ -289,6 +295,11 @@ count = 0, end_seq_id = Z }). +-define(BLANK_SYNC, #sync { persistent_acks = [], + acks = [], + pubs = [], + funs = [] }). + %%---------------------------------------------------------------------------- %% Public API %%---------------------------------------------------------------------------- @@ -349,7 +360,7 @@ init(QueueName, IsDurable, _Recover) -> index_state = IndexState1, msg_store_clients = {{PersistentClient, PRef}, {TransientClient, TRef}}, - on_sync = {[], [], []}, + on_sync = ?BLANK_SYNC, durable = IsDurable, transient_threshold = NextSeqId, @@ -634,14 +645,14 @@ ram_duration(State = #vqstate { egress_rate = Egress, out_counter = 0, ram_msg_count_prev = RamMsgCount })}. -needs_idle_timeout(#vqstate { on_sync = {_, _, [_|_]}}) -> - true; -needs_idle_timeout(State) -> +needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) -> {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end, fun (_Quota, State1) -> State1 end, fun (State1) -> State1 end, State), - Res. + Res; +needs_idle_timeout(_State) -> + true. idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). 
@@ -651,7 +662,7 @@ handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, len = Len, pending_ack = PA, - on_sync = {_, _, From}, + on_sync = #sync { funs = From }, target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, @@ -838,33 +849,48 @@ msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, State = #vqstate { - on_sync = OnSync = {SAcks, SPubs, SFuns}, + on_sync = OnSync = #sync { + persistent_acks = SPAcks, + acks = SAcks, + pubs = SPubs, + funs = SFuns }, pending_ack = PA, durable = IsDurable }) -> - case IsDurable andalso - (HasPersistentPubs orelse - lists:any(fun (AckTag) -> - case dict:find(AckTag, PA) of - {ok, #msg_status {}} -> false; - {ok, {IsPersistent, _Guid}} -> IsPersistent - end - end, AckTags)) of - true -> State #vqstate { on_sync = { [AckTags | SAcks], - [Pubs | SPubs], - [Fun | SFuns] }}; + PersistentAcks = + case IsDurable of + true -> [AckTag || AckTag <- AckTags, + case dict:find(AckTag, PA) of + {ok, #msg_status {}} -> false; + {ok, {IsPersistent, _Guid}} -> IsPersistent + end]; + false -> [] + end, + case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of + true -> State #vqstate { on_sync = #sync { + persistent_acks = [PersistentAcks | SPAcks], + acks = [AckTags | SAcks], + pubs = [Pubs | SPubs], + funs = [Fun | SFuns] }}; false -> State1 = tx_commit_index( - State #vqstate { on_sync = { [AckTags], - [Pubs], - [Fun]} }), + State #vqstate { on_sync = #sync { + persistent_acks = [], + acks = [AckTags], + pubs = [Pubs], + funs = [Fun] } }), State1 #vqstate { on_sync = OnSync } end. -tx_commit_index(State = #vqstate { on_sync = {_, _, []} }) -> +tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) -> State; -tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, +tx_commit_index(State = #vqstate { on_sync = #sync { + persistent_acks = SPAcks, + acks = SAcks, + pubs = SPubs, + funs = SFuns }, durable = IsDurable }) -> - Acks = lists:flatten(SAcks), - Pubs = lists:flatten(lists:reverse(SPubs)), + PAcks = lists:flatten(SPAcks), + Acks = lists:flatten(SAcks), + Pubs = lists:flatten(lists:reverse(SPubs)), {SeqIds, State1 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, @@ -872,11 +898,11 @@ tx_commit_index(State = #vqstate { on_sync = {SAcks, SPubs, SFuns}, IsPersistent1 = IsDurable andalso IsPersistent, {SeqId, State3} = publish(Msg, false, IsPersistent1, State2), {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3} - end, {Acks, ack(Acks, State)}, Pubs), + end, {PAcks, ack(Acks, State)}, Pubs), IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), [ Fun() || Fun <- lists:reverse(SFuns) ], reduce_memory_use( - State1 #vqstate { index_state = IndexState1, on_sync = {[], [], []} }). + State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }). 
purge_betas_and_deltas(State = #vqstate { q3 = Q3, index_state = IndexState }) -> -- cgit v1.2.1 From 321b1dc7775cabf4895e185fa6f07264f2f77fdf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Jul 2010 12:08:26 +0100 Subject: Cosmetics --- src/rabbit_variable_queue.erl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 37f8e037..7e960fde 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -220,7 +220,7 @@ -record(tx, { pending_messages, pending_acks }). --record(sync, { persistent_acks, acks, pubs, funs }). +-record(sync, { acks_persistent, acks_all, pubs, funs }). %% When we discover, on publish, that we should write some indices to %% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of @@ -247,8 +247,8 @@ count :: non_neg_integer (), end_seq_id :: non_neg_integer() }). --type(sync() :: #sync { persistent_acks :: [[seq_id()]], - acks :: [[seq_id()]], +-type(sync() :: #sync { acks_persistent :: [[seq_id()]], + acks_all :: [[seq_id()]], pubs :: [[rabbit_guid:guid()]], funs :: [fun (() -> any())] }). @@ -295,8 +295,8 @@ count = 0, end_seq_id = Z }). --define(BLANK_SYNC, #sync { persistent_acks = [], - acks = [], +-define(BLANK_SYNC, #sync { acks_persistent = [], + acks_all = [], pubs = [], funs = [] }). @@ -850,8 +850,8 @@ msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, State = #vqstate { on_sync = OnSync = #sync { - persistent_acks = SPAcks, - acks = SAcks, + acks_persistent = SPAcks, + acks_all = SAcks, pubs = SPubs, funs = SFuns }, pending_ack = PA, @@ -867,14 +867,14 @@ tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, end, case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of true -> State #vqstate { on_sync = #sync { - persistent_acks = [PersistentAcks | SPAcks], - acks = [AckTags | SAcks], + acks_persistent = [PersistentAcks | SPAcks], + acks_all = [AckTags | SAcks], pubs = [Pubs | SPubs], funs = [Fun | SFuns] }}; false -> State1 = tx_commit_index( State #vqstate { on_sync = #sync { - persistent_acks = [], - acks = [AckTags], + acks_persistent = [], + acks_all = [AckTags], pubs = [Pubs], funs = [Fun] } }), State1 #vqstate { on_sync = OnSync } @@ -883,8 +883,8 @@ tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) -> State; tx_commit_index(State = #vqstate { on_sync = #sync { - persistent_acks = SPAcks, - acks = SAcks, + acks_persistent = SPAcks, + acks_all = SAcks, pubs = SPubs, funs = SFuns }, durable = IsDurable }) -> -- cgit v1.2.1 From 2e208148d803b50849370abdda7bcfab85d3820f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Jul 2010 12:20:19 +0100 Subject: dict:find -> dict:fetch andalso lists:flatten -> lists:append --- src/rabbit_variable_queue.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7e960fde..a2c3c100 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -555,13 +555,13 @@ tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> persistent_guids(Pubs)); false -> ok end, - {lists:flatten(AckTags), a(State)}. + {lists:append(AckTags), a(State)}. 
tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), erase_tx(Txn), PubsOrdered = lists:reverse(Pubs), - AckTags1 = lists:flatten(AckTags), + AckTags1 = lists:append(AckTags), PersistentGuids = persistent_guids(PubsOrdered), HasPersistentPubs = PersistentGuids =/= [], {AckTags1, @@ -859,9 +859,9 @@ tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, PersistentAcks = case IsDurable of true -> [AckTag || AckTag <- AckTags, - case dict:find(AckTag, PA) of - {ok, #msg_status {}} -> false; - {ok, {IsPersistent, _Guid}} -> IsPersistent + case dict:fetch(AckTag, PA) of + #msg_status {} -> false; + {IsPersistent, _Guid} -> IsPersistent end]; false -> [] end, @@ -888,9 +888,9 @@ tx_commit_index(State = #vqstate { on_sync = #sync { pubs = SPubs, funs = SFuns }, durable = IsDurable }) -> - PAcks = lists:flatten(SPAcks), - Acks = lists:flatten(SAcks), - Pubs = lists:flatten(lists:reverse(SPubs)), + PAcks = lists:append(SPAcks), + Acks = lists:append(SAcks), + Pubs = lists:append(lists:reverse(SPubs)), {SeqIds, State1 = #vqstate { index_state = IndexState }} = lists:foldl( fun (Msg = #basic_message { is_persistent = IsPersistent }, -- cgit v1.2.1 From 459fa7e6b9efcd42460c3ccf7a4fd40a70907521 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 12:51:30 +0100 Subject: refactor: take adantage of l/r symmetry in bpqueue tests --- src/rabbit_tests.erl | 106 +++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 63 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ec6fbe3f..cba9e2c3 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -217,26 +217,17 @@ test_bpqueue() -> Q = bpqueue:new(), true = bpqueue:is_empty(Q), 0 = bpqueue:len(Q), + [] = bpqueue:to_list(Q), - Q1 = bpqueue:in(bar, 3, bpqueue:in(foo, 2, bpqueue:in(foo, 1, Q))), - false = bpqueue:is_empty(Q1), - 3 = bpqueue:len(Q1), - [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(Q1), - - Q2 = bpqueue:in_r(bar, 3, bpqueue:in_r(foo, 2, bpqueue:in_r(foo, 1, Q))), - false = bpqueue:is_empty(Q2), - 3 = bpqueue:len(Q2), - [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(Q2), - - {empty, _Q} = bpqueue:out(Q), - {{value, foo, 1}, Q3} = bpqueue:out(Q1), - {{value, foo, 2}, Q4} = bpqueue:out(Q3), - {{value, bar, 3}, _Q5} = bpqueue:out(Q4), - - {empty, _Q} = bpqueue:out_r(Q), - {{value, foo, 1}, Q6} = bpqueue:out_r(Q2), - {{value, foo, 2}, Q7} = bpqueue:out_r(Q6), - {{value, bar, 3}, _Q8} = bpqueue:out_r(Q7), + Q1 = bpqueue_test(fun bpqueue:in/3, fun bpqueue:out/1, + fun bpqueue:to_list/1, + fun bpqueue:foldl/3, fun bpqueue:map_fold_filter_l/4), + Q2 = bpqueue_test(fun bpqueue:in_r/3, fun bpqueue:out_r/1, + fun (QR) -> lists:reverse( + [{P, lists:reverse(L)} || + {P, L} <- bpqueue:to_list(QR)]) + end, + fun bpqueue:foldr/3, fun bpqueue:map_fold_filter_r/4), [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)), [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)), @@ -260,46 +251,9 @@ test_bpqueue() -> end, {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), - ok = bpqueue:foldl(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, - ok, Q), - ok = bpqueue:foldr(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, - ok, Q), - - [] = bpqueue:to_list(Q), - [{bar,3}, {foo,2}, {foo,1}] = bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2), - F1 = fun (Qn) -> - bpqueue:map_fold_filter_l( - fun (foo) -> true; - (_) -> false - end, - fun (2, 
_Num) -> stop; - (V, Num) -> {bar, -V, V - Num} end, - 0, Qn) - end, - - F2 = fun (Qn) -> - bpqueue:map_fold_filter_r( - fun (foo) -> true; - (_) -> false - end, - fun (2, _Num) -> stop; - (V, Num) -> {bar, -V, V - Num} end, - 0, Qn) - end, - - {Q9, 1} = F1(Q1), - [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = bpqueue:to_list(Q9), - {Q10, 0} = F2(Q1), - [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(Q10), - - {Q11, 0} = F1(Q), - [] = bpqueue:to_list(Q11), - {Q12, 0} = F2(Q), - [] = bpqueue:to_list(Q12), - BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], BPQ = bpqueue:from_list(BPQL), @@ -308,13 +262,6 @@ test_bpqueue() -> {BPQL, 0} = bpqueue_mffl([foo,bar], {none, [1]}, BPQ), {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ), {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ), - Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, - {[], 0} = Queue_to_list(bpqueue:map_fold_filter_l( - fun(_P)-> throw(explosion) end, - fun(_V, _N) -> throw(explosion) end, 0, Q)), - {[], 0} = Queue_to_list(bpqueue:map_fold_filter_r( - fun(_P)-> throw(explosion) end, - fun(_V, _N) -> throw(explosion) end, 0, Q)), %% process 1 item {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = @@ -348,6 +295,39 @@ test_bpqueue() -> passed. +bpqueue_test(In, Out, List, Fold, MapFoldFilter) -> + Q = bpqueue:new(), + {empty, _Q} = Out(Q), + + ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), + {LHS, RHS} = MapFoldFilter(fun(_P) -> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q), + {[], 0} = {bpqueue:to_list(LHS), RHS}, + + Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))), + false = bpqueue:is_empty(Q1), + 3 = bpqueue:len(Q1), + [{foo, [1, 2]}, {bar, [3]}] = List(Q1), + + {{value, foo, 1}, Q3} = Out(Q1), + {{value, foo, 2}, Q4} = Out(Q3), + {{value, bar, 3}, _Q5} = Out(Q4), + + F = fun (QN) -> + MapFoldFilter(fun (foo) -> true; + (_) -> false + end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, + 0, QN) + end, + {Q6, 0} = F(Q), + [] = bpqueue:to_list(Q6), + {Q7, 1} = F(Q1), + [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = List(Q7), + + Q1. + bpqueue_mffl(FF1A, FF2A, BPQ) -> bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ). 
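The refactor folds those mirrored left/right assertions into one body parameterised over the queue operations. The symmetry it leans on can be stated in two lines, using the same values as the deleted assertions:

    Q  = bpqueue:in(bar, 3, bpqueue:in(foo, 2, bpqueue:in(foo, 1, bpqueue:new()))),
    QR = bpqueue:in_r(bar, 3, bpqueue:in_r(foo, 2, bpqueue:in_r(foo, 1, bpqueue:new()))),
    {{value, foo, 1}, _} = bpqueue:out(Q),    %% reading the front of Q ...
    {{value, foo, 1}, _} = bpqueue:out_r(QR). %% ... mirrors reading the back of QR

The List argument to bpqueue_test/5 normalises to_list for the reversed case, which is why both queues can be checked against the same [{foo, [1, 2]}, {bar, [3]}] literal.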
-- cgit v1.2.1 From d05e19a41857040b1d10435509501a6f49a30a77 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 12:55:34 +0100 Subject: tweak --- src/rabbit_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index cba9e2c3..c1d3885b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -300,9 +300,9 @@ bpqueue_test(In, Out, List, Fold, MapFoldFilter) -> {empty, _Q} = Out(Q), ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), - {LHS, RHS} = MapFoldFilter(fun(_P) -> throw(explosion) end, - fun(_V, _N) -> throw(explosion) end, 0, Q), - {[], 0} = {bpqueue:to_list(LHS), RHS}, + {Q1M, 0} = MapFoldFilter(fun(_P) -> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q), + [] = bpqueue:to_list(Q1M), Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))), false = bpqueue:is_empty(Q1), -- cgit v1.2.1 From 9c8a0bedf7df7665c0abb5c097e4397ad6879ad0 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 13:11:22 +0100 Subject: refactor: simplify msg_store tests --- src/rabbit_tests.erl | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c1d3885b..b506784e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1414,6 +1414,12 @@ msg_store_write(Guids, MSCState) -> rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Guid, Guid, MSCStateN) end, {ok, MSCState}, Guids). +msg_store_remove(Ids) -> + lists:foldl(fun (Guid, ok) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, + [guid_bin(Guid)]) + end, ok, Ids). + test_msg_store() -> stop_msg_store(), ok = start_msg_store_empty(), @@ -1528,22 +1534,13 @@ test_msg_store() -> MSCStateN end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), GuidsBig)), %% .., then 3s by 1... - ok = lists:foldl( - fun (Guid, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(Guid)]) - end, ok, lists:seq(BigCount, 1, -3)), + ok = msg_store_remove(lists:seq(BigCount, 1, -3)), %% .., then remove 3s by 2, from the young end first. This hits %% GC (under 50% good data left, but no empty files. Must GC). - ok = lists:foldl( - fun (Guid, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(Guid)]) - end, ok, lists:seq(BigCount-1, 1, -3)), + ok = msg_store_remove(lists:seq(BigCount-1, 1, -3)), %% .., then remove 3s by 3, from the young end first. This hits %% GC... - ok = lists:foldl( - fun (Guid, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, [guid_bin(Guid)]) - end, ok, lists:seq(BigCount-2, 1, -3)), + ok = msg_store_remove(lists:seq(BigCount-2, 1, -3)), %% ensure empty false = msg_store_contains(false, GuidsBig), %% restart empty -- cgit v1.2.1 From fee1562beec1e481fb281bce5d8e11c0320de1ab Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 13:26:04 +0100 Subject: refactor: simplify msg_store tests --- src/rabbit_tests.erl | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index b506784e..c1b52e68 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1420,6 +1420,11 @@ msg_store_remove(Ids) -> [guid_bin(Guid)]) end, ok, Ids). +foreach_with_msg_store_client(Store, Ref, Fun, L) -> + rabbit_msg_store:client_terminate( + lists:foldl(fun (Guid, MSCState) -> Fun(Guid, Store, MSCState) end, + rabbit_msg_store:client_init(Store, Ref), L)). 
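The new helper threads a msg_store client session through a list and terminates the client when the fold completes; the fun receives the store name alongside each guid, so call sites stay table-driven. A typical call has this shape (it is the pattern the write loop in the following commit adopts):

    ok = foreach_with_msg_store_client(
           ?PERSISTENT_MSG_STORE, Ref,
           fun (Guid, Store, MSCState) ->
                   {ok, MSCState1} =
                       rabbit_msg_store:write(Store, Guid, Payload, MSCState),
                   MSCState1
           end, Guids).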
+ test_msg_store() -> stop_msg_store(), ok = start_msg_store_empty(), @@ -1518,21 +1523,21 @@ test_msg_store() -> BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], Payload = << 0:PayloadSizeBits >>, - ok = rabbit_msg_store:client_terminate( - lists:foldl( - fun (Guid, MSCStateN) -> - {ok, MSCStateM} = - rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Guid, Payload, MSCStateN), - MSCStateM - end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), GuidsBig)), + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, Store, MSCStateM) -> + {ok, MSCStateN} = + rabbit_msg_store:write(Store, Guid, Payload, MSCStateM), + MSCStateN + end, GuidsBig), %% now read them to ensure we hit the fast client-side reading - ok = rabbit_msg_store:client_terminate( - lists:foldl( - fun (Guid, MSCStateM) -> - {{ok, Payload}, MSCStateN} = - rabbit_msg_store:read(?PERSISTENT_MSG_STORE, Guid, MSCStateM), - MSCStateN - end, rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), GuidsBig)), + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, Store, MSCStateM) -> + {{ok, Payload}, MSCStateN} = + rabbit_msg_store:read(Store, Guid, MSCStateM), + MSCStateN + end, GuidsBig), %% .., then 3s by 1... ok = msg_store_remove(lists:seq(BigCount, 1, -3)), %% .., then remove 3s by 2, from the young end first. This hits -- cgit v1.2.1 From d3f7800dd364eb53781f8eee437284a3d401e730 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 13:35:01 +0100 Subject: refactor: simplify msg_store tests --- src/rabbit_tests.erl | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c1b52e68..34ff55fe 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1400,25 +1400,21 @@ msg_store_sync(Guids) -> end. msg_store_read(Guids, MSCState) -> - lists:foldl( - fun (Guid, MSCStateM) -> - {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( - ?PERSISTENT_MSG_STORE, Guid, MSCStateM), - MSCStateN - end, - MSCState, Guids). + lists:foldl(fun (Guid, MSCStateM) -> + {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, + Guid, MSCStateM), + MSCStateN + end, MSCState, Guids). msg_store_write(Guids, MSCState) -> - lists:foldl( - fun (Guid, {ok, MSCStateN}) -> - rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Guid, Guid, MSCStateN) end, - {ok, MSCState}, Guids). + lists:foldl(fun (Guid, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, + Guid, Guid, MSCStateN) + end, {ok, MSCState}, Guids). -msg_store_remove(Ids) -> - lists:foldl(fun (Guid, ok) -> - rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, - [guid_bin(Guid)]) - end, ok, Ids). +msg_store_remove(Guids) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). foreach_with_msg_store_client(Store, Ref, Fun, L) -> rabbit_msg_store:client_terminate( @@ -1539,13 +1535,13 @@ test_msg_store() -> MSCStateN end, GuidsBig), %% .., then 3s by 1... - ok = msg_store_remove(lists:seq(BigCount, 1, -3)), + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]), %% .., then remove 3s by 2, from the young end first. This hits %% GC (under 50% good data left, but no empty files. Must GC). - ok = msg_store_remove(lists:seq(BigCount-1, 1, -3)), + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), %% .., then remove 3s by 3, from the young end first. This hits %% GC... 
-    ok = msg_store_remove(lists:seq(BigCount-2, 1, -3)),
+    ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]),
     %% ensure empty
     false = msg_store_contains(false, GuidsBig),
     %% restart empty
-- cgit v1.2.1


From 451a11a4318df5971767bb0db562f46ef3a60c5c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 19 Jul 2010 14:20:41 +0100
Subject: Improve documentation and document what the persistent vs transient
 msg stores are for

---
 src/rabbit_variable_queue.erl | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
index a2c3c100..7200bc73 100644
--- a/src/rabbit_variable_queue.erl
+++ b/src/rabbit_variable_queue.erl
@@ -129,12 +129,11 @@
 %% done as promptly as possible whilst ensuring the queue remains
 %% responsive.
 %%
-%% In the queue we only keep track of messages that are pending
-%% delivery. This is fine for queue purging, but can be expensive for
-%% queue deletion: for queue deletion we must scan all the way through
-%% all remaining segments in the queue index (we start by doing a
-%% purge) and delete messages from the msg_store that we find in the
-%% queue index.
+%% In the queue we keep track of both messages that are pending
+%% delivery and messages that are pending acks. This ensures that
+%% purging (deleting the former) and deletion (deleting the former and
+%% the latter) are both cheap and do not require any scanning through
+%% qi segments.
 %%
 %% Notes on Clean Shutdown
 %% (This documents behaviour in variable_queue, queue_index and
@@ -149,6 +148,13 @@
 %% queue_index adds to these terms the details of its segments and
 %% stores the terms in the queue directory.
 %%
+%% Two message stores are used. One is created for persistent messages
+%% to durable queues that must survive restarts, and the other is used
+%% for all other messages that just happen to need to be written to
+%% disk. On start up we can therefore nuke the transient message
+%% store, and be sure that the messages in the persistent store are
+%% all that we need.
+%%
 %% The references to the msg_stores are there so that the msg_store
 %% knows to only trust its saved state if all of the queues it was
 %% previously talking to come up cleanly. Likewise, the queues
@@ -162,10 +168,12 @@
 %% startup, stores the next_seq_id reported by the queue_index as the
 %% transient_threshold. From that point on, whenever it's reading a
 %% message off disk via the queue_index, if the seq_id is below this
-%% threshold and the message is transient then it drops the
-%% message. This avoids the expensive operation of scanning the entire
-%% queue on startup in order to delete transient messages that were
-%% only pushed to disk to save memory.
+%% threshold and the message is transient then it drops the message
+%% (the message itself won't exist on disk because it would have been
+%% stored in the transient msg_store which would have had its saved
+%% state nuked on startup). This avoids the expensive operation of
+%% scanning the entire queue on startup in order to delete transient
+%% messages that were only pushed to disk to save memory.
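Condensed to a predicate, the recovery rule this comment block describes is simply (names hypothetical, not from the module):

    %% Keep an index entry found during recovery only if its message is
    %% persistent, or it was indexed after the recorded restart point.
    keep_on_recovery(SeqId, IsPersistent, TransientThreshold) ->
        IsPersistent orelse SeqId >= TransientThreshold.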
%% %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 05e822b26e0556cf3f9decb526a16803622675b2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 19 Jul 2010 14:47:59 +0100 Subject: More documentation --- src/rabbit_variable_queue.erl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7200bc73..ac3128d4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -99,6 +99,36 @@ %% demanded as the queue is read from. Thus only publishes to the %% queue will take up available spare capacity. %% +%% When we report our duration to the memory monitor, we calculate +%% average ingress and egress rates over the last two samples, and +%% then calculate our duration based on the sum of the ingress and +%% egress rates. More than two samples could be used, but it's a +%% balance between responding quickly enough to changes in +%% producers/consumers versus ignoring temporary blips. The problem +%% with temporary blips is that with just a few queues, they can have +%% substantial impact on the calculation of the average duration and +%% hence cause unnecessary I/O. Another alternative is to increase the +%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5 +%% seconds. However, that then runs the risk of being too slow to +%% inform the memory monitor of changes. Thus a 5 second interval, +%% plus a rolling average over the last two samples seems to work +%% well in practice. +%% +%% The sum of the ingress and egress rates is used because the egress +%% rate alone is not sufficient. Adding in the ingress rate means that +%% queues which are being flooded by messages are given more memory, +%% resulting in them being able to process the messages faster (by +%% doing less I/O, or at least deferring it) and thus helping keep +%% their mailboxes empty and thus the queue as a whole is more +%% responsive. If such a queue also has fast but previously idle +%% consumers, the consumer can then start to be driven as fast as it +%% can go, whereas if only egress rate was being used, the incoming +%% messages may have to be written to disk and then read back in, +%% resulting in the hard disk being a bottleneck in driving the +%% consumers. Generally, we want to give Rabbit every chance of +%% getting rid of messages as fast as possible and remaining +%% responsive, and using only the egress rate impacts that goal. +%% %% If a queue is full of transient messages, then the transition from %% betas to deltas will be potentially very expensive as millions of %% entries must be written to disk by the queue_index module. This can -- cgit v1.2.1 From a04151e688705c47af821f5287185a26e1beead5 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 15:26:13 +0100 Subject: flesh out vq API with funs useful for testing --- src/rabbit_variable_queue.erl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ac3128d4..5a53e2bd 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -41,6 +41,9 @@ -export([start/1]). +%% exported for testing only +-export([start_msg_store/2, stop_msg_store/0]). 
+ %%---------------------------------------------------------------------------- %% Definitions: @@ -344,11 +347,15 @@ start(DurableQueues) -> {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues), - Refs = [Ref || Terms <- AllTerms, - begin - Ref = proplists:get_value(persistent_ref, Terms), - Ref =/= undefined - end], + start_msg_store( + [Ref || Terms <- AllTerms, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + StartFunState). + +start_msg_store(Refs, StartFunState) -> ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), @@ -356,6 +363,10 @@ start(DurableQueues) -> [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), Refs, StartFunState]). +stop_msg_store() -> + ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), + ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). + init(QueueName, IsDurable, _Recover) -> {DeltaCount, Terms, IndexState} = rabbit_queue_index:init( -- cgit v1.2.1 From a6cc5c05cc60210a97bfd16649dd01dfd91d7a37 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 15:27:04 +0100 Subject: refactor msg_store starting/stopping in tests using the new funs in vq --- src/rabbit_tests.erl | 82 +++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 55 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 34ff55fe..54a2cc8c 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1354,28 +1354,10 @@ test_backing_queue() -> passed end. -start_msg_store_empty() -> - start_msg_store(undefined, {fun (ok) -> finished end, ok}). - -start_msg_store(ClientRefs, StartupFunState) -> - ok = rabbit_sup:start_child( - ?PERSISTENT_MSG_STORE, rabbit_msg_store, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), ClientRefs, - StartupFunState]), - ok = rabbit_sup:start_child( - ?TRANSIENT_MSG_STORE, rabbit_msg_store, - [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, - {fun (ok) -> finished end, ok}]). - -stop_msg_store() -> - case supervisor:terminate_child(rabbit_sup, ?PERSISTENT_MSG_STORE) of - ok -> supervisor:delete_child(rabbit_sup, ?PERSISTENT_MSG_STORE), - case supervisor:terminate_child(rabbit_sup, ?TRANSIENT_MSG_STORE) of - ok -> supervisor:delete_child(rabbit_sup, ?TRANSIENT_MSG_STORE); - E -> {transient, E} - end; - E -> {persistent, E} - end. +restart_msg_store_empty() -> + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + undefined, {fun (ok) -> finished end, ok}). guid_bin(X) -> erlang:md5(term_to_binary(X)). @@ -1422,8 +1404,7 @@ foreach_with_msg_store_client(Store, Ref, Fun, L) -> rabbit_msg_store:client_init(Store, Ref), L)). 
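The StartFunState that start_msg_store/2 hands to the persistent store is an iterator, {Fun, InitialState}: applying Fun to the state yields either finished or a {Guid, N, NextState} triple (the tests further down use N = 1 to preserve a message and N = 0 to drop it, suggesting a reference count). A trivial iterator over a fixed list, assuming that protocol:

    %% with both msg stores stopped, e.g. after stop_msg_store/0
    GuidIter = {fun ([])             -> finished;
                    ([Guid | Guids]) -> {Guid, 1, Guids}
                end,
                [guid_bin(1), guid_bin(2)]},
    ok = rabbit_variable_queue:start_msg_store([], GuidIter).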
test_msg_store() -> - stop_msg_store(), - ok = start_msg_store_empty(), + restart_msg_store_empty(), Self = self(), Guids = [guid_bin(M) || M <- lists:seq(1,100)], {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), @@ -1485,22 +1466,22 @@ test_msg_store() -> MSCState7 = msg_store_read(Guids2ndHalf, MSCState6), ok = rabbit_msg_store:client_terminate(MSCState7), %% stop and restart, preserving every other msg in 2nd half - ok = stop_msg_store(), - ok = start_msg_store([], {fun ([]) -> finished; - ([Guid|GuidsTail]) - when length(GuidsTail) rem 2 == 0 -> - {Guid, 1, GuidsTail}; - ([Guid|GuidsTail]) -> - {Guid, 0, GuidsTail} - end, Guids2ndHalf}), + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + [], {fun ([]) -> finished; + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf}), %% check we have the right msgs left lists:foldl( fun (Guid, Bool) -> not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)) end, false, Guids2ndHalf), %% restart empty - ok = stop_msg_store(), - ok = start_msg_store_empty(), + restart_msg_store_empty(), %% check we don't contain any of the msgs false = msg_store_contains(false, Guids), %% publish the first half again @@ -1511,8 +1492,7 @@ test_msg_store() -> msg_store_read(Guids1stHalf, MSCState9)), ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf), %% restart empty - ok = stop_msg_store(), - ok = start_msg_store_empty(), %% now safe to reuse guids + restart_msg_store_empty(), %% now safe to reuse guids %% push a lot of msgs in... at least 100 files worth {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), PayloadSizeBits = 65536, @@ -1545,8 +1525,7 @@ test_msg_store() -> %% ensure empty false = msg_store_contains(false, GuidsBig), %% restart empty - ok = stop_msg_store(), - ok = start_msg_store_empty(), + restart_msg_store_empty(), passed. queue_name(Name) -> @@ -1556,6 +1535,7 @@ test_queue() -> queue_name(<<"test">>). 
empty_test_queue() -> + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([]), {0, _Terms, Qi1} = test_queue_init(), _Qi2 = rabbit_queue_index:delete_and_terminate(Qi1), @@ -1601,7 +1581,6 @@ test_queue_index() -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, MostOfASegment = trunc(SegmentSize*0.75), - stop_msg_store(), ok = empty_test_queue(), SeqIdsA = lists:seq(0, MostOfASegment-1), SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), @@ -1613,7 +1592,7 @@ test_queue_index() -> ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsGuidsA)), _Qi5 = rabbit_queue_index:terminate([], Qi4), - ok = stop_msg_store(), + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0, as all the msgs were transient {0, _Terms1, Qi6} = test_queue_init(), @@ -1624,7 +1603,7 @@ test_queue_index() -> ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsGuidsB)), _Qi11 = rabbit_queue_index:terminate([], Qi10), - ok = stop_msg_store(), + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as MostOfASegment LenB = length(SeqIdsB), @@ -1639,12 +1618,11 @@ test_queue_index() -> %% Everything will have gone now because #pubs == #acks {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), _Qi19 = rabbit_queue_index:terminate([], Qi18), - ok = stop_msg_store(), + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked {0, _Terms3, Qi20} = test_queue_init(), _Qi21 = rabbit_queue_index:delete_and_terminate(Qi20), - ok = stop_msg_store(), ok = empty_test_queue(), %% These next bits are just to hit the auto deletion of segment files. 
@@ -1658,7 +1636,6 @@ test_queue_index() -> Qi26 = rabbit_queue_index:flush(Qi25), {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), _Qi28 = rabbit_queue_index:delete_and_terminate(Qi27), - ok = stop_msg_store(), ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment @@ -1669,7 +1646,6 @@ test_queue_index() -> Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), Qi34 = rabbit_queue_index:flush(Qi33), _Qi35 = rabbit_queue_index:delete_and_terminate(Qi34), - ok = stop_msg_store(), ok = empty_test_queue(), %% c) just fill up several segments of all pubs, then +dels, then +acks @@ -1680,7 +1656,6 @@ test_queue_index() -> Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), Qi40 = rabbit_queue_index:flush(Qi39), _Qi41 = rabbit_queue_index:delete_and_terminate(Qi40), - ok = stop_msg_store(), ok = empty_test_queue(), %% d) get messages in all states to a segment, then flush, then do @@ -1700,7 +1675,6 @@ test_queue_index() -> {ReadE, Qi52} = rabbit_queue_index:read(7, 9, Qi51), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), _Qi53 = rabbit_queue_index:delete_and_terminate(Qi52), - ok = stop_msg_store(), ok = empty_test_queue(), %% e) as for (d), but use terminate instead of read, which will @@ -1710,21 +1684,21 @@ test_queue_index() -> Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), Qi57 = rabbit_queue_index:ack([0], Qi56), _Qi58 = rabbit_queue_index:terminate([], Qi57), - ok = stop_msg_store(), + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), {5, _Terms9, Qi59} = test_queue_init(), {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), Qi61 = rabbit_queue_index:deliver([2,3,5,6], Qi60), Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), _Qi63 = rabbit_queue_index:terminate([], Qi62), - ok = stop_msg_store(), + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), {5, _Terms10, Qi64} = test_queue_init(), _Qi65 = rabbit_queue_index:delete_and_terminate(Qi64), - ok = stop_msg_store(), + rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([]), - ok = stop_msg_store(), + passed. variable_queue_publish(IsPersistent, Count, VQ) -> @@ -1753,7 +1727,6 @@ assert_prop(List, Prop, Value) -> Value = proplists:get_value(Prop, List). fresh_variable_queue() -> - stop_msg_store(), ok = empty_test_queue(), VQ = rabbit_variable_queue:init(test_queue(), true, false), S0 = rabbit_variable_queue:status(VQ), @@ -1890,9 +1863,8 @@ test_queue_recover() -> receive {'DOWN', MRef, process, QPid, _Info} -> ok after 10000 -> exit(timeout_waiting_for_queue_death) end, - ok = stop_msg_store(), - ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), - ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + rabbit_variable_queue:stop_msg_store(), + rabbit_amqqueue:stop(), ok = rabbit_amqqueue:start(), rabbit_amqqueue:with_or_die( QName, -- cgit v1.2.1 From 204725208ea3085df2dca611bc883fd6fab4bdf8 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 15:53:02 +0100 Subject: simplify qi tests --- src/rabbit_tests.erl | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 54a2cc8c..1a38621b 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1577,14 +1577,18 @@ test_queue_init() -> rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end). 
+empty_test_queue_init() -> + ok = empty_test_queue(), + {0, _Terms, Qi} = test_queue_init(), + Qi. + test_queue_index() -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, MostOfASegment = trunc(SegmentSize*0.75), - ok = empty_test_queue(), SeqIdsA = lists:seq(0, MostOfASegment-1), SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), - {0, _Terms, Qi0} = test_queue_init(), + Qi0 = empty_test_queue_init(), {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), @@ -1623,45 +1627,41 @@ test_queue_index() -> %% should get length back as 0 because all persistent msgs have been acked {0, _Terms3, Qi20} = test_queue_init(), _Qi21 = rabbit_queue_index:delete_and_terminate(Qi20), - ok = empty_test_queue(), %% These next bits are just to hit the auto deletion of segment files. %% First, partials: %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - {0, _Terms4, Qi22} = test_queue_init(), + Qi22 = empty_test_queue_init(), {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = rabbit_queue_index:deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), Qi26 = rabbit_queue_index:flush(Qi25), {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), _Qi28 = rabbit_queue_index:delete_and_terminate(Qi27), - ok = empty_test_queue(), %% b) partial pub+del, then move to new segment, then ack all in old segment - {0, _Terms5, Qi29} = test_queue_init(), + Qi29 = empty_test_queue_init(), {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = rabbit_queue_index:deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), Qi34 = rabbit_queue_index:flush(Qi33), _Qi35 = rabbit_queue_index:delete_and_terminate(Qi34), - ok = empty_test_queue(), %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), - {0, _Terms6, Qi36} = test_queue_init(), + Qi36 = empty_test_queue_init(), {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = rabbit_queue_index:deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), Qi40 = rabbit_queue_index:flush(Qi39), _Qi41 = rabbit_queue_index:delete_and_terminate(Qi40), - ok = empty_test_queue(), %% d) get messages in all states to a segment, then flush, then do %% the same again, don't flush and read. This will hit all %% possibilities in combining the segment with the journal. - {0, _Terms7, Qi42} = test_queue_init(), + Qi42 = empty_test_queue_init(), {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], false, Qi42), Qi44 = rabbit_queue_index:deliver([0,1,4], Qi43), Qi45 = rabbit_queue_index:ack([0], Qi44), @@ -1675,11 +1675,10 @@ test_queue_index() -> {ReadE, Qi52} = rabbit_queue_index:read(7, 9, Qi51), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), _Qi53 = rabbit_queue_index:delete_and_terminate(Qi52), - ok = empty_test_queue(), %% e) as for (d), but use terminate instead of read, which will %% exercise journal_minus_segment, not segment_plus_journal. 
- {0, _Terms8, Qi54} = test_queue_init(), + Qi54 = empty_test_queue_init(), {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], true, Qi54), Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), Qi57 = rabbit_queue_index:ack([0], Qi56), -- cgit v1.2.1 From 9bf26ff9baa023dedd9192d3f440619f80246465 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 16:00:14 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1a38621b..30b82650 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1534,13 +1534,25 @@ queue_name(Name) -> test_queue() -> queue_name(<<"test">>). +init_test_queue() -> + rabbit_queue_index:init( + test_queue(), false, + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end). + empty_test_queue() -> rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([]), - {0, _Terms, Qi1} = test_queue_init(), + {0, _Terms, Qi1} = init_test_queue(), _Qi2 = rabbit_queue_index:delete_and_terminate(Qi1), ok. +init_empty_test_queue() -> + ok = empty_test_queue(), + {0, _Terms, Qi} = init_test_queue(), + Qi. + queue_index_publish(SeqIds, Persistent, Qi) -> Ref = rabbit_guid:guid(), MsgStore = case Persistent of @@ -1570,25 +1582,13 @@ verify_read_with_published(Delivered, Persistent, verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko. -test_queue_init() -> - rabbit_queue_index:init( - test_queue(), false, - fun (Guid) -> - rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) - end). - -empty_test_queue_init() -> - ok = empty_test_queue(), - {0, _Terms, Qi} = test_queue_init(), - Qi. - test_queue_index() -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, MostOfASegment = trunc(SegmentSize*0.75), SeqIdsA = lists:seq(0, MostOfASegment-1), SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), - Qi0 = empty_test_queue_init(), + Qi0 = init_empty_test_queue(), {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), @@ -1599,7 +1599,7 @@ test_queue_index() -> rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0, as all the msgs were transient - {0, _Terms1, Qi6} = test_queue_init(), + {0, _Terms1, Qi6} = init_test_queue(), {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), @@ -1611,7 +1611,7 @@ test_queue_index() -> ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as MostOfASegment LenB = length(SeqIdsB), - {LenB, _Terms2, Qi12} = test_queue_init(), + {LenB, _Terms2, Qi12} = init_test_queue(), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), @@ -1625,14 +1625,14 @@ test_queue_index() -> rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked - {0, _Terms3, Qi20} = test_queue_init(), + {0, _Terms3, Qi20} = init_test_queue(), _Qi21 = rabbit_queue_index:delete_and_terminate(Qi20), %% These next bits are just to hit the auto deletion of segment files. 
%% First, partials: %% a) partial pub+del+ack, then move to new segment SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - Qi22 = empty_test_queue_init(), + Qi22 = init_empty_test_queue(), {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), Qi24 = rabbit_queue_index:deliver(SeqIdsC, Qi23), Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), @@ -1641,7 +1641,7 @@ test_queue_index() -> _Qi28 = rabbit_queue_index:delete_and_terminate(Qi27), %% b) partial pub+del, then move to new segment, then ack all in old segment - Qi29 = empty_test_queue_init(), + Qi29 = init_empty_test_queue(), {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), Qi31 = rabbit_queue_index:deliver(SeqIdsC, Qi30), {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), @@ -1651,7 +1651,7 @@ test_queue_index() -> %% c) just fill up several segments of all pubs, then +dels, then +acks SeqIdsD = lists:seq(0,SegmentSize*4), - Qi36 = empty_test_queue_init(), + Qi36 = init_empty_test_queue(), {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), Qi38 = rabbit_queue_index:deliver(SeqIdsD, Qi37), Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), @@ -1661,7 +1661,7 @@ test_queue_index() -> %% d) get messages in all states to a segment, then flush, then do %% the same again, don't flush and read. This will hit all %% possibilities in combining the segment with the journal. - Qi42 = empty_test_queue_init(), + Qi42 = init_empty_test_queue(), {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], false, Qi42), Qi44 = rabbit_queue_index:deliver([0,1,4], Qi43), Qi45 = rabbit_queue_index:ack([0], Qi44), @@ -1678,21 +1678,21 @@ test_queue_index() -> %% e) as for (d), but use terminate instead of read, which will %% exercise journal_minus_segment, not segment_plus_journal. - Qi54 = empty_test_queue_init(), + Qi54 = init_empty_test_queue(), {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], true, Qi54), Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), Qi57 = rabbit_queue_index:ack([0], Qi56), _Qi58 = rabbit_queue_index:terminate([], Qi57), rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), - {5, _Terms9, Qi59} = test_queue_init(), + {5, _Terms9, Qi59} = init_test_queue(), {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), Qi61 = rabbit_queue_index:deliver([2,3,5,6], Qi60), Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), _Qi63 = rabbit_queue_index:terminate([], Qi62), rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([test_queue()]), - {5, _Terms10, Qi64} = test_queue_init(), + {5, _Terms10, Qi64} = init_test_queue(), _Qi65 = rabbit_queue_index:delete_and_terminate(Qi64), rabbit_variable_queue:stop_msg_store(), -- cgit v1.2.1 From 56aded725eb8adbb971a5f221969df37f62b53f0 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 16:15:41 +0100 Subject: simplify qi tests --- src/rabbit_tests.erl | 137 +++++++++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 30b82650..21cb41df 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1541,6 +1541,12 @@ init_test_queue() -> rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end). +restart_test_queue(Qi) -> + _ = rabbit_queue_index:terminate([], Qi), + rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start([test_queue()]), + init_test_queue(). 
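restart_test_queue/1 compresses the recurring terminate / stop / start / re-init dance into one call, and returns the same {Count, Terms, Qi} triple as init_test_queue/0, so recovery assertions read straight off it:

    %% e.g. five unacked persistent publishes must survive a restart:
    {5, _Terms, Qi1} = restart_test_queue(Qi0),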
+ empty_test_queue() -> rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([]), @@ -1553,6 +1559,9 @@ init_empty_test_queue() -> {0, _Terms, Qi} = init_test_queue(), Qi. +with_empty_test_queue(Fun) -> + rabbit_queue_index:delete_and_terminate(Fun(init_empty_test_queue())). + queue_index_publish(SeqIds, Persistent, Qi) -> Ref = rabbit_guid:guid(), MsgStore = case Persistent of @@ -1595,23 +1604,17 @@ test_queue_index() -> {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), ok = verify_read_with_published(false, false, ReadA, lists:reverse(SeqIdsGuidsA)), - _Qi5 = rabbit_queue_index:terminate([], Qi4), - rabbit_variable_queue:stop_msg_store(), - ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0, as all the msgs were transient - {0, _Terms1, Qi6} = init_test_queue(), + {0, _Terms1, Qi6} = restart_test_queue(Qi4), {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), ok = verify_read_with_published(false, true, ReadB, lists:reverse(SeqIdsGuidsB)), - _Qi11 = rabbit_queue_index:terminate([], Qi10), - rabbit_variable_queue:stop_msg_store(), - ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as MostOfASegment LenB = length(SeqIdsB), - {LenB, _Terms2, Qi12} = init_test_queue(), + {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), @@ -1621,79 +1624,85 @@ test_queue_index() -> Qi17 = rabbit_queue_index:flush(Qi16), %% Everything will have gone now because #pubs == #acks {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), - _Qi19 = rabbit_queue_index:terminate([], Qi18), - rabbit_variable_queue:stop_msg_store(), - ok = rabbit_variable_queue:start([test_queue()]), %% should get length back as 0 because all persistent msgs have been acked - {0, _Terms3, Qi20} = init_test_queue(), + {0, _Terms3, Qi20} = restart_test_queue(Qi18), _Qi21 = rabbit_queue_index:delete_and_terminate(Qi20), + SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), %% These next bits are just to hit the auto deletion of segment files. 
%% First, partials: %% a) partial pub+del+ack, then move to new segment - SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), - Qi22 = init_empty_test_queue(), - {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), - Qi24 = rabbit_queue_index:deliver(SeqIdsC, Qi23), - Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), - Qi26 = rabbit_queue_index:flush(Qi25), - {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], false, Qi26), - _Qi28 = rabbit_queue_index:delete_and_terminate(Qi27), + with_empty_test_queue( + fun (Qi22) -> + {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), + Qi24 = rabbit_queue_index:deliver(SeqIdsC, Qi23), + Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), + Qi26 = rabbit_queue_index:flush(Qi25), + {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + false, Qi26), + Qi27 + end), %% b) partial pub+del, then move to new segment, then ack all in old segment - Qi29 = init_empty_test_queue(), - {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, false, Qi29), - Qi31 = rabbit_queue_index:deliver(SeqIdsC, Qi30), - {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], false, Qi31), - Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), - Qi34 = rabbit_queue_index:flush(Qi33), - _Qi35 = rabbit_queue_index:delete_and_terminate(Qi34), + with_empty_test_queue( + fun (Qi29) -> + {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + false, Qi29), + Qi31 = rabbit_queue_index:deliver(SeqIdsC, Qi30), + {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + false, Qi31), + Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), + rabbit_queue_index:flush(Qi33) + end), %% c) just fill up several segments of all pubs, then +dels, then +acks - SeqIdsD = lists:seq(0,SegmentSize*4), - Qi36 = init_empty_test_queue(), - {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), - Qi38 = rabbit_queue_index:deliver(SeqIdsD, Qi37), - Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), - Qi40 = rabbit_queue_index:flush(Qi39), - _Qi41 = rabbit_queue_index:delete_and_terminate(Qi40), + with_empty_test_queue( + fun (Qi36) -> + SeqIdsD = lists:seq(0,SegmentSize*4), + {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), + Qi38 = rabbit_queue_index:deliver(SeqIdsD, Qi37), + Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), + rabbit_queue_index:flush(Qi39) + end), %% d) get messages in all states to a segment, then flush, then do %% the same again, don't flush and read. This will hit all %% possibilities in combining the segment with the journal. 
- Qi42 = init_empty_test_queue(), - {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], false, Qi42), - Qi44 = rabbit_queue_index:deliver([0,1,4], Qi43), - Qi45 = rabbit_queue_index:ack([0], Qi44), - Qi46 = rabbit_queue_index:flush(Qi45), - {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), - Qi48 = rabbit_queue_index:deliver([2,3,5,6], Qi47), - Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), - {[], Qi50} = rabbit_queue_index:read(0, 4, Qi49), - {ReadD, Qi51} = rabbit_queue_index:read(4, 7, Qi50), - ok = verify_read_with_published(true, false, ReadD, [Four, Five, Six]), - {ReadE, Qi52} = rabbit_queue_index:read(7, 9, Qi51), - ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), - _Qi53 = rabbit_queue_index:delete_and_terminate(Qi52), + with_empty_test_queue( + fun (Qi42) -> + {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], + false, Qi42), + Qi44 = rabbit_queue_index:deliver([0,1,4], Qi43), + Qi45 = rabbit_queue_index:ack([0], Qi44), + Qi46 = rabbit_queue_index:flush(Qi45), + {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), + Qi48 = rabbit_queue_index:deliver([2,3,5,6], Qi47), + Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), + {[], Qi50} = rabbit_queue_index:read(0, 4, Qi49), + {ReadD, Qi51} = rabbit_queue_index:read(4, 7, Qi50), + ok = verify_read_with_published(true, false, ReadD, + [Four, Five, Six]), + {ReadE, Qi52} = rabbit_queue_index:read(7, 9, Qi51), + ok = verify_read_with_published(false, false, ReadE, + [Seven, Eight]), + Qi52 + end), %% e) as for (d), but use terminate instead of read, which will %% exercise journal_minus_segment, not segment_plus_journal. - Qi54 = init_empty_test_queue(), - {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], true, Qi54), - Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), - Qi57 = rabbit_queue_index:ack([0], Qi56), - _Qi58 = rabbit_queue_index:terminate([], Qi57), - rabbit_variable_queue:stop_msg_store(), - ok = rabbit_variable_queue:start([test_queue()]), - {5, _Terms9, Qi59} = init_test_queue(), - {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), - Qi61 = rabbit_queue_index:deliver([2,3,5,6], Qi60), - Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), - _Qi63 = rabbit_queue_index:terminate([], Qi62), - rabbit_variable_queue:stop_msg_store(), - ok = rabbit_variable_queue:start([test_queue()]), - {5, _Terms10, Qi64} = init_test_queue(), - _Qi65 = rabbit_queue_index:delete_and_terminate(Qi64), + with_empty_test_queue( + fun (Qi54) -> + {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + true, Qi54), + Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), + Qi57 = rabbit_queue_index:ack([0], Qi56), + {5, _Terms9, Qi59} = restart_test_queue(Qi57), + {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), + Qi61 = rabbit_queue_index:deliver([2,3,5,6], Qi60), + Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), + {5, _Terms10, Qi64} = restart_test_queue(Qi62), + Qi64 + end), rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([]), -- cgit v1.2.1 From ef556c6d0ffe3227e96a4829944fd528dad80545 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 16:48:13 +0100 Subject: simplify qi tests --- src/rabbit_tests.erl | 75 ++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 21cb41df..4260df1d 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1554,13 +1554,10 @@ empty_test_queue() -> _Qi2 
= rabbit_queue_index:delete_and_terminate(Qi1), ok. -init_empty_test_queue() -> +with_empty_test_queue(Fun) -> ok = empty_test_queue(), {0, _Terms, Qi} = init_test_queue(), - Qi. - -with_empty_test_queue(Fun) -> - rabbit_queue_index:delete_and_terminate(Fun(init_empty_test_queue())). + rabbit_queue_index:delete_and_terminate(Fun(Qi)). queue_index_publish(SeqIds, Persistent, Qi) -> Ref = rabbit_guid:guid(), @@ -1597,38 +1594,42 @@ test_queue_index() -> MostOfASegment = trunc(SegmentSize*0.75), SeqIdsA = lists:seq(0, MostOfASegment-1), SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), - Qi0 = init_empty_test_queue(), - {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), - {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), - {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), - {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), - ok = verify_read_with_published(false, false, ReadA, - lists:reverse(SeqIdsGuidsA)), - %% should get length back as 0, as all the msgs were transient - {0, _Terms1, Qi6} = restart_test_queue(Qi4), - {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), - {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), - {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), - {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), - ok = verify_read_with_published(false, true, ReadB, - lists:reverse(SeqIdsGuidsB)), - %% should get length back as MostOfASegment - LenB = length(SeqIdsB), - {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), - {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), - Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), - {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), - ok = verify_read_with_published(true, true, ReadC, - lists:reverse(SeqIdsGuidsB)), - Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), - Qi17 = rabbit_queue_index:flush(Qi16), - %% Everything will have gone now because #pubs == #acks - {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), - %% should get length back as 0 because all persistent msgs have been acked - {0, _Terms3, Qi20} = restart_test_queue(Qi18), - _Qi21 = rabbit_queue_index:delete_and_terminate(Qi20), - - SeqIdsC = lists:seq(0,trunc(SegmentSize/2)), + SeqIdsC = lists:seq(0, trunc(SegmentSize/2)), + + with_empty_test_queue( + fun (Qi0) -> + {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), + {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), + {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), + {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + ok = verify_read_with_published(false, false, ReadA, + lists:reverse(SeqIdsGuidsA)), + %% should get length back as 0, as all the msgs were transient + {0, _Terms1, Qi6} = restart_test_queue(Qi4), + {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), + {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), + {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + ok = verify_read_with_published(false, true, ReadB, + lists:reverse(SeqIdsGuidsB)), + %% should get length back as MostOfASegment + LenB = length(SeqIdsB), + {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), + {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), + Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), + {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + ok = verify_read_with_published(true, true, ReadC, + lists:reverse(SeqIdsGuidsB)), + Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), + Qi17 = rabbit_queue_index:flush(Qi16), + %% Everything will have gone now because #pubs == 
#acks + {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), + %% should get length back as 0 because all persistent + %% msgs have been acked + {0, _Terms3, Qi20} = restart_test_queue(Qi18), + Qi20 + end), + %% These next bits are just to hit the auto deletion of segment files. %% First, partials: %% a) partial pub+del+ack, then move to new segment -- cgit v1.2.1 From 0c14a380471e87a3cfe9d7162eb70f1fa1dd2e60 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 17:00:45 +0100 Subject: simplify qi tests --- src/rabbit_tests.erl | 102 ++++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4260df1d..bd2669a0 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1550,8 +1550,8 @@ restart_test_queue(Qi) -> empty_test_queue() -> rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start([]), - {0, _Terms, Qi1} = init_test_queue(), - _Qi2 = rabbit_queue_index:delete_and_terminate(Qi1), + {0, _Terms, Qi} = init_test_queue(), + _ = rabbit_queue_index:delete_and_terminate(Qi), ok. with_empty_test_queue(Fun) -> @@ -1595,6 +1595,7 @@ test_queue_index() -> SeqIdsA = lists:seq(0, MostOfASegment-1), SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), SeqIdsC = lists:seq(0, trunc(SegmentSize/2)), + SeqIdsD = lists:seq(0, SegmentSize*4), with_empty_test_queue( fun (Qi0) -> @@ -1626,83 +1627,84 @@ test_queue_index() -> {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), %% should get length back as 0 because all persistent %% msgs have been acked - {0, _Terms3, Qi20} = restart_test_queue(Qi18), - Qi20 + {0, _Terms3, Qi19} = restart_test_queue(Qi18), + Qi19 end), %% These next bits are just to hit the auto deletion of segment files. 
%% First, partials: %% a) partial pub+del+ack, then move to new segment with_empty_test_queue( - fun (Qi22) -> - {Qi23, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, false, Qi22), - Qi24 = rabbit_queue_index:deliver(SeqIdsC, Qi23), - Qi25 = rabbit_queue_index:ack(SeqIdsC, Qi24), - Qi26 = rabbit_queue_index:flush(Qi25), - {Qi27, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], - false, Qi26), - Qi27 + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + false, Qi4), + Qi5 end), %% b) partial pub+del, then move to new segment, then ack all in old segment with_empty_test_queue( - fun (Qi29) -> - {Qi30, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, - false, Qi29), - Qi31 = rabbit_queue_index:deliver(SeqIdsC, Qi30), - {Qi32, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], - false, Qi31), - Qi33 = rabbit_queue_index:ack(SeqIdsC, Qi32), - rabbit_queue_index:flush(Qi33) + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + false, Qi2), + Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3), + rabbit_queue_index:flush(Qi4) end), %% c) just fill up several segments of all pubs, then +dels, then +acks with_empty_test_queue( - fun (Qi36) -> - SeqIdsD = lists:seq(0,SegmentSize*4), - {Qi37, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, false, Qi36), - Qi38 = rabbit_queue_index:deliver(SeqIdsD, Qi37), - Qi39 = rabbit_queue_index:ack(SeqIdsD, Qi38), - rabbit_queue_index:flush(Qi39) + fun (Qi0) -> + {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2), + rabbit_queue_index:flush(Qi3) end), %% d) get messages in all states to a segment, then flush, then do %% the same again, don't flush and read. This will hit all %% possibilities in combining the segment with the journal. 
with_empty_test_queue( - fun (Qi42) -> - {Qi43, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], - false, Qi42), - Qi44 = rabbit_queue_index:deliver([0,1,4], Qi43), - Qi45 = rabbit_queue_index:ack([0], Qi44), - Qi46 = rabbit_queue_index:flush(Qi45), - {Qi47, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi46), - Qi48 = rabbit_queue_index:deliver([2,3,5,6], Qi47), - Qi49 = rabbit_queue_index:ack([1,2,3], Qi48), - {[], Qi50} = rabbit_queue_index:read(0, 4, Qi49), - {ReadD, Qi51} = rabbit_queue_index:read(4, 7, Qi50), + fun (Qi0) -> + {Qi1, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], + false, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {[], Qi8} = rabbit_queue_index:read(0, 4, Qi7), + {ReadD, Qi9} = rabbit_queue_index:read(4, 7, Qi8), ok = verify_read_with_published(true, false, ReadD, [Four, Five, Six]), - {ReadE, Qi52} = rabbit_queue_index:read(7, 9, Qi51), + {ReadE, Qi10} = rabbit_queue_index:read(7, 9, Qi9), ok = verify_read_with_published(false, false, ReadE, [Seven, Eight]), - Qi52 + Qi10 end), %% e) as for (d), but use terminate instead of read, which will %% exercise journal_minus_segment, not segment_plus_journal. with_empty_test_queue( - fun (Qi54) -> - {Qi55, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], - true, Qi54), - Qi56 = rabbit_queue_index:deliver([0,1,4], Qi55), - Qi57 = rabbit_queue_index:ack([0], Qi56), - {5, _Terms9, Qi59} = restart_test_queue(Qi57), - {Qi60, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi59), - Qi61 = rabbit_queue_index:deliver([2,3,5,6], Qi60), - Qi62 = rabbit_queue_index:ack([1,2,3], Qi61), - {5, _Terms10, Qi64} = restart_test_queue(Qi62), - Qi64 + fun (Qi0) -> + {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + true, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + {5, _Terms9, Qi4} = restart_test_queue(Qi3), + {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {5, _Terms10, Qi8} = restart_test_queue(Qi7), + Qi8 end), rabbit_variable_queue:stop_msg_store(), -- cgit v1.2.1 From b04b3a312e83a6e2d81db9b66841e879f624db29 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 17:33:49 +0100 Subject: minor refactoring of vq tests --- src/rabbit_tests.erl | 64 ++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bd2669a0..37f0f9e4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1737,7 +1737,7 @@ variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> assert_prop(List, Prop, Value) -> Value = proplists:get_value(Prop, List). -fresh_variable_queue() -> +with_fresh_variable_queue(Fun) -> ok = empty_test_queue(), VQ = rabbit_variable_queue:init(test_queue(), true, false), S0 = rabbit_variable_queue:status(VQ), @@ -1747,17 +1747,19 @@ fresh_variable_queue() -> assert_prop(S0, delta, {delta, undefined, 0, undefined}), assert_prop(S0, q3, 0), assert_prop(S0, q4, 0), - VQ. - + _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + passed. 
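With this harness in place, every variable queue test is a fun from a freshly initialised VQ to the final VQ; setup, the status assertions and delete_and_terminate all live in one place. The smallest test the contract admits is:

    passed = with_fresh_variable_queue(fun (VQ) -> VQ end).

which still exercises queue initialisation, the empty-state status checks, and teardown.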
+ test_variable_queue() -> - passed = test_variable_queue_dynamic_duration_change(), - passed = test_variable_queue_partial_segments_delta_thing(), - passed = test_variable_queue_all_the_bits_not_covered_elsewhere(), + [passed = with_fresh_variable_queue(F) || + F <- [fun test_variable_queue_dynamic_duration_change/1, + fun test_variable_queue_partial_segments_delta_thing/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1]], passed. -test_variable_queue_dynamic_duration_change() -> +test_variable_queue_dynamic_duration_change(VQ0) -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), - VQ0 = fresh_variable_queue(), %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, VQ1 = variable_queue_publish(false, Len1, VQ0), @@ -1777,10 +1779,7 @@ test_variable_queue_dynamic_duration_change() -> VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), VQ10 = rabbit_variable_queue:handle_pre_hibernate(VQ9), {empty, VQ11} = rabbit_variable_queue:fetch(true, VQ10), - - rabbit_variable_queue:delete_and_terminate(VQ11), - - passed. + VQ11. test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), @@ -1806,10 +1805,9 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> test_variable_queue_dynamic_duration_change_f(Len, VQ3) end. -test_variable_queue_partial_segments_delta_thing() -> +test_variable_queue_partial_segments_delta_thing(VQ0) -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), HalfSegment = SegmentSize div 2, - VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), VQ3 = variable_queue_wait_for_shuffling_end( @@ -1846,9 +1844,7 @@ test_variable_queue_partial_segments_delta_thing() -> VQ8 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ7), %% should be empty now {empty, VQ9} = rabbit_variable_queue:fetch(true, VQ8), - rabbit_variable_queue:delete_and_terminate(VQ9), - - passed. + VQ9. variable_queue_wait_for_shuffling_end(VQ) -> case rabbit_variable_queue:needs_idle_timeout(VQ) of @@ -1892,31 +1888,31 @@ test_queue_recover() -> end), passed. 
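%% The comprehension in test_variable_queue/0 above doubles as a tiny
%% test runner: matching each result against 'passed' aborts the run
%% with a badmatch as soon as one test misbehaves. The idiom in
%% isolation (runner name and test funs hypothetical):
run_all(Tests) ->
    [passed = T() || T <- Tests],
    passed.
%% run_all([fun () -> 1 = 1, passed end,
%%          fun () -> [] = lists:seq(1, 0), passed end]).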
-test_variable_queue_all_the_bits_not_covered_elsewhere() -> +test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> Count = 2*rabbit_queue_index:next_segment_boundary(0), - VQ0 = fresh_variable_queue(), VQ1 = variable_queue_publish(true, Count, VQ0), VQ2 = variable_queue_publish(false, Count, VQ1), VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), - {VQ4, _AckTags} = variable_queue_fetch(Count, true, false, Count + Count, VQ3), + {VQ4, _AckTags} = + variable_queue_fetch(Count, true, false, Count + Count, VQ3), {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), _VQ6 = rabbit_variable_queue:terminate(VQ5), VQ7 = rabbit_variable_queue:init(test_queue(), true, true), - {{_Msg1, true, _AckTag1, Count1}, VQ8} = rabbit_variable_queue:fetch(true, VQ7), + {{_Msg1, true, _AckTag1, Count1}, VQ8} = + rabbit_variable_queue:fetch(true, VQ7), VQ9 = variable_queue_publish(false, 1, VQ8), VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), - _VQ13 = rabbit_variable_queue:delete_and_terminate(VQ12), - - VQa0 = fresh_variable_queue(), - VQa1 = rabbit_variable_queue:set_ram_duration_target(0, VQa0), - VQa2 = variable_queue_publish(false, 4, VQa1), - {VQa3, AckTags} = variable_queue_fetch(2, false, false, 4, VQa2), - VQa4 = rabbit_variable_queue:requeue(AckTags, VQa3), - VQa5 = rabbit_variable_queue:idle_timeout(VQa4), - _VQa6 = rabbit_variable_queue:terminate(VQa5), - VQa7 = rabbit_variable_queue:init(test_queue(), true, true), - {empty, VQa8} = rabbit_variable_queue:fetch(false, VQa7), - _VQa9 = rabbit_variable_queue:delete_and_terminate(VQa8), - passed. + VQ12. + +test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> + VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), + VQ2 = variable_queue_publish(false, 4, VQ1), + {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), + VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), + VQ5 = rabbit_variable_queue:idle_timeout(VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), + VQ8. -- cgit v1.2.1 From 61974b44a7e5d031428411397d314ea3ebb72e9e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 19:37:12 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 62 ++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 37f0f9e4..7593c081 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1797,8 +1797,8 @@ test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> end, {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), {_Duration, VQ4} = rabbit_variable_queue:ram_duration(VQ3), - VQ5 = %% /37 otherwise the duration is just too high to stress things - rabbit_variable_queue:set_ram_duration_target(N/37, VQ4), + %% /37 otherwise the duration is just too high to stress things + VQ5 = rabbit_variable_queue:set_ram_duration_target(N/37, VQ4), io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), test_variable_queue_dynamic_duration_change_f(Len, VQ5) after 0 -> @@ -1853,6 +1853,35 @@ variable_queue_wait_for_shuffling_end(VQ) -> false -> VQ end. 
+test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> + Count = 2*rabbit_queue_index:next_segment_boundary(0), + VQ1 = variable_queue_publish(true, Count, VQ0), + VQ2 = variable_queue_publish(false, Count, VQ1), + VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), + {VQ4, _AckTags} = + variable_queue_fetch(Count, true, false, Count + Count, VQ3), + {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {{_Msg1, true, _AckTag1, Count1}, VQ8} = + rabbit_variable_queue:fetch(true, VQ7), + VQ9 = variable_queue_publish(false, 1, VQ8), + VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), + {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), + {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), + VQ12. + +test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> + VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), + VQ2 = variable_queue_publish(false, 4, VQ1), + {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), + VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), + VQ5 = rabbit_variable_queue:idle_timeout(VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), + VQ8. + test_queue_recover() -> Count = 2*rabbit_queue_index:next_segment_boundary(0), TxID = rabbit_guid:guid(), @@ -1887,32 +1916,3 @@ test_queue_recover() -> rabbit_amqqueue:internal_delete(QName) end), passed. - -test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> - Count = 2*rabbit_queue_index:next_segment_boundary(0), - VQ1 = variable_queue_publish(true, Count, VQ0), - VQ2 = variable_queue_publish(false, Count, VQ1), - VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), - {VQ4, _AckTags} = - variable_queue_fetch(Count, true, false, Count + Count, VQ3), - {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), - _VQ6 = rabbit_variable_queue:terminate(VQ5), - VQ7 = rabbit_variable_queue:init(test_queue(), true, true), - {{_Msg1, true, _AckTag1, Count1}, VQ8} = - rabbit_variable_queue:fetch(true, VQ7), - VQ9 = variable_queue_publish(false, 1, VQ8), - VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), - {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), - {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), - VQ12. - -test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> - VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), - VQ2 = variable_queue_publish(false, 4, VQ1), - {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), - VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), - VQ5 = rabbit_variable_queue:idle_timeout(VQ4), - _VQ6 = rabbit_variable_queue:terminate(VQ5), - VQ7 = rabbit_variable_queue:init(test_queue(), true, true), - {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), - VQ8. 
-- cgit v1.2.1 From 60f4000aef1fb7964cd80b2d7a843e0a469d84aa Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Mon, 19 Jul 2010 19:47:27 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7593c081..c12872f2 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1398,10 +1398,10 @@ msg_store_write(Guids, MSCState) -> msg_store_remove(Guids) -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). -foreach_with_msg_store_client(Store, Ref, Fun, L) -> +foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> rabbit_msg_store:client_terminate( - lists:foldl(fun (Guid, MSCState) -> Fun(Guid, Store, MSCState) end, - rabbit_msg_store:client_init(Store, Ref), L)). + lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MsgStore, MSCState) end, + rabbit_msg_store:client_init(MsgStore, Ref), L)). test_msg_store() -> restart_msg_store_empty(), @@ -1501,17 +1501,17 @@ test_msg_store() -> Payload = << 0:PayloadSizeBits >>, ok = foreach_with_msg_store_client( ?PERSISTENT_MSG_STORE, Ref, - fun (Guid, Store, MSCStateM) -> - {ok, MSCStateN} = - rabbit_msg_store:write(Store, Guid, Payload, MSCStateM), + fun (Guid, MsgStore, MSCStateM) -> + {ok, MSCStateN} = rabbit_msg_store:write( + MsgStore, Guid, Payload, MSCStateM), MSCStateN end, GuidsBig), %% now read them to ensure we hit the fast client-side reading ok = foreach_with_msg_store_client( ?PERSISTENT_MSG_STORE, Ref, - fun (Guid, Store, MSCStateM) -> - {{ok, Payload}, MSCStateN} = - rabbit_msg_store:read(Store, Guid, MSCStateM), + fun (Guid, MsgStore, MSCStateM) -> + {{ok, Payload}, MSCStateN} = rabbit_msg_store:read( + MsgStore, Guid, MSCStateM), MSCStateN end, GuidsBig), %% .., then 3s by 1... @@ -1718,11 +1718,10 @@ variable_queue_publish(IsPersistent, Count, VQ) -> rabbit_variable_queue:publish( rabbit_basic:message( rabbit_misc:r(<<>>, exchange, <<>>), - <<>>, #'P_basic'{delivery_mode = - case IsPersistent of - true -> 2; - false -> 1 - end}, <<>>), VQN) + <<>>, #'P_basic'{delivery_mode = case IsPersistent of + true -> 2; + false -> 1 + end}, <<>>), VQN) end, VQ, lists:seq(1, Count)). 
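%% The inline case above maps the persistence flag onto AMQP 0-9-1's
%% delivery_mode basic property (1 = transient, 2 = persistent). Were
%% the mapping needed in more places it could be factored out; a
%% sketch (helper name hypothetical):
delivery_mode(true)  -> 2;
delivery_mode(false) -> 1.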
variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> -- cgit v1.2.1 From 0a6e6a18ba7ad8f4c23099df92c93908bb5a3492 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 20 Jul 2010 11:28:17 +0100 Subject: made one of the vq tests a little less obscure ...and don't rely on timing --- src/rabbit_tests.erl | 67 +++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index c12872f2..f0881021 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1762,47 +1762,44 @@ test_variable_queue_dynamic_duration_change(VQ0) -> %% start by sending in a couple of segments worth Len1 = 2*SegmentSize, VQ1 = variable_queue_publish(false, Len1, VQ0), - {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), - {ok, _TRef} = timer:send_after(1000, {duration, 60, - fun (V) -> (V*0.75)-1 end}), - VQ3 = test_variable_queue_dynamic_duration_change_f(Len1, VQ2), - {VQ4, AckTags} = variable_queue_fetch(Len1, false, false, Len1, VQ3), - VQ5 = rabbit_variable_queue:ack(AckTags, VQ4), - {empty, VQ6} = rabbit_variable_queue:fetch(true, VQ5), + + VQ2 = squeeze_and_relax_queue(Len1, VQ1), + + %% drain + {VQ3, AckTags} = variable_queue_fetch(Len1, false, false, Len1, VQ2), + VQ4 = rabbit_variable_queue:ack(AckTags, VQ3), + {empty, VQ5} = rabbit_variable_queue:fetch(true, VQ4), %% just publish and fetch some persistent msgs, this hits the the %% partial segment path in queue_index due to the period when %% duration was 0 and the entire queue was delta. - VQ7 = variable_queue_publish(true, 20, VQ6), - {VQ8, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ7), - VQ9 = rabbit_variable_queue:ack(AckTags1, VQ8), - VQ10 = rabbit_variable_queue:handle_pre_hibernate(VQ9), - {empty, VQ11} = rabbit_variable_queue:fetch(true, VQ10), - VQ11. - -test_variable_queue_dynamic_duration_change_f(Len, VQ0) -> + VQ6 = variable_queue_publish(true, 20, VQ5), + {VQ7, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ6), + VQ8 = rabbit_variable_queue:ack(AckTags1, VQ7), + VQ9 = rabbit_variable_queue:handle_pre_hibernate(VQ8), + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + VQ10. + +squeeze_and_relax_queue(Len, VQ0) -> + Churn = Len div 32, + VQ1 = publish_fetch_and_ack(Churn, Len, VQ0), + {Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), + lists:foldl( + fun (Duration1, VQ3) -> + {_Duration, VQ4} = rabbit_variable_queue:ram_duration(VQ3), + io:format("~p:~n~p~n", + [Duration1, rabbit_variable_queue:status(VQ4)]), + VQ5 = rabbit_variable_queue:set_ram_duration_target( + Duration1, VQ4), + publish_fetch_and_ack(Churn, Len, VQ5) + end, VQ2, [Duration / 4, 0, Duration / 4, infinity]). 
+ +publish_fetch_and_ack(0, _Len, VQ0) -> + VQ0; +publish_fetch_and_ack(N, Len, VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), - VQ3 = rabbit_variable_queue:ack([AckTag], VQ2), - receive - {duration, _, stop} -> - VQ3; - {duration, N, Fun} -> - N1 = lists:max([Fun(N), 0]), - Fun1 = case N1 of - 0 -> fun (V) -> (V+1)/0.75 end; - _ when N1 > 400 -> stop; - _ -> Fun - end, - {ok, _TRef} = timer:send_after(1000, {duration, N1, Fun1}), - {_Duration, VQ4} = rabbit_variable_queue:ram_duration(VQ3), - %% /37 otherwise the duration is just too high to stress things - VQ5 = rabbit_variable_queue:set_ram_duration_target(N/37, VQ4), - io:format("~p:~n~p~n~n", [N, rabbit_variable_queue:status(VQ5)]), - test_variable_queue_dynamic_duration_change_f(Len, VQ5) - after 0 -> - test_variable_queue_dynamic_duration_change_f(Len, VQ3) - end. + publish_fetch_and_ack(N-1, Len, rabbit_variable_queue:ack([AckTag], VQ2)). test_variable_queue_partial_segments_delta_thing(VQ0) -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), -- cgit v1.2.1 From 4f9c4017cc3df0576b1de4ece3fb4dfc40749531 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 20 Jul 2010 12:52:50 +0100 Subject: remove superfluous test this is covered elsewhere already, notably in test_variable_queue_partial_segments_delta_thing. --- src/rabbit_tests.erl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index f0881021..58a470a9 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1770,15 +1770,7 @@ test_variable_queue_dynamic_duration_change(VQ0) -> VQ4 = rabbit_variable_queue:ack(AckTags, VQ3), {empty, VQ5} = rabbit_variable_queue:fetch(true, VQ4), - %% just publish and fetch some persistent msgs, this hits the the - %% partial segment path in queue_index due to the period when - %% duration was 0 and the entire queue was delta. - VQ6 = variable_queue_publish(true, 20, VQ5), - {VQ7, AckTags1} = variable_queue_fetch(20, true, false, 20, VQ6), - VQ8 = rabbit_variable_queue:ack(AckTags1, VQ7), - VQ9 = rabbit_variable_queue:handle_pre_hibernate(VQ8), - {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), - VQ10. + VQ5. squeeze_and_relax_queue(Len, VQ0) -> Churn = Len div 32, -- cgit v1.2.1 From cab1334151fcb54c979075742a1441dc3cd63166 Mon Sep 17 00:00:00 2001 From: Alexandru Scvortov Date: Tue, 20 Jul 2010 14:22:44 +0100 Subject: reporting of cluster node types now works --- src/rabbit_mnesia.erl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index e2b6927f..7e1b8a9f 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -77,7 +77,7 @@ status() -> {disc, disc_copies}, {ram, ram_copies}], begin - Nodes = mnesia:table_info(schema, CopyType), + Nodes = nodes_of_type(CopyType), Nodes =/= [] end]; no -> case mnesia:system_info(db_nodes) of @@ -144,6 +144,15 @@ empty_ram_only_tables() -> %%-------------------------------------------------------------------- +nodes_of_type(Type) -> + %% This function should return the nodes of a certain type (ram, + %% disc or disc_only) in the current cluster. The type of nodes + %% is determined when the cluster is initially configured. + %% Specifically, we check whether a certain table, which we know + %% will be written to disk on a disc node, is stored on disk or in + %% RAM. + mnesia:table_info(rabbit_durable_exchange, Type). 
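%% nodes_of_type/1 above leans on mnesia:table_info(Tab, disc_copies)
%% and friends returning the nodes that hold Tab with that storage
%% type. Classifying the local node then reduces to a membership
%% test; a sketch ignoring disc_only nodes (function name
%% hypothetical):
local_node_type() ->
    case lists:member(node(), nodes_of_type(disc_copies)) of
        true  -> disc;
        false -> ram
    end.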
+ table_definitions() -> [{rabbit_user, [{record_name, user}, @@ -175,10 +184,10 @@ table_definitions() -> [{record_name, reverse_route}, {attributes, record_info(fields, reverse_route)}, {type, ordered_set}]}, - {rabbit_durable_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {disc_copies, [node()]}]}, + {rabbit_durable_exchange, % if you change this entry, + [{record_name, exchange}, % consider the implications + {attributes, record_info(fields, exchange)}, % on nodes_of_type/1 + {disc_copies, [node()]}]}, % <--- this line is particularly important {rabbit_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}]}, -- cgit v1.2.1 From bf0d23434301e6a6f0bb547d46dc2dd228c9814c Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 20 Jul 2010 16:48:01 +0100 Subject: vq test refactoring --- src/rabbit_tests.erl | 80 ++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 58a470a9..4000fa95 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1736,16 +1736,17 @@ variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> assert_prop(List, Prop, Value) -> Value = proplists:get_value(Prop, List). +assert_props(List, PropVals) -> + [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. + with_fresh_variable_queue(Fun) -> ok = empty_test_queue(), VQ = rabbit_variable_queue:init(test_queue(), true, false), S0 = rabbit_variable_queue:status(VQ), - assert_prop(S0, len, 0), - assert_prop(S0, q1, 0), - assert_prop(S0, q2, 0), - assert_prop(S0, delta, {delta, undefined, 0, undefined}), - assert_prop(S0, q3, 0), - assert_prop(S0, q4, 0), + assert_props(S0, [{q1, 0}, {q2, 0}, + {delta, {delta, undefined, 0, undefined}}, + {q3, 0}, {q4, 0}, + {len, 0}]), _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), passed. @@ -1759,32 +1760,31 @@ test_variable_queue() -> test_variable_queue_dynamic_duration_change(VQ0) -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), + %% start by sending in a couple of segments worth - Len1 = 2*SegmentSize, - VQ1 = variable_queue_publish(false, Len1, VQ0), + Len = 2*SegmentSize, + VQ1 = variable_queue_publish(false, Len, VQ0), - VQ2 = squeeze_and_relax_queue(Len1, VQ1), + %% squeeze and relax queue + Churn = Len div 32, + VQ2 = publish_fetch_and_ack(Churn, Len, VQ1), + {Duration, VQ3} = rabbit_variable_queue:ram_duration(VQ2), + VQ7 = lists:foldl( + fun (Duration1, VQ4) -> + {_Duration, VQ5} = rabbit_variable_queue:ram_duration(VQ4), + io:format("~p:~n~p~n", + [Duration1, rabbit_variable_queue:status(VQ5)]), + VQ6 = rabbit_variable_queue:set_ram_duration_target( + Duration1, VQ5), + publish_fetch_and_ack(Churn, Len, VQ6) + end, VQ3, [Duration / 4, 0, Duration / 4, infinity]), %% drain - {VQ3, AckTags} = variable_queue_fetch(Len1, false, false, Len1, VQ2), - VQ4 = rabbit_variable_queue:ack(AckTags, VQ3), - {empty, VQ5} = rabbit_variable_queue:fetch(true, VQ4), + {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags, VQ8), + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), - VQ5. 
- -squeeze_and_relax_queue(Len, VQ0) -> - Churn = Len div 32, - VQ1 = publish_fetch_and_ack(Churn, Len, VQ0), - {Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), - lists:foldl( - fun (Duration1, VQ3) -> - {_Duration, VQ4} = rabbit_variable_queue:ram_duration(VQ3), - io:format("~p:~n~p~n", - [Duration1, rabbit_variable_queue:status(VQ4)]), - VQ5 = rabbit_variable_queue:set_ram_duration_target( - Duration1, VQ4), - publish_fetch_and_ack(Churn, Len, VQ5) - end, VQ2, [Duration / 4, 0, Duration / 4, infinity]). + VQ10. publish_fetch_and_ack(0, _Len, VQ0) -> VQ0; @@ -1803,30 +1803,30 @@ test_variable_queue_partial_segments_delta_thing(VQ0) -> %% one segment in q3 as betas, and half a segment in delta S3 = rabbit_variable_queue:status(VQ3), io:format("~p~n", [S3]), - assert_prop(S3, delta, {delta, SegmentSize, HalfSegment, - SegmentSize + HalfSegment}), - assert_prop(S3, q3, SegmentSize), - assert_prop(S3, len, SegmentSize + HalfSegment), + assert_props(S3, [{delta, {delta, SegmentSize, HalfSegment, + SegmentSize + HalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment}]), VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), VQ5 = variable_queue_wait_for_shuffling_end( variable_queue_publish(true, 1, VQ4)), %% should have 1 alpha, but it's in the same segment as the deltas S5 = rabbit_variable_queue:status(VQ5), io:format("~p~n", [S5]), - assert_prop(S5, q1, 1), - assert_prop(S5, delta, {delta, SegmentSize, HalfSegment, - SegmentSize + HalfSegment}), - assert_prop(S5, q3, SegmentSize), - assert_prop(S5, len, SegmentSize + HalfSegment + 1), + assert_props(S5, [{q1, 1}, + {delta, {delta, SegmentSize, HalfSegment, + SegmentSize + HalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment + 1}]), {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, SegmentSize + HalfSegment + 1, VQ5), %% the half segment should now be in q3 as betas S6 = rabbit_variable_queue:status(VQ6), io:format("~p~n", [S6]), - assert_prop(S6, delta, {delta, undefined, 0, undefined}), - assert_prop(S6, q1, 1), - assert_prop(S6, q3, HalfSegment), - assert_prop(S6, len, HalfSegment + 1), + assert_props(S6, [{q1, 1}, + {delta, {delta, undefined, 0, undefined}}, + {q3, HalfSegment}, + {len, HalfSegment + 1}]), {VQ7, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, HalfSegment + 1, VQ6), VQ8 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ7), -- cgit v1.2.1 From 90f35e4b5090197ff63826a60f917af59e761660 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 20 Jul 2010 18:33:13 +0100 Subject: more vq test refactoring --- src/rabbit_tests.erl | 66 +++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4000fa95..dff3ec5a 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1796,43 +1796,45 @@ publish_fetch_and_ack(N, Len, VQ0) -> test_variable_queue_partial_segments_delta_thing(VQ0) -> SegmentSize = rabbit_queue_index:next_segment_boundary(0), HalfSegment = SegmentSize div 2, - VQ1 = variable_queue_publish(true, SegmentSize + HalfSegment, VQ0), + OneAndAHalfSegment = SegmentSize + HalfSegment, + VQ1 = variable_queue_publish(true, OneAndAHalfSegment, VQ0), {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), - VQ3 = variable_queue_wait_for_shuffling_end( - rabbit_variable_queue:set_ram_duration_target(0, VQ2)), - %% one segment in q3 as betas, and half a segment in delta - S3 = rabbit_variable_queue:status(VQ3), - 
io:format("~p~n", [S3]), - assert_props(S3, [{delta, {delta, SegmentSize, HalfSegment, - SegmentSize + HalfSegment}}, - {q3, SegmentSize}, - {len, SegmentSize + HalfSegment}]), + VQ3 = check_variable_queue_status( + rabbit_variable_queue:set_ram_duration_target(0, VQ2), + %% one segment in q3 as betas, and half a segment in delta + [{delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment}]), + %% one alpha, but it's in the same segment as the deltas VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), - VQ5 = variable_queue_wait_for_shuffling_end( - variable_queue_publish(true, 1, VQ4)), - %% should have 1 alpha, but it's in the same segment as the deltas - S5 = rabbit_variable_queue:status(VQ5), - io:format("~p~n", [S5]), - assert_props(S5, [{q1, 1}, - {delta, {delta, SegmentSize, HalfSegment, - SegmentSize + HalfSegment}}, - {q3, SegmentSize}, - {len, SegmentSize + HalfSegment + 1}]), + VQ5 = check_variable_queue_status( + variable_queue_publish(true, 1, VQ4), + [{q1, 1}, + {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment + 1}]), {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, SegmentSize + HalfSegment + 1, VQ5), - %% the half segment should now be in q3 as betas - S6 = rabbit_variable_queue:status(VQ6), - io:format("~p~n", [S6]), - assert_props(S6, [{q1, 1}, - {delta, {delta, undefined, 0, undefined}}, - {q3, HalfSegment}, - {len, HalfSegment + 1}]), - {VQ7, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, - HalfSegment + 1, VQ6), - VQ8 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ7), + VQ7 = check_variable_queue_status( + VQ6, + %% the half segment should now be in q3 as betas + [{q1, 1}, + {delta, {delta, undefined, 0, undefined}}, + {q3, HalfSegment}, + {len, HalfSegment + 1}]), + {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, + HalfSegment + 1, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8), %% should be empty now - {empty, VQ9} = rabbit_variable_queue:fetch(true, VQ8), - VQ9. + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + VQ10. + +check_variable_queue_status(VQ0, Props) -> + VQ1 = variable_queue_wait_for_shuffling_end(VQ0), + S = rabbit_variable_queue:status(VQ1), + io:format("~p~n", [S]), + assert_props(S, Props), + VQ1. variable_queue_wait_for_shuffling_end(VQ) -> case rabbit_variable_queue:needs_idle_timeout(VQ) of -- cgit v1.2.1 From 60a75a58c7edf09668ab9770f49a4556539d8b87 Mon Sep 17 00:00:00 2001 From: Alexandru Scvortov Date: Tue, 20 Jul 2010 22:49:18 +0100 Subject: reformatted comments --- src/rabbit_mnesia.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 7e1b8a9f..689f799d 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -184,10 +184,12 @@ table_definitions() -> [{record_name, reverse_route}, {attributes, record_info(fields, reverse_route)}, {type, ordered_set}]}, - {rabbit_durable_exchange, % if you change this entry, - [{record_name, exchange}, % consider the implications - {attributes, record_info(fields, exchange)}, % on nodes_of_type/1 - {disc_copies, [node()]}]}, % <--- this line is particularly important + %% Consider the implications to nodes_of_type/1 before altering + %% the next entry. 
+ {rabbit_durable_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {disc_copies, [node()]}]}, {rabbit_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}]}, -- cgit v1.2.1 From 82afd48cfc995368ed78844f37ac6a523eb30283 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 21 Jul 2010 05:50:03 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index dff3ec5a..59d881bb 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1805,10 +1805,10 @@ test_variable_queue_partial_segments_delta_thing(VQ0) -> [{delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, {q3, SegmentSize}, {len, SegmentSize + HalfSegment}]), - %% one alpha, but it's in the same segment as the deltas VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), VQ5 = check_variable_queue_status( variable_queue_publish(true, 1, VQ4), + %% one alpha, but it's in the same segment as the deltas [{q1, 1}, {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, {q3, SegmentSize}, -- cgit v1.2.1 From 508fa90dec361c4dd412c87b1c0d0f28a392cfc3 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 21 Jul 2010 06:05:07 +0100 Subject: cosmetic --- src/rabbit_tests.erl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 59d881bb..7d03ecbd 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1844,13 +1844,14 @@ variable_queue_wait_for_shuffling_end(VQ) -> end. test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> - Count = 2*rabbit_queue_index:next_segment_boundary(0), + Count = 2 * rabbit_queue_index:next_segment_boundary(0), VQ1 = variable_queue_publish(true, Count, VQ0), VQ2 = variable_queue_publish(false, Count, VQ1), VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), - {VQ4, _AckTags} = - variable_queue_fetch(Count, true, false, Count + Count, VQ3), - {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), + {VQ4, _AckTags} = variable_queue_fetch(Count, true, false, + Count + Count, VQ3), + {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, + Count, VQ4), _VQ6 = rabbit_variable_queue:terminate(VQ5), VQ7 = rabbit_variable_queue:init(test_queue(), true, true), {{_Msg1, true, _AckTag1, Count1}, VQ8} = @@ -1873,7 +1874,7 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> VQ8. test_queue_recover() -> - Count = 2*rabbit_queue_index:next_segment_boundary(0), + Count = 2 * rabbit_queue_index:next_segment_boundary(0), TxID = rabbit_guid:guid(), {new, #amqqueue { pid = QPid, name = QName }} = rabbit_amqqueue:declare(test_queue(), true, false, [], none), -- cgit v1.2.1 From 8d3c44c15648c302bef221232eb7452104a876f7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 21 Jul 2010 06:59:57 +0100 Subject: flesh out backing_queue API --- include/rabbit_backing_queue_spec.hrl | 1 + src/rabbit_amqqueue.erl | 4 +++- src/rabbit_backing_queue.erl | 5 +++++ src/rabbit_invariable_queue.erl | 5 ++++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 0f57f624..005994f0 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -38,6 +38,7 @@ -type(ack_required() :: boolean()). -spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok'). 
+-spec(stop/0 :: () -> 'ok'). -spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index e5faef54..df947443 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -167,7 +167,9 @@ start() -> stop() -> ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), - ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup). + ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), + ok = BQ:stop(). find_durable_queues() -> Node = node(), diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index b76ae11e..2230c507 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -42,6 +42,11 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, + %% Called to tear down any state/resources. NB: Implementations + %% should not depend on this function being called on shutdown + %% and instead should hook into the rabbit supervision hierarchy. + {stop, 0}, + %% Initialise the backing queue and its state. {init, 3}, diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index cf8bc8f7..4e0dad84 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -37,7 +37,7 @@ set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, status/1]). --export([start/1]). +-export([start/1, stop/0]). -behaviour(rabbit_backing_queue). @@ -61,6 +61,9 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). +stop() -> + ok = rabbit_sup:stop_child(rabbit_persister). + init(QName, IsDurable, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); -- cgit v1.2.1 From 5697d474b3a6eabf2ad6b35b38ed444dbc80030e Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 21 Jul 2010 07:39:10 +0100 Subject: rabbit_amqqueue:stop already stops the msg_store --- src/rabbit_tests.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 7d03ecbd..73f892a4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1890,7 +1890,6 @@ test_queue_recover() -> receive {'DOWN', MRef, process, QPid, _Info} -> ok after 10000 -> exit(timeout_waiting_for_queue_death) end, - rabbit_variable_queue:stop_msg_store(), rabbit_amqqueue:stop(), ok = rabbit_amqqueue:start(), rabbit_amqqueue:with_or_die( -- cgit v1.2.1 From ec4ca063cbb5d8503a7ef992d349783ff80f408b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Wed, 21 Jul 2010 13:20:15 +0100 Subject: use the new BQ:stop/0 function in tests for better symmetry --- src/rabbit_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 73f892a4..cff55c91 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1543,12 +1543,12 @@ init_test_queue() -> restart_test_queue(Qi) -> _ = rabbit_queue_index:terminate([], Qi), - rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:stop(), ok = rabbit_variable_queue:start([test_queue()]), init_test_queue(). 
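%% The new {stop, 0} callback pairs with {start, 1}: the concrete
%% module is resolved at runtime from the application environment, as
%% in rabbit_amqqueue:stop/0 above. A do-nothing pair in a
%% hypothetical implementation module (all other callbacks elided):
-module(null_backing_queue).
-export([start/1, stop/0]).
start(_DurableQueueNames) -> ok. %% nothing to recover
stop()                    -> ok. %% nothing to tear down
%% which, once configured as the backing_queue_module, is reached via
%%   {ok, BQ} = application:get_env(rabbit, backing_queue_module),
%%   ok = BQ:stop().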
empty_test_queue() -> - rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:stop(), ok = rabbit_variable_queue:start([]), {0, _Terms, Qi} = init_test_queue(), _ = rabbit_queue_index:delete_and_terminate(Qi), @@ -1707,7 +1707,7 @@ test_queue_index() -> Qi8 end), - rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:stop(), ok = rabbit_variable_queue:start([]), passed. -- cgit v1.2.1 From a00c6066f4b4b88a2e436206b86e6e28b395e0d8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 21 Jul 2010 17:29:24 +0100 Subject: Bump specs requirements to R14A --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a66d0640..5694292d 100644 --- a/Makefile +++ b/Makefile @@ -41,10 +41,10 @@ RABBIT_PLT=rabbit.plt ifndef USE_SPECS # our type specs rely on features and bug fixes in dialyzer that are -# only available in R13B04 upwards (R13B04 is erts 5.7.5) +# only available in R14A upwards (R13B04 is erts 5.7.5) # # NB: the test assumes that version number will only contain single digits -USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.4" ]; then echo "true"; else echo "false"; fi) +USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.5" ]; then echo "true"; else echo "false"; fi) endif #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests -- cgit v1.2.1 From ac9cb2a32a2c3cf63e03a44416dd3a1d9c2969b7 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 22 Jul 2010 09:36:55 +0100 Subject: refactor: extract rates into separate part of vq state --- src/rabbit_variable_queue.erl | 88 ++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 06094950..8bff66af 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -236,13 +236,11 @@ ram_index_count, out_counter, in_counter, - egress_rate, - avg_egress_rate, - ingress_rate, - avg_ingress_rate, - rate_timestamp + rates }). +-record(rates, { egress, ingress, avg_egress, avg_ingress, timestamp }). + -record(msg_status, { seq_id, guid, @@ -284,6 +282,12 @@ -type(seq_id() :: non_neg_integer()). -type(ack() :: seq_id() | 'blank_ack'). +-type(rates() :: #rates { egress :: {timestamp(), non_neg_integer()}, + ingress :: {timestamp(), non_neg_integer()}, + avg_egress :: float(), + avg_ingress :: float(), + timestamp :: timestamp() }). + -type(delta() :: #delta { start_seq_id :: non_neg_integer(), count :: non_neg_integer (), end_seq_id :: non_neg_integer() }). @@ -318,12 +322,7 @@ ram_index_count :: non_neg_integer(), out_counter :: non_neg_integer(), in_counter :: non_neg_integer(), - egress_rate :: {timestamp(), non_neg_integer()}, - avg_egress_rate :: float(), - ingress_rate :: {timestamp(), non_neg_integer()}, - avg_ingress_rate :: float(), - rate_timestamp :: timestamp() - }). + rates :: rates() }). -include("rabbit_backing_queue_spec.hrl"). @@ -425,12 +424,11 @@ init(QueueName, IsDurable, _Recover) -> ram_index_count = 0, out_counter = 0, in_counter = 0, - egress_rate = {Now, 0}, - avg_egress_rate = 0.0, - ingress_rate = {Now, DeltaCount1}, - avg_ingress_rate = 0.0, - rate_timestamp = Now - }, + rates = #rates { egress = {Now, 0}, + ingress = {Now, DeltaCount1}, + avg_egress = 0.0, + avg_ingress = 0.0, + timestamp = Now } }, a(maybe_deltas_to_betas(State)). 
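%% init/3 above seeds each rate in #rates{} as a {Timestamp, Count}
%% pair, so a fresh average can later be derived from the counter
%% delta over the elapsed wall-clock time. update_rate/4 itself is
%% not part of this hunk; a plausible stdlib-only sketch of the
%% computation it performs:
rate_since(Then, CountSinceThen) ->
    Micros = timer:now_diff(now(), Then),             %% microseconds
    CountSinceThen * 1000000 / erlang:max(Micros, 1). %% msgs/second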
terminate(State) -> @@ -648,8 +646,8 @@ is_empty(State) -> 0 == len(State). set_ram_duration_target(DurationTarget, State = #vqstate { - avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate, + rates = #rates { avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate }, target_ram_msg_count = TargetRamMsgCount }) -> Rate = AvgEgressRate + AvgIngressRate, TargetRamMsgCount1 = @@ -666,14 +664,15 @@ set_ram_duration_target(DurationTarget, false -> reduce_memory_use(State1) end). -ram_duration(State = #vqstate { egress_rate = Egress, - ingress_rate = Ingress, - rate_timestamp = Timestamp, - in_counter = InCount, - out_counter = OutCount, - ram_msg_count = RamMsgCount, - duration_target = DurationTarget, - ram_msg_count_prev = RamMsgCountPrev }) -> +ram_duration(State = #vqstate { + rates = #rates { egress = Egress, + ingress = Ingress, + timestamp = Timestamp } = Rates, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> Now = now(), {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), @@ -685,16 +684,18 @@ ram_duration(State = #vqstate { egress_rate = Egress, (2 * (AvgEgressRate + AvgIngressRate)) end, - {Duration, set_ram_duration_target(DurationTarget, - State #vqstate { - egress_rate = Egress1, - avg_egress_rate = AvgEgressRate, - ingress_rate = Ingress1, - avg_ingress_rate = AvgIngressRate, - rate_timestamp = Now, - in_counter = 0, - out_counter = 0, - ram_msg_count_prev = RamMsgCount })}. + {Duration, set_ram_duration_target( + DurationTarget, + State #vqstate { + rates = Rates #rates { + egress = Egress1, + ingress = Ingress1, + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate, + timestamp = Now }, + in_counter = 0, + out_counter = 0, + ram_msg_count_prev = RamMsgCount })}. needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) -> {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end, @@ -717,10 +718,11 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, target_ram_msg_count = TargetRamMsgCount, ram_msg_count = RamMsgCount, ram_index_count = RamIndexCount, - avg_egress_rate = AvgEgressRate, - avg_ingress_rate = AvgIngressRate, next_seq_id = NextSeqId, - persistent_count = PersistentCount }) -> + persistent_count = PersistentCount, + rates = #rates { + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate } }) -> [ {q1 , queue:len(Q1)}, {q2 , bpqueue:len(Q2)}, {delta , Delta}, @@ -732,10 +734,10 @@ status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, {target_ram_msg_count , TargetRamMsgCount}, {ram_msg_count , RamMsgCount}, {ram_index_count , RamIndexCount}, - {avg_egress_rate , AvgEgressRate}, - {avg_ingress_rate , AvgIngressRate}, {next_seq_id , NextSeqId}, - {persistent_count , PersistentCount} ]. + {persistent_count , PersistentCount}, + {avg_egress_rate , AvgEgressRate}, + {avg_ingress_rate , AvgIngressRate} ]. 
%%---------------------------------------------------------------------------- %% Minor helpers -- cgit v1.2.1 From 8fe39e57d56d08bb803424e914bf51b2adaaab1b Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 22 Jul 2010 15:12:36 +0100 Subject: add rabbit_misc:orddict_cons helper analogous to dict_cons --- src/rabbit_misc.erl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 9ed106d6..a0a5ba58 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -61,7 +61,8 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). +-export([recursive_delete/1, dict_cons/3, orddict_cons/3, + unlink_and_capture_exit/1]). -import(mnesia). -import(lists). @@ -168,7 +169,10 @@ -spec(recursive_delete/1 :: ([file:filename()]) -> rabbit_types:ok_or_error({file:filename(), any()})). --spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> dict:dictionary()). +-spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> + dict:dictionary()). +-spec(orddict_cons/3 :: (any(), any(), orddict:dictionary()) -> + orddict:dictionary()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). -endif. @@ -661,6 +665,9 @@ recursive_delete1(Path) -> dict_cons(Key, Value, Dict) -> dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). +orddict_cons(Key, Value, Dict) -> + orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). + unlink_and_capture_exit(Pid) -> unlink(Pid), receive {'EXIT', Pid, _} -> ok -- cgit v1.2.1 From 86a7ad787334356739079279880c6947c82b0c67 Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 22 Jul 2010 15:14:07 +0100 Subject: replace use of dict with orddict for per-msg_store partitions which is much faster for small dicts --- src/rabbit_variable_queue.erl | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8bff66af..92ffc511 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -971,10 +971,10 @@ purge_betas_and_deltas(State = #vqstate { q3 = Q3, remove_queue_entries(Fold, Q, IndexState) -> {GuidsByStore, Delivers, Acks} = - Fold(fun remove_queue_entries1/2, {dict:new(), [], []}, Q), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:remove(MsgStore, Guids) - end, ok, GuidsByStore), + Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), rabbit_queue_index:ack(Acks, rabbit_queue_index:deliver(Delivers, IndexState)). 
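%% orddict_cons/3 mirrors dict_cons/3: prepend Value to the list kept
%% under Key, creating the entry on first use. With at most two
%% message stores the per-store guid groups built above stay tiny,
%% which is where orddict (a plain sorted key list) beats dict. A
%% usage sketch with throwaway data (demo name hypothetical):
orddict_cons_demo() ->
    D1 = rabbit_misc:orddict_cons(store_a, m1, orddict:new()),
    D2 = rabbit_misc:orddict_cons(store_a, m2, D1),
    [m2, m1] = orddict:fetch(store_a, D2), %% latest cons at the head
    ok.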
@@ -984,8 +984,8 @@ remove_queue_entries1( index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, {GuidsByStore, Delivers, Acks}) -> {case MsgOnDisk of - true -> rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, - GuidsByStore); + true -> rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, + GuidsByStore); false -> GuidsByStore end, cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers), @@ -1084,19 +1084,20 @@ remove_pending_ack(KeepPersistent, State = #vqstate { pending_ack = PA, index_state = IndexState }) -> {SeqIds, GuidsByStore} = dict:fold(fun accumulate_ack/3, - {[], dict:new()}, PA), + {[], orddict:new()}, PA), State1 = State #vqstate { pending_ack = dict:new() }, case KeepPersistent of - true -> case dict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + true -> case orddict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of error -> State1; {ok, Guids} -> ok = rabbit_msg_store:remove( ?TRANSIENT_MSG_STORE, Guids), State1 end; false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - rabbit_msg_store:remove(MsgStore, Guids) - end, ok, GuidsByStore), + ok = orddict:fold( + fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), State1 #vqstate { index_state = IndexState1 } end. @@ -1111,12 +1112,12 @@ ack(MsgStoreFun, Fun, AckTags, State) -> {accumulate_ack(SeqId, AckEntry, Acc), Fun(AckEntry, State2 #vqstate { pending_ack = dict:erase(SeqId, PA) })} - end, {{[], dict:new()}, State}, AckTags), + end, {{[], orddict:new()}, State}, AckTags), IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), - ok = dict:fold(fun (MsgStore, Guids, ok) -> - MsgStoreFun(MsgStore, Guids) - end, ok, GuidsByStore), - PCount1 = PCount - case dict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + MsgStoreFun(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case orddict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of error -> 0; {ok, Guids} -> length(Guids) end, @@ -1129,7 +1130,7 @@ accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS Acc; accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> {cons_if(IsPersistent, SeqId, SeqIdsAcc), - rabbit_misc:dict_cons(find_msg_store(IsPersistent), Guid, Dict)}. + rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, Dict)}. %%---------------------------------------------------------------------------- %% Phase changes -- cgit v1.2.1 From b107c356be2e174735b00d49cc3373cd7cb27ef4 Mon Sep 17 00:00:00 2001 From: Alexandru Scvortov Date: Mon, 26 Jul 2010 09:45:12 +0100 Subject: selectively receive more messages during tests --- src/rabbit_tests.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index cff55c91..516e9134 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1081,6 +1081,11 @@ expect_normal_channel_termination(MRef, Ch) -> after 1000 -> throw(channel_failed_to_exit) end. +gobble_channel_exit() -> + receive {channel_exit, _, _} -> ok + after 1000 -> throw(channel_exit_not_received) + end. 
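%% gobble_channel_exit/0 above is a selective receive: it pulls one
%% {channel_exit, _, _} message out of the mailbox, leaves everything
%% else queued, and turns silence into a test failure via 'after'.
%% The same shape works for any expected event (names hypothetical;
%% a bound Tag in the pattern keeps the receive selective):
expect_event(Tag, Millis) ->
    receive {Tag, Payload} -> {ok, Payload}
    after Millis -> {error, timeout}
    end.
%% self() ! {ping, 42},
%% {ok, 42} = expect_event(ping, 100).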
+ test_memory_pressure() -> {Writer0, Ch0, MRef0} = test_memory_pressure_spawn(), [ok = rabbit_channel:conserve_memory(Ch0, Conserve) || @@ -1103,6 +1108,7 @@ test_memory_pressure() -> Content = rabbit_basic:build_content(#'P_basic'{}, <<>>), ok = rabbit_channel:do(Ch0, #'basic.publish'{}, Content), expect_normal_channel_termination(MRef0, Ch0), + gobble_channel_exit(), {Writer1, Ch1, MRef1} = test_memory_pressure_spawn(), ok = rabbit_channel:conserve_memory(Ch1, true), @@ -1114,19 +1120,23 @@ test_memory_pressure() -> %% send back the wrong flow_ok. Channel should die. ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), expect_normal_channel_termination(MRef1, Ch1), + gobble_channel_exit(), {_Writer2, Ch2, MRef2} = test_memory_pressure_spawn(), %% just out of the blue, send a flow_ok. Life should end. ok = rabbit_channel:do(Ch2, #'channel.flow_ok'{active = true}), expect_normal_channel_termination(MRef2, Ch2), + gobble_channel_exit(), {_Writer3, Ch3, MRef3} = test_memory_pressure_spawn(), ok = rabbit_channel:conserve_memory(Ch3, true), + ok = test_memory_pressure_receive_flow(false), receive {'DOWN', MRef3, process, Ch3, _} -> ok after 12000 -> throw(channel_failed_to_exit) end, + gobble_channel_exit(), alarm_handler:set_alarm({vm_memory_high_watermark, []}), Me = self(), -- cgit v1.2.1 From 3a799b253758b1486f2ae10823a4646b8bd655bf Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Tue, 27 Jul 2010 09:09:00 +0100 Subject: remove superfluous warning We don't complain about similar situations elsewhere, so there is no point doing it here. --- src/rabbit_amqqueue_process.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a2cbcf55..468a41b2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -719,8 +719,6 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> gen_server2:reply(From, ok), case lookup_ch(ChPid) of not_found -> - rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", - [ChPid]), noreply(State); C = #cr{acktags = ChAckTags} -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), -- cgit v1.2.1